Merge pull request #19 from opencultureconsulting:felixlohmeier/import-csv-1

resolve #1
This commit is contained in:
Felix Lohmeier 2022-04-13 23:57:03 +02:00 committed by GitHub
commit b59ddd392c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 449 additions and 15 deletions

322
orcli
View File

@ -35,7 +35,7 @@ orcli_usage() {
# :command.usage_commands
printf "Commands:\n"
echo " info show project metadata"
echo " import "
echo " import import commands"
echo " list list projects on OpenRefine server"
echo
@ -61,7 +61,8 @@ orcli_usage() {
printf "Examples:\n"
printf " orcli list\n"
printf " orcli info clipboard\n"
printf " orcli import csv file\n"
printf " orcli info Clipboard\n"
printf " orcli info 1234567890123\n"
echo
# :command.footer
@ -106,7 +107,7 @@ orcli_info_usage() {
# :command.usage_examples
printf "Examples:\n"
printf " info clipboard\n"
printf " info Clipboard\n"
printf " info 1234567890123\n"
echo
@ -116,18 +117,22 @@ orcli_info_usage() {
# :command.usage
orcli_import_usage() {
if [[ -n $long_usage ]]; then
printf "orcli import - \n"
printf "orcli import - import commands\n"
echo
else
printf "orcli import - \n"
printf "orcli import - import commands\n"
echo
fi
printf "Usage:\n"
printf " orcli import\n"
printf " orcli import --help | -h\n"
printf " orcli import [command]\n"
printf " orcli import [command] --help | -h\n"
echo
# :command.usage_commands
printf "Commands:\n"
echo " csv import comma-separated values (CSV)"
echo
if [[ -n $long_usage ]]; then
@ -140,6 +145,70 @@ orcli_import_usage() {
fi
}
# :command.usage
# Print the help text for "orcli import csv" to standard output.
# Globals:
#   long_usage (read) - when non-empty, the options, arguments and examples
#                       sections are printed after the short usage
orcli_import_csv_usage() {
  # the headline is identical for the short and the long form
  printf "orcli import csv - import comma-separated values (CSV)\n"
  echo

  printf "Usage:\n"
  printf " orcli import csv [FILE...] [options]\n"
  printf " orcli import csv --help | -h\n"
  echo

  if [[ -n $long_usage ]]; then
    printf "Options:\n"
    # :command.usage_fixed_flags
    echo " --help, -h"
    printf " Show this help\n"
    echo

    # :command.usage_flags
    # :flag.usage
    echo " --separator SEPARATOR"
    printf " character(s) that separates columns\n"
    printf " Default: ,\n"
    echo

    # :flag.usage
    echo " --encoding ENCODING"
    printf " set character encoding\n"
    echo

    # :flag.usage
    echo " --trimStrings"
    printf " trim leading & trailing whitespace from strings\n"
    echo

    # :flag.usage
    echo " --projectName PROJECTNAME"
    printf " set a name for the OpenRefine project\n"
    echo

    # :command.usage_args
    printf "Arguments:\n"

    # :argument.usage
    echo " FILE..."
    # init_import downloads arguments starting with http:// or https://,
    # so URLs are mentioned here as well
    printf " Path to one or more files or URLs. When FILE is -, read standard input.\n"
    printf " Default: -\n"
    echo

    # :command.usage_examples
    printf "Examples:\n"
    printf " orcli import csv file\n"
    printf " cat file | orcli import csv\n"
    # the semicolon is quoted: a bare ";" would end the command when the
    # example is pasted into a shell
    printf " orcli import csv --separator \";\" --encoding ISO-8859-1 --trimStrings\n --projectName example\n"
    echo
  fi
}
# :command.usage
orcli_list_usage() {
if [[ -n $long_usage ]]; then
@ -249,6 +318,56 @@ function get_id() {
echo "$ids"
}
# src/lib/init_import.sh
# common import tasks to support multiple files and URLs
# shellcheck shell=bash
# Resolve the FILE arguments of an import command and append the generic
# multipart form fields to the global "data" array.
# Globals:
#   args (read)        - args[file] holds the file list as a string of
#                        double-quoted, space-separated words;
#                        args[--projectName] optionally names the project
#   data (written)     - receives "project-file=..." and "project-name=..."
#   path, name (written) - intentionally left global, as before
# Uses: error (logging helper defined elsewhere), curl, zip, readlink -e
function init_import() {
  local files
  local file
  local tmpdir
  local cleanup
  local i
  # catch args, convert the space delimited string to an array
  # NOTE(review): eval runs whatever ends up in args[file]. The parser only
  # wraps each argument in double quotes, so a name containing ", $ or
  # backticks is still interpreted by the shell. Consider handing the file
  # list over as a real array instead.
  files=()
  eval "files=(${args[file]})"
  # create tmp directory
  if ! tmpdir="$(mktemp -d)"; then
    error "creation of temporary directory failed!"
  fi
  # tmpdir is local to this function and out of scope by the time the trap
  # fires at script exit, so the shell-quoted path is baked into the trap
  # command now instead of being expanded later
  printf -v cleanup 'rm -rf -- %q' "$tmpdir"
  # shellcheck disable=SC2064
  trap "$cleanup" 0 2 3 15
  # download files if name starts with http:// or https://
  for i in "${!files[@]}"; do
    if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then
      # the local copy is named after the last path segment of the URL
      if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]##*/}"; then
        error "download of ${files[$i]} failed!"
      fi
      files[$i]="${tmpdir}/${files[$i]##*/}"
    fi
  done
  # create a zip archive if there are multiple files
  if [[ ${#files[@]} -gt 1 ]]; then
    file="$tmpdir/Untitled.zip"
    if ! zip "$file" "${files[@]}"; then
      error "creation of zip archive ${file} failed!"
    fi
  else
    file="${files[0]}"
  fi
  # basic post data
  if [[ ${file} == "-" ]]; then
    # "@-" makes curl read the upload from standard input
    data+=("project-file=@-")
  else
    if ! path=$(readlink -e "${file}"); then
      error "file ${file} not found!"
    fi
    data+=("project-file=@${path}")
  fi
  if [[ ${args[--projectName]} ]]; then
    data+=("project-name=${args[--projectName]}")
  else
    if [[ ${file} == "-" ]]; then
      name="Untitled"
    else
      # default project name: file name with dots turned into spaces
      name="$(basename "${path}" | tr '.' ' ')"
    fi
    data+=("project-name=${name}")
  fi
}
# src/lib/logging.sh
# print messages to STDERR
# shellcheck shell=bash
@ -264,6 +383,37 @@ function log() {
for msg in "$@"; do echo >&2 "$msg"; done
}
# src/lib/post_import.sh
# post to create-project endpoint and validate
# shellcheck shell=bash disable=SC2154
# Post the collected form fields to the create-project-from-upload endpoint
# and validate the project that was created.
# Arguments:
#   "$@" - multipart form fields as "name=value" strings; only the
#          project-file field may use curl's "@path" upload syntax
# Globals:
#   OPENREFINE_URL, args (read); redirect_url (written)
# Uses: get_csrf, error, log (defined elsewhere), curl
function post_import() {
  local curloptions
  local projectid
  local projectname
  local rows
  local field
  # post
  # The option list is built directly as an array so values containing
  # newlines stay intact. curl's --form gives a leading @ or < and ";type="
  # in the value a special meaning, so only the file upload uses it; all other
  # fields are sent literally with --form-string (e.g. a ";" separator inside
  # the options JSON or a project name starting with "@").
  curloptions=()
  for field in "$@"; do
    if [[ ${field} == "project-file=@"* ]]; then
      curloptions+=(--form "$field")
    else
      curloptions+=(--form-string "$field")
    fi
  done
  if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then
    error "import of ${args[file]} failed!"
  fi
  # validate
  # the project id is whatever follows the first "=" in the redirect URL and
  # is expected to be 13 characters long
  projectid=$(cut -d '=' -f 2 <<<"$redirect_url")
  if [[ ${#projectid} != 13 ]]; then
    error "import of ${args[file]} failed!"
  fi
  projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2)
  # strip the first and the last character (the surrounding JSON quotes)
  projectname="${projectname:1:${#projectname}-2}"
  rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2)
  if [[ "$rows" = "0" ]]; then
    error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}"
  else
    log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}"
  fi
}
# :command.command_functions
# :command.function
orcli_info_command() {
@ -273,10 +423,36 @@ orcli_info_command() {
}
# :command.function
orcli_import_command() {
# src/import_command.sh
# Command function for "orcli import csv": adds the CSV-specific multipart
# form fields to the global "data" array and passes them to post_import.
# Reads the global "args" array (file, --separator, --encoding, --trimStrings).
# NOTE(review): this span comes from a rendered diff without +/- markers; the
# bare get_csrf call below does not appear in src/import_csv_command.sh and
# may be a removed line of the former import command — confirm.
orcli_import_csv_command() {
# src/import_csv_command.sh
# shellcheck shell=bash
get_csrf
# call init_import function to eval args and to set basic post data
init_import
# check if stdin is present if selected
# (|| and && have equal precedence and group left to right, so this reads as
# "(file is - or file is "-") and stdin is a terminal")
if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]] && [ -t 0 ]; then
orcli_import_csv_usage
exit 1
fi
# assemble specific post data (some options require json format)
data+=("format=text/line-based/*sv")
# the JSON object is concatenated by hand; flag values are inserted verbatim
options='{ '
options+="\"separator\": \"${args[--separator]}\""
if [[ ${args[--encoding]} ]]; then
options+=', '
options+="\"encoding\": \"${args[--encoding]}\""
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+="\"trimStrings\": true"
fi
options+=' }'
data+=("options=${options}")
# call post_import function to post data and validate results
post_import "${data[@]}"
}
# :command.function
@ -448,7 +624,26 @@ orcli_import_parse_requirements() {
# :command.environment_variables_filter
# :command.dependencies_filter
# :command.command_filter
action="import"
action=${1:-}
case $action in
-* )
;;
csv )
action="csv"
shift
orcli_import_csv_parse_requirements "$@"
shift $#
;;
# :command.command_fallback
* )
orcli_import_usage
exit 1
;;
esac
# :command.parse_requirements_while
while [[ $# -gt 0 ]]; do
key="$1"
@ -475,6 +670,103 @@ orcli_import_parse_requirements() {
# :command.user_filter
}
# :command.parse_requirements
# Parse the command line of "orcli import csv" into the global "args" array.
# Arguments:
#   "$@" - the words following "orcli import csv"
# Globals:
#   args (written)       - flag values keyed by flag name; args[file] collects
#                          every positional word, each wrapped in double quotes
#   action, key (written)
#   long_usage (written) - set to "yes" when help is requested
# Exits after printing the usage for --help/-h, and with status 1 for an
# unknown option or a flag that lacks its value.
orcli_import_csv_parse_requirements() {
  local value_name

  # :command.fixed_flags_filter
  if [[ ${1:-} == "--help" || ${1:-} == "-h" ]]; then
    long_usage=yes
    orcli_import_csv_usage
    exit
  fi

  # :command.command_filter
  action="import csv"

  # :command.parse_requirements_while
  while (($# > 0)); do
    key="$1"
    case "$key" in
      # :flag.case - every flag that takes a value is handled the same way
      --separator | --encoding | --projectName)
        if [[ -z ${2+x} ]]; then
          # the placeholder in the message is the flag name in upper case
          value_name="${key#--}"
          printf "%s\n" "$key requires an argument: $key ${value_name^^}"
          exit 1
        fi
        args[$key]="$2"
        shift 2
        ;;

      # :flag.case - boolean switch
      --trimStrings)
        args[--trimStrings]=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key"
        exit 1
        ;;

      # :command.parse_requirements_case - positional FILE words (repeatable)
      *)
        if [[ -n ${args[file]+x} ]]; then
          args[file]+=" \"$1\""
        else
          args[file]="\"$1\""
        fi
        shift
        ;;
    esac
  done

  # :command.default_assignments - empty or missing values fall back to defaults
  args[file]="${args[file]:--}"
  args[--separator]="${args[--separator]:-,}"
}
# :command.parse_requirements
orcli_list_parse_requirements() {
# :command.fixed_flags_filter
@ -549,6 +841,14 @@ run() {
orcli_import_command
fi
elif [[ $action == "import csv" ]]; then
if [[ ${args[--help]:-} ]]; then
long_usage=yes
orcli_import_csv_usage
else
orcli_import_csv_command
fi
elif [[ $action == "list" ]]; then
if [[ ${args[--help]:-} ]]; then
long_usage=yes

View File

@ -14,7 +14,9 @@ environment_variables:
examples:
- orcli list
- orcli info clipboard
- orcli import csv file
- orcli import csv "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv"
- orcli info Clipboard
- orcli info 1234567890123
commands:
@ -25,10 +27,39 @@ commands:
help: project name or id
required: true
examples:
- info clipboard
- info Clipboard
- info 1234567890123
- name: import
help: import commands
commands:
- name: csv
help: import comma-separated values (CSV)
args:
- name: file
help: Path to one or more files or URLs. When FILE is -, read standard input.
default: "-"
repeatable: true
flags:
- long: --separator
help: character(s) that separates columns
arg: separator
default: ","
- long: --encoding
help: set character encoding
arg: encoding
- long: --trimStrings
help: trim leading & trailing whitespace from strings
- long: --projectName
arg: projectName
help: set a name for the OpenRefine project
examples:
- orcli import csv file
- cat file | orcli import csv
- orcli import csv file --separator ";" --encoding ISO-8859-1 --trimStrings --projectName example
- orcli import csv "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv"
- name: list
help: list projects on OpenRefine server

View File

@ -1,2 +0,0 @@
# shellcheck shell=bash
get_csrf

28
src/import_csv_command.sh Normal file
View File

@ -0,0 +1,28 @@
# shellcheck shell=bash
# "orcli import csv": add the CSV-specific form fields and create the project.

# let init_import evaluate the file arguments and add the generic form fields
init_import

# standard input was selected but is a terminal: show the usage and give up
if [[ ${args[file]} == '-' || ${args[file]} == '"-"' ]] && [ -t 0 ]; then
  orcli_import_csv_usage
  exit 1
fi

# CSV-specific form fields; the importer options travel as one JSON object
data+=("format=text/line-based/*sv")
options="{ \"separator\": \"${args[--separator]}\""
if [[ ${args[--encoding]} ]]; then
  options+=", \"encoding\": \"${args[--encoding]}\""
fi
if [[ ${args[--trimStrings]} ]]; then
  options+=", \"trimStrings\": true"
fi
options+=" }"
data+=("options=${options}")

# post_import sends the collected fields and validates the new project
post_import "${data[@]}"

48
src/lib/init_import.sh Normal file
View File

@ -0,0 +1,48 @@
# common import tasks to support multiple files and URLs
# shellcheck shell=bash
# Resolve the FILE arguments of an import command and append the generic
# multipart form fields to the global "data" array.
# Globals:
#   args (read)        - args[file] holds the file list as a string of
#                        double-quoted, space-separated words;
#                        args[--projectName] optionally names the project
#   data (written)     - receives "project-file=..." and "project-name=..."
#   path, name (written) - intentionally left global, as before
# Uses: error (logging helper defined elsewhere), curl, zip, readlink -e
function init_import() {
  local files
  local file
  local tmpdir
  local cleanup
  local i
  # catch args, convert the space delimited string to an array
  # NOTE(review): eval runs whatever ends up in args[file]. The parser only
  # wraps each argument in double quotes, so a name containing ", $ or
  # backticks is still interpreted by the shell. Consider handing the file
  # list over as a real array instead.
  files=()
  eval "files=(${args[file]})"
  # create tmp directory
  if ! tmpdir="$(mktemp -d)"; then
    error "creation of temporary directory failed!"
  fi
  # tmpdir is local to this function and out of scope by the time the trap
  # fires at script exit, so the shell-quoted path is baked into the trap
  # command now instead of being expanded later
  printf -v cleanup 'rm -rf -- %q' "$tmpdir"
  # shellcheck disable=SC2064
  trap "$cleanup" 0 2 3 15
  # download files if name starts with http:// or https://
  for i in "${!files[@]}"; do
    if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then
      # the local copy is named after the last path segment of the URL
      if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]##*/}"; then
        error "download of ${files[$i]} failed!"
      fi
      files[$i]="${tmpdir}/${files[$i]##*/}"
    fi
  done
  # create a zip archive if there are multiple files
  if [[ ${#files[@]} -gt 1 ]]; then
    file="$tmpdir/Untitled.zip"
    if ! zip "$file" "${files[@]}"; then
      error "creation of zip archive ${file} failed!"
    fi
  else
    file="${files[0]}"
  fi
  # basic post data
  if [[ ${file} == "-" ]]; then
    # "@-" makes curl read the upload from standard input
    data+=("project-file=@-")
  else
    if ! path=$(readlink -e "${file}"); then
      error "file ${file} not found!"
    fi
    data+=("project-file=@${path}")
  fi
  if [[ ${args[--projectName]} ]]; then
    data+=("project-name=${args[--projectName]}")
  else
    if [[ ${file} == "-" ]]; then
      name="Untitled"
    else
      # default project name: file name with dots turned into spaces
      name="$(basename "${path}" | tr '.' ' ')"
    fi
    data+=("project-name=${name}")
  fi
}

29
src/lib/post_import.sh Normal file
View File

@ -0,0 +1,29 @@
# post to create-project endpoint and validate
# shellcheck shell=bash disable=SC2154
# Post the collected form fields to the create-project-from-upload endpoint
# and validate the project that was created.
# Arguments:
#   "$@" - multipart form fields as "name=value" strings; only the
#          project-file field may use curl's "@path" upload syntax
# Globals:
#   OPENREFINE_URL, args (read); redirect_url (written)
# Uses: get_csrf, error, log (defined elsewhere), curl
function post_import() {
  local curloptions
  local projectid
  local projectname
  local rows
  local field
  # post
  # The option list is built directly as an array so values containing
  # newlines stay intact. curl's --form gives a leading @ or < and ";type="
  # in the value a special meaning, so only the file upload uses it; all other
  # fields are sent literally with --form-string (e.g. a ";" separator inside
  # the options JSON or a project name starting with "@").
  curloptions=()
  for field in "$@"; do
    if [[ ${field} == "project-file=@"* ]]; then
      curloptions+=(--form "$field")
    else
      curloptions+=(--form-string "$field")
    fi
  done
  if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then
    error "import of ${args[file]} failed!"
  fi
  # validate
  # the project id is whatever follows the first "=" in the redirect URL and
  # is expected to be 13 characters long
  projectid=$(cut -d '=' -f 2 <<<"$redirect_url")
  if [[ ${#projectid} != 13 ]]; then
    error "import of ${args[file]} failed!"
  fi
  projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2)
  # strip the first and the last character (the surrounding JSON quotes)
  projectname="${projectname:1:${#projectname}-2}"
  rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2)
  if [[ "$rows" = "0" ]]; then
    error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}"
  else
    log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}"
  fi
}