diff --git a/orcli b/orcli index 1ca47f3..0485883 100755 --- a/orcli +++ b/orcli @@ -35,7 +35,7 @@ orcli_usage() { # :command.usage_commands printf "Commands:\n" echo " info show project metadata" - echo " import " + echo " import import commands" echo " list list projects on OpenRefine server" echo @@ -61,7 +61,8 @@ orcli_usage() { printf "Examples:\n" printf " orcli list\n" - printf " orcli info clipboard\n" + printf " orcli import csv file\n" + printf " orcli info Clipboard\n" printf " orcli info 1234567890123\n" echo # :command.footer @@ -106,7 +107,7 @@ orcli_info_usage() { # :command.usage_examples printf "Examples:\n" - printf " info clipboard\n" + printf " info Clipboard\n" printf " info 1234567890123\n" echo @@ -116,18 +117,22 @@ orcli_info_usage() { # :command.usage orcli_import_usage() { if [[ -n $long_usage ]]; then - printf "orcli import - \n" + printf "orcli import - import commands\n" echo else - printf "orcli import - \n" + printf "orcli import - import commands\n" echo fi printf "Usage:\n" - printf " orcli import\n" - printf " orcli import --help | -h\n" + printf " orcli import [command]\n" + printf " orcli import [command] --help | -h\n" + echo + # :command.usage_commands + printf "Commands:\n" + echo " csv import comma-separated values (CSV)" echo if [[ -n $long_usage ]]; then @@ -140,6 +145,70 @@ orcli_import_usage() { fi } +# :command.usage +orcli_import_csv_usage() { + if [[ -n $long_usage ]]; then + printf "orcli import csv - import comma-separated values (CSV)\n" + echo + + else + printf "orcli import csv - import comma-separated values (CSV)\n" + echo + + fi + + printf "Usage:\n" + printf " orcli import csv [FILE...] 
# src/lib/init_import.sh
# common import tasks to support multiple files and URLs
# shellcheck shell=bash
#
# Turns the FILE arguments into a single upload and appends the basic form
# fields to the global array `data`.
# Globals:
#   args (read)    - args[file]: space delimited list of shell-quoted names
#                    (local paths, http(s) URLs, or "-" for standard input)
#                    args[--projectName]: optional project name
#   data (written) - gets "project-file=@..." and "project-name=..." appended
# Side effects: creates a temporary directory and installs EXIT/INT/QUIT/TERM
# traps that remove it when the script ends.
# Failures are reported through error (defined elsewhere); the code below
# assumes error does not return.
function init_import() {
  local files=()
  local file
  local tmpdir
  local cleanup
  local i
  local path
  local name
  # catch args, convert the space delimited string to an array
  # SECURITY(review): eval runs whatever shell syntax args[file] contains, so
  # this is only safe when the argument parser quoted and escaped every name.
  eval "files=(${args[file]})"
  # create tmp directory
  if ! tmpdir="$(mktemp -d)"; then
    error "unable to create temporary directory!"
  fi
  # The paths stored in data are read after this function has returned, so the
  # directory has to live until the script exits. tmpdir is local and no longer
  # set at that point, therefore the quoted path is baked into the trap command
  # now instead of being expanded when the trap fires.
  printf -v cleanup 'rm -rf -- %q' "$tmpdir"
  # shellcheck disable=SC2064
  trap "$cleanup" EXIT
  # let signals terminate the script so that the EXIT trap does the cleanup
  trap 'exit 130' INT
  trap 'exit 131' QUIT
  trap 'exit 143' TERM
  # download files if name starts with http:// or https://
  for i in "${!files[@]}"; do
    if [[ ${files[i]} == http://* || ${files[i]} == https://* ]]; then
      # the download is stored under the last path segment of the URL
      if ! curl -fs --location "${files[i]}" >"${tmpdir}/${files[i]##*/}"; then
        error "download of ${files[i]} failed!"
      fi
      files[i]="${tmpdir}/${files[i]##*/}"
    fi
  done
  # create a zip archive if there are multiple files
  if [[ ${#files[@]} -gt 1 ]]; then
    file="${tmpdir}/Untitled.zip"
    if ! zip "$file" "${files[@]}"; then
      error "creating zip archive of ${args[file]} failed!"
    fi
  else
    file="${files[0]:-}"
  fi
  # basic post data
  if [[ ${file} == "-" ]]; then
    data+=("project-file=@-")
  else
    if ! path=$(readlink -e "${file}"); then
      error "file ${file} not found!"
    fi
    data+=("project-file=@${path}")
  fi
  # project name: explicit option, "Untitled" for stdin, else the file name
  # with dots replaced by spaces
  if [[ ${args[--projectName]} ]]; then
    data+=("project-name=${args[--projectName]}")
  elif [[ ${file} == "-" ]]; then
    data+=("project-name=Untitled")
  else
    name="$(basename "${path}" | tr '.' ' ')"
    data+=("project-name=${name}")
  fi
}
# :command.parse_requirements
# Parses the command line of `orcli import csv` into the global associative
# array `args` and sets the global `action` to "import csv".
# Arguments: the words following `orcli import csv`
# Globals written: action, long_usage (only for --help), args[--separator],
#   args[--encoding], args[--trimStrings], args[--projectName], args[file]
# args[file] is a space delimited list of double-quoted names. Backslash,
# double quote, dollar sign and backtick inside a name are backslash-escaped so
# that a later `eval "files=(${args[file]})"` yields the names unchanged
# instead of expanding or executing parts of them.
# Exits 0 after printing usage for a leading --help/-h, exits 1 on an unknown
# option or on a flag that lacks its value.
orcli_import_csv_parse_requirements() {
  local key
  local value_name
  local escaped
  # :command.fixed_flags_filter
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_import_csv_usage
      exit
      ;;
  esac
  # :command.command_filter
  action="import csv"
  # :command.parse_requirements_while
  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      # :flag.case (flags that take a value)
      --separator | --encoding | --projectName)
        if [[ -n ${2+x} ]]; then
          args["$key"]="$2"
          shift 2
        else
          # placeholder is the flag name without dashes, in upper case
          value_name="${key#--}"
          printf "%s\n" "${key} requires an argument: ${key} ${value_name^^}"
          exit 1
        fi
        ;;

      # :flag.case
      --trimStrings)
        args[--trimStrings]=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key"
        exit 1
        ;;

      *)
        # :command.parse_requirements_case
        # escape the characters that stay special inside double quotes;
        # backslashes first so the escapes added afterwards are not doubled
        escaped="$1"
        escaped="${escaped//\\/\\\\}"
        escaped="${escaped//\"/\\\"}"
        escaped="${escaped//\$/\\\$}"
        escaped="${escaped//\`/\\\`}"
        if [[ -z ${args[file]+x} ]]; then
          args[file]="\"${escaped}\""
        else
          args[file]="${args[file]} \"${escaped}\""
        fi
        shift
        ;;
    esac
  done
  # :command.default_assignments
  [[ -n ${args[file]:-} ]] || args[file]="-"
  [[ -n ${args[--separator]:-} ]] || args[--separator]=","
}
# shellcheck shell=bash

# let init_import evaluate the file arguments and add the basic form fields
init_import

# standard input was selected but it is a terminal: nothing to read, so show
# the usage text and give up
case "${args[file]}" in
  '-' | '"-"')
    if [[ -t 0 ]]; then
      orcli_import_csv_usage
      exit 1
    fi
    ;;
esac

# importer specific form fields; the importer options are sent as a JSON object
data+=("format=text/line-based/*sv")
options="{ \"separator\": \"${args[--separator]}\""
if [[ -n ${args[--encoding]} ]]; then
  options+=", \"encoding\": \"${args[--encoding]}\""
fi
if [[ -n ${args[--trimStrings]} ]]; then
  options+=", \"trimStrings\": true"
fi
options+=" }"
data+=("options=${options}")

# hand all collected fields to post_import, which uploads and validates
post_import "${data[@]}"
# post to create-project endpoint and validate
# shellcheck shell=bash disable=SC2154
#
# Arguments: form fields as "name=value" words. A field named project-file is
#   passed with curl --form so that "@path" / "@-" uploads a file; every other
#   field is passed with --form-string so that ";", a leading "@" or a leading
#   "<" in the value is sent literally instead of being interpreted by curl.
# Globals read: OPENREFINE_URL, args[file] (used in messages only)
# Globals written: redirect_url
# Calls get_csrf, error and log, which are defined elsewhere; the code assumes
# error does not return.
function post_import() {
  local -a curloptions=()
  local field
  local projectid
  local projectname
  local rows
  # post
  for field in "$@"; do
    if [[ ${field} == project-file=* ]]; then
      curloptions+=(--form "${field}")
    else
      curloptions+=(--form-string "${field}")
    fi
  done
  if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then
    error "import of ${args[file]} failed!"
  fi
  # validate: the text after the first "=" of the redirect URL is taken as the
  # project id and has to be 13 characters long
  projectid=$(cut -d '=' -f 2 <<<"$redirect_url")
  if [[ ${#projectid} != 13 ]]; then
    error "import of ${args[file]} failed!"
  fi
  # NOTE(review): the responses are split at commas and filtered with grep;
  # this assumes that exactly one fragment contains "name" / "total" - confirm
  # against the server output.
  projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2)
  # drop the first and the last character (the surrounding quotes)
  projectname="${projectname:1:${#projectname}-2}"
  rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2)
  if [[ "$rows" = "0" ]]; then
    error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}"
  else
    log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}"
  fi
}