From 521343b92f96e189dd99168d18c16257bbda4d4e Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Wed, 13 Apr 2022 21:55:47 +0000 Subject: [PATCH] support multiple files and URLs --- orcli | 139 ++++++++++++++++++++++++++------------ src/bashly.yml | 50 +++++++------- src/import_csv_command.sh | 59 ++++------------ src/lib/init_import.sh | 48 +++++++++++++ src/lib/post_import.sh | 29 ++++++++ 5 files changed, 214 insertions(+), 111 deletions(-) create mode 100644 src/lib/init_import.sh create mode 100644 src/lib/post_import.sh diff --git a/orcli b/orcli index 61ff541..0485883 100755 --- a/orcli +++ b/orcli @@ -318,6 +318,56 @@ function get_id() { echo "$ids" } +# src/lib/init_import.sh +# common import tasks to support multiple files and URLs +# shellcheck shell=bash +function init_import() { + local files + local file + local tmpdir + # catch args, convert the space delimited string to an array + files=() + eval "files=(${args[file]})" + # create tmp directory + tmpdir="$(mktemp -d)" + trap 'rm -rf "$tmpdir"' 0 2 3 15 + # download files if name starts with http:// or https:// + for i in "${!files[@]}"; do + if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then + if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]##*/}"; then + error "download of ${files[$i]} failed!" + fi + files[$i]="${tmpdir}/${files[$i]##*/}" + fi + done + # create a zip archive if there are multiple files + if [[ ${#files[@]} -gt 1 ]]; then + file="$tmpdir/Untitled.zip" + zip "$file" "${files[@]}" + else + file="${files[0]}" + fi + # basic post data + if [[ ${file} == "-" ]]; then + data+=("project-file=@-") + else + if ! path=$(readlink -e "${file}"); then + error "file ${file} not found!" + fi + data+=("project-file=@${path}") + fi + if [[ ${args[--projectName]} ]]; then + data+=("project-name=${args[--projectName]}") + else + if [[ ${file} == "-" ]]; then + name="Untitled" + else + name="$(basename "${path}" | tr '.' ' ')" + fi + data+=("project-name=${name}") + fi +} + # src/lib/logging.sh # print messages to STDERR # shellcheck shell=bash @@ -333,6 +383,37 @@ function log() { for msg in "$@"; do echo >&2 "$msg"; done } +# src/lib/post_import.sh +# post to create-project endpoint and validate +# shellcheck shell=bash disable=SC2154 +function post_import() { + local curloptions + local projectid + local projectname + local rows + # post + mapfile -t curloptions < <(for d in "$@"; do + echo "--form" + echo "$d" + done) + if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then + error "import of ${args[file]} failed!" + fi + # validate + projectid=$(cut -d '=' -f 2 <<<"$redirect_url") + if [[ ${#projectid} != 13 ]]; then + error "import of ${args[file]} failed!" + fi + projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2) + projectname="${projectname:1:${#projectname}-2}" + rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2) + if [[ "$rows" = "0" ]]; then + error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}" + else + log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}" + fi +} + # :command.command_functions # :command.function orcli_info_command() { @@ -346,60 +427,32 @@ orcli_import_csv_command() { # src/import_csv_command.sh # shellcheck shell=bash - # catch args, convert the space delimited string to an array - files=() - eval "files=(${args[file]})" - if [ "${files[*]}" = "-" ] && [ -t 0 ]; then - printf "missing required argument or standard input\nusage: cli FILE...\n" + # call init_import function to eval args and to set basic post data + init_import + + # check if stdin is present if selected + if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]] && [ -t 0 ]; then + orcli_import_csv_usage exit 1 fi - # TODO: zip files if more than 1 - file="${files[*]}" - - # prepare input - data=() - data+=("--form" "format=text/line-based/*sv") - if [[ ${file} == "-" ]]; then - data+=("--form" "project-file=@-") - else - if ! path=$(readlink -e "${file}"); then - error "file ${file} not found!" - fi - data+=("--form" "project-file=@${path}") - fi - if [[ ${args[--projectName]} ]]; then - data+=("--form" "project-name=${args[--projectName]}") - else - name="$(basename "${path}" | tr '.' ' ')" - data+=("--form" "project-name=${name}") - fi + # assemble specific post data (some options require json format) + data+=("format=text/line-based/*sv") options='{ ' options+="\"separator\": \"${args[--separator]}\"" if [[ ${args[--encoding]} ]]; then - options+=", \"encoding\": \"${args[--encoding]}\"" + options+=', ' + options+="\"encoding\": \"${args[--encoding]}\"" fi if [[ ${args[--trimStrings]} ]]; then - options+=", \"trimStrings\": true" + options+=', ' + options+="\"trimStrings\": true" fi options+=' }' + data+=("options=${options}") - # execute curl - if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${data[@]}" --form options="${options}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then - error "import of ${files[*]} failed!" - fi - - # validate import - projectid=$(cut -d '=' -f 2 <<< "$redirect_url") - if [[ ${#projectid} != 13 ]]; then - error "import of ${files[*]} failed!" - fi - rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2) - if [[ "$rows" = "0" ]]; then - error "import of ${files[*]} contains 0 rows!" "${redirect_url}" "name:${name}" "rows:${rows}" - else - log "import of ${files[*]} successful" "${redirect_url}" "name:${name}" "rows:${rows}" - fi + # call post_import function to post data and validate results + post_import "${data[@]}" } # :command.function diff --git a/src/bashly.yml b/src/bashly.yml index 20a8227..a62d62d 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -15,6 +15,7 @@ environment_variables: examples: - orcli list - orcli import csv file + - orcli import csv "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv" - orcli info Clipboard - orcli info 1234567890123 @@ -33,31 +34,32 @@ commands: help: import commands commands: - - name: csv - help: import comma-separated values (CSV) - args: - - name: file - help: Path to one or more files. When FILE is -, read standard input. - default: "-" - repeatable: true - flags: - - long: --separator - help: character(s) that separates columns - arg: separator - default: "," - - long: --encoding - help: set character encoding - arg: encoding - - long: --trimStrings - help: trim leading & trailing whitespace from strings - - long: --projectName - arg: projectName - help: set a name for the OpenRefine project + - name: csv + help: import comma-separated values (CSV) + args: + - name: file + help: Path to one or more files or URLs. When FILE is -, read standard input. + default: "-" + repeatable: true + flags: + - long: --separator + help: character(s) that separates columns + arg: separator + default: "," + - long: --encoding + help: set character encoding + arg: encoding + - long: --trimStrings + help: trim leading & trailing whitespace from strings + - long: --projectName + arg: projectName + help: set a name for the OpenRefine project - examples: - - orcli import csv file - - cat file | orcli import csv - - orcli import csv --separator ; --encoding ISO-8859-1 --trimStrings --projectName example + examples: + - orcli import csv file + - cat file | orcli import csv + - orcli import csv file --separator ; --encoding ISO-8859-1 --trimStrings --projectName example + - orcli import csv "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv" - name: list help: list projects on OpenRefine server diff --git a/src/import_csv_command.sh b/src/import_csv_command.sh index f5bc815..53fbc4a 100644 --- a/src/import_csv_command.sh +++ b/src/import_csv_command.sh @@ -1,57 +1,28 @@ # shellcheck shell=bash -# catch args, convert the space delimited string to an array -files=() -eval "files=(${args[file]})" -if [ "${files[*]}" = "-" ] && [ -t 0 ]; then - printf "missing required argument or standard input\nusage: cli FILE...\n" +# call init_import function to eval args and to set basic post data +init_import + +# check if stdin is present if selected +if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]] && [ -t 0 ]; then + orcli_import_csv_usage exit 1 fi -# TODO: support URLs -# TODO: zip files if more than 1 -file="${files[*]}" - -# prepare input -data=() -data+=("--form" "format=text/line-based/*sv") -if [[ ${file} == "-" ]]; then - data+=("--form" "project-file=@-") -else - if ! path=$(readlink -e "${file}"); then - error "file ${file} not found!" - fi - data+=("--form" "project-file=@${path}") -fi -if [[ ${args[--projectName]} ]]; then - data+=("--form" "project-name=${args[--projectName]}") -else - name="$(basename "${path}" | tr '.' ' ')" - data+=("--form" "project-name=${name}") -fi +# assemble specific post data (some options require json format) +data+=("format=text/line-based/*sv") options='{ ' options+="\"separator\": \"${args[--separator]}\"" if [[ ${args[--encoding]} ]]; then - options+=", \"encoding\": \"${args[--encoding]}\"" + options+=', ' + options+="\"encoding\": \"${args[--encoding]}\"" fi if [[ ${args[--trimStrings]} ]]; then - options+=", \"trimStrings\": true" + options+=', ' + options+="\"trimStrings\": true" fi options+=' }' +data+=("options=${options}") -# execute curl -if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${data[@]}" --form options="${options}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then - error "import of ${files[*]} failed!" -fi - -# validate import -projectid=$(cut -d '=' -f 2 <<< "$redirect_url") -if [[ ${#projectid} != 13 ]]; then - error "import of ${files[*]} failed!" -fi -rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2) -if [[ "$rows" = "0" ]]; then - error "import of ${files[*]} contains 0 rows!" "${redirect_url}" "name:${name}" "rows:${rows}" -else - log "import of ${files[*]} successful" "${redirect_url}" "name:${name}" "rows:${rows}" -fi \ No newline at end of file +# call post_import function to post data and validate results +post_import "${data[@]}" diff --git a/src/lib/init_import.sh b/src/lib/init_import.sh new file mode 100644 index 0000000..df2a405 --- /dev/null +++ b/src/lib/init_import.sh @@ -0,0 +1,48 @@ +# common import tasks to support multiple files and URLs +# shellcheck shell=bash +function init_import() { + local files + local file + local tmpdir + # catch args, convert the space delimited string to an array + files=() + eval "files=(${args[file]})" + # create tmp directory + tmpdir="$(mktemp -d)" + trap 'rm -rf "$tmpdir"' 0 2 3 15 + # download files if name starts with http:// or https:// + for i in "${!files[@]}"; do + if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then + if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]##*/}"; then + error "download of ${files[$i]} failed!" + fi + files[$i]="${tmpdir}/${files[$i]##*/}" + fi + done + # create a zip archive if there are multiple files + if [[ ${#files[@]} -gt 1 ]]; then + file="$tmpdir/Untitled.zip" + zip "$file" "${files[@]}" + else + file="${files[0]}" + fi + # basic post data + if [[ ${file} == "-" ]]; then + data+=("project-file=@-") + else + if ! path=$(readlink -e "${file}"); then + error "file ${file} not found!" + fi + data+=("project-file=@${path}") + fi + if [[ ${args[--projectName]} ]]; then + data+=("project-name=${args[--projectName]}") + else + if [[ ${file} == "-" ]]; then + name="Untitled" + else + name="$(basename "${path}" | tr '.' ' ')" + fi + data+=("project-name=${name}") + fi +} diff --git a/src/lib/post_import.sh b/src/lib/post_import.sh new file mode 100644 index 0000000..aac2bbc --- /dev/null +++ b/src/lib/post_import.sh @@ -0,0 +1,29 @@ +# post to create-project endpoint and validate +# shellcheck shell=bash disable=SC2154 +function post_import() { + local curloptions + local projectid + local projectname + local rows + # post + mapfile -t curloptions < <(for d in "$@"; do + echo "--form" + echo "$d" + done) + if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then + error "import of ${args[file]} failed!" + fi + # validate + projectid=$(cut -d '=' -f 2 <<<"$redirect_url") + if [[ ${#projectid} != 13 ]]; then + error "import of ${args[file]} failed!" + fi + projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2) + projectname="${projectname:1:${#projectname}-2}" + rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2) + if [[ "$rows" = "0" ]]; then + error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}" + else + log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}" + fi +}