From 6979c41e8afcd0ba8370a51f107214630aa1acb2 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Wed, 20 Apr 2022 10:27:53 +0000 Subject: [PATCH] first draft batch processing --- README.md | 20 ++- orcli | 348 ++++++++++++++++++++++++++++++++++---- src/bashly.yml | 85 ++++++++-- src/batch_command.sh | 64 +++++++ src/export_tsv_command.sh | 4 +- src/lib/logging.sh | 10 +- src/lib/post_import.sh | 8 +- src/list_command.sh | 2 +- 8 files changed, 476 insertions(+), 65 deletions(-) create mode 100644 src/batch_command.sh diff --git a/README.md b/README.md index 431906a..7e9705f 100644 --- a/README.md +++ b/README.md @@ -56,9 +56,11 @@ Usage: orcli --version | -v Commands: - info show project metadata + batch start tmp OpenRefine workspace and run multiple orcli commands import import commands list list projects on OpenRefine server + info show project metadata + export export commands Options: --help, -h @@ -73,12 +75,15 @@ Environment Variables: Default: http://localhost:3333 Examples: + orcli import csv "https://git.io/fj5hF" --projectName "duplicates" orcli list - orcli import csv file - orcli import csv - "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv" - orcli info Clipboard - orcli info 1234567890123 + orcli info "duplicates" + orcli export tsv "duplicates" + orcli export tsv "duplicates" --output "duplicates.tsv" + orcli batch \ + import csv "https://git.io/fj5hF" --projectName "duplicates" \ + info "duplicates" \ + export tsv "duplicates" https://github.com/opencultureconsulting/orcli ``` @@ -95,9 +100,8 @@ gem install bashly 2. Edit code in [src](src) directory -3. Validate and generate script +3. Generate script ```sh -bashly validate bashly generate ``` diff --git a/orcli b/orcli index 90896a8..647a833 100755 --- a/orcli +++ b/orcli @@ -34,6 +34,7 @@ orcli_usage() { echo # :command.usage_commands printf "Commands:\n" + echo " batch start tmp OpenRefine workspace and run multiple orcli commands" echo " import import commands" echo " list list projects on OpenRefine server" echo " info show project metadata" @@ -61,12 +62,12 @@ orcli_usage() { # :command.usage_examples printf "Examples:\n" - printf " orcli import csv file\n" - printf " orcli import csv\n \"https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv\"\n" + printf " orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n" printf " orcli list\n" - printf " orcli info \"doaj article sample csv\"\n" - printf " orcli export tsv \"doaj article sample csv\"\n" - printf " orcli export tsv \"doaj article sample csv\" --output doaj.tsv\n" + printf " orcli info \"duplicates\"\n" + printf " orcli export tsv \"duplicates\"\n" + printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n" + printf " orcli batch \\\\\n import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" \\\\\n info \"duplicates\" \\\\\n export tsv \"duplicates\"\n" echo # :command.footer printf "https://github.com/opencultureconsulting/orcli\n" @@ -75,6 +76,64 @@ orcli_usage() { fi } +# :command.usage +orcli_batch_usage() { + if [[ -n $long_usage ]]; then + printf "orcli batch - start tmp OpenRefine workspace and run multiple orcli commands\n" + echo + + else + printf "orcli batch - start tmp OpenRefine workspace and run multiple orcli commands\n" + echo + + fi + + printf "Usage:\n" + printf " orcli batch [options] ORCLI COMMANDS...\n" + printf " orcli batch --help | -h\n" + echo + + if [[ -n $long_usage ]]; then + printf "Options:\n" + # :command.usage_fixed_flags + echo " --help, -h" + printf " Show this help\n" + echo + # :command.usage_flags + # :flag.usage + echo " --memory RAM" + printf " maximum RAM for OpenRefine java heap space\n" + printf " Default: 2048M\n" + echo + + # :flag.usage + echo " --port PORT" + printf " PORT on which OpenRefine should listen\n" + printf " Default: 3333\n" + echo + + # :flag.usage + echo " --quiet, -q" + printf " suppress log output, print errors only\n" + echo + # :command.usage_args + printf "Arguments:\n" + + echo " ORCLI COMMANDS..." + printf " provide orcli commands without further separators (see examples below)\n avoid \"import\" \"info\" \"list\" \"transform\" \"export\" in file or project names\n use bash -c to execute custom commands\n" + echo + + # :command.usage_examples + printf "Examples:\n" + + printf " orcli batch \\\\\n import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" \\\\\n info \"duplicates\" \\\\\n export tsv \"duplicates\"\n" + printf " orcli batch --memory \"2000M\" --port \"3334\" \\\\\n import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" \\\\\n export tsv \"duplicates\"\n" + printf " orcli batch --quiet \\\\\n import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" \\\\\n export tsv \"duplicates\" --output \"output/duplicates.tsv\" \\\\\n bash -c 'wc -l output/*; echo \"finished\" in \$SECONDS seconds'\n" + echo + + fi +} + # :command.usage orcli_import_usage() { if [[ -n $long_usage ]]; then @@ -150,6 +209,11 @@ orcli_import_csv_usage() { echo " --projectName PROJECTNAME" printf " set a name for the OpenRefine project\n" echo + + # :flag.usage + echo " --quiet, -q" + printf " suppress log output, print errors only\n" + echo # :command.usage_args printf "Arguments:\n" @@ -162,10 +226,11 @@ orcli_import_csv_usage() { # :command.usage_examples printf "Examples:\n" - printf " orcli import csv file\n" - printf " cat file | orcli import csv\n" - printf " orcli import csv file --separator ; --encoding ISO-8859-1 --trimStrings\n --projectName example\n" - printf " orcli import csv\n \"https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv\"\n" + printf " orcli import csv \"file\"\n" + printf " orcli import csv \"file1\" \"file2\"\n" + printf " cat \"file\" | orcli import csv\n" + printf " orcli import csv \"https://git.io/fj5hF\"\n" + printf " orcli import csv \"file\" \\\\\n --separator \";\" \\\\\n --encoding \"ISO-8859-1\" \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n" echo fi @@ -184,7 +249,7 @@ orcli_list_usage() { fi printf "Usage:\n" - printf " orcli list\n" + printf " orcli list [options]\n" printf " orcli list --help | -h\n" echo @@ -194,6 +259,11 @@ orcli_list_usage() { echo " --help, -h" printf " Show this help\n" echo + # :command.usage_flags + # :flag.usage + echo " --quiet, -q" + printf " suppress log output, print errors only\n" + echo fi } @@ -211,7 +281,7 @@ orcli_info_usage() { fi printf "Usage:\n" - printf " orcli info PROJECT\n" + printf " orcli info PROJECT [options]\n" printf " orcli info --help | -h\n" echo @@ -221,7 +291,11 @@ orcli_info_usage() { echo " --help, -h" printf " Show this help\n" echo - + # :command.usage_flags + # :flag.usage + echo " --quiet, -q" + printf " suppress log output, print errors only\n" + echo # :command.usage_args printf "Arguments:\n" @@ -233,7 +307,7 @@ orcli_info_usage() { # :command.usage_examples printf "Examples:\n" - printf " info Clipboard\n" + printf " info \"duplicates\"\n" printf " info 1234567890123\n" echo @@ -305,6 +379,11 @@ orcli_export_tsv_usage() { printf " set character encoding\n" printf " Default: UTF-8\n" echo + + # :flag.usage + echo " --quiet, -q" + printf " suppress log output, print errors only\n" + echo # :command.usage_args printf "Arguments:\n" @@ -316,8 +395,8 @@ orcli_export_tsv_usage() { # :command.usage_examples printf "Examples:\n" - printf " orcli export tsv Clipboard\n" - printf " orcli export tsv Clipboard --output clipboard.tsv\n" + printf " orcli export tsv \"duplicates\"\n" + printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n" echo fi @@ -409,9 +488,7 @@ function get_id() { # common import tasks to support multiple files and URLs # shellcheck shell=bash function init_import() { - local files - local file - local tmpdir + local files file tmpdir # catch args, convert the space delimited string to an array files=() eval "files=(${args[file]})" @@ -421,16 +498,27 @@ function init_import() { # download files if name starts with http:// or https:// for i in "${!files[@]}"; do if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then - if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]##*/}"; then + if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then error "download of ${files[$i]} failed!" fi - files[$i]="${tmpdir}/${files[$i]##*/}" + files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" + fi + done + # read pipes if name starts with /dev/fd + for i in "${!files[@]}"; do + if [[ ${files[$i]} == "/dev/fd"* ]]; then + if ! cat "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then + error "reading of ${files[$i]} failed!" + fi + files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" fi done # create a zip archive if there are multiple files if [[ ${#files[@]} -gt 1 ]]; then file="$tmpdir/Untitled.zip" - zip "$file" "${files[@]}" + if ! zip --quiet --must-match "$file" "${files[@]}"; then + error "creating zip archive with ${files[*]} failed!" + fi else file="${files[0]}" fi @@ -461,13 +549,15 @@ function init_import() { function error() { echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] ERROR: $1" shift - for msg in "$@"; do echo >&2 "$msg"; done + for msg in "$@"; do echo >&2 " $msg"; done exit 1 } function log() { - echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] $1" - shift - for msg in "$@"; do echo >&2 "$msg"; done + if ! [[ ${args[--quiet]} ]]; then + echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] $1" + shift + for msg in "$@"; do echo >&2 " $msg"; done + fi } # src/lib/post_import.sh @@ -484,24 +574,92 @@ function post_import() { echo "$d" done) if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then - error "import of ${args[file]} failed!" + error "importing ${args[file]} failed!" fi # validate projectid=$(cut -d '=' -f 2 <<<"$redirect_url") if [[ ${#projectid} != 13 ]]; then - error "import of ${args[file]} failed!" + error "importing ${args[file]} failed!" fi projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2) projectname="${projectname:1:${#projectname}-2}" rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2) if [[ "$rows" = "0" ]]; then - error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}" + error "import of ${args[file]} contains 0 rows!" else - log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}" + log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}" fi } # :command.command_functions +# :command.function +orcli_batch_command() { + # src/batch_command.sh + # shellcheck shell=bash disable=SC2154 + + # locate orcli and OpenRefine + if command -v orcli &>/dev/null; then + orcli="orcli" + elif [[ -x "orcli" ]]; then + orcli="./orcli" + else + error "orcli is not executable!" "Try: chmod + ./orcli" + fi + if [[ -x "refine" ]]; then + openrefine="./refine" + else + error "OpenRefine's startup script (refine) not found!" "Did you put orcli in your OpenRefine app dir?" + fi + + # create tmp directory + tmpdir="$(mktemp -d)" + trap '{ rm -rf "$tmpdir"; }' 0 2 3 15 + + # update OPENREFINE_URL env + OPENREFINE_URL="http://localhost:${args[--port]}" + + # check if OpenRefine is already running + if curl -fs "${OPENREFINE_URL}" &>/dev/null; then + error "OpenRefine is already running on port ${args[--port]}." "Hint: Stop the other process or use another port." + fi + + # start OpenRefine with tmp workspace and autosave period 25 hours + $openrefine -d "$tmpdir" -m "${args[--memory]}" -p "${args[--port]}" -x refine.autosave=1440 -v warn &>"$tmpdir/openrefine.log" & + openrefine_pid="$!" + + # update trap to kill OpenRefine on error or exit + trap '{ rm -rf "$tmpdir"; kill -9 "$openrefine_pid"; }' 0 2 3 15 + + # wait until OpenRefine is running (timeout 20s) + if ! curl -fs --retry 20 --retry-connrefused --retry-delay 1 "${OPENREFINE_URL}/command/core/get-version" &>/dev/null; then + error "starting OpenRefine server failed!" + else + log "started OpenRefine" "port: ${args[--port]}" "memory: ${args[--memory]}" "tmpdir: ${tmpdir}" "pid: ${openrefine_pid}" + fi + + # assemble command groups from catch-all + i=0 + for arg in "${other_args[@]}"; do + if [[ $arg =~ ^(bash|import|info|list|transform|export)$ ]]; then + ((i = i + 1)) + groups+=("group$i") + fi + declare -a group${i}+="(\"$arg\")" + done + + # call command for each group + for group in "${groups[@]}"; do + declare arrayRef="${group}[@]" + command=("${!arrayRef}") + if [[ ${command[0]} == "bash" ]]; then + "${command[@]}" + elif [[ ${args[--quiet]} ]]; then + "$orcli" "${command[@]}" --quiet + else + "$orcli" "${command[@]}" + fi + done +} # :command.function orcli_import_csv_command() { @@ -545,7 +703,7 @@ orcli_list_command() { error "no OpenRefine reachable/running at ${OPENREFINE_URL}" else if [[ "${response}" == '{"projects":{}}' ]]; then - log "${OPENREFINE_URL} contains zero projects" + log "${OPENREFINE_URL} does not contain any projects yet." else echo "$response" | jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"' fi @@ -591,10 +749,10 @@ orcli_export_tsv_command() { curloptions+=("${args[--output]}") fi if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then - error "export of ${args[project]} failed!" + error "exporting ${args[project]} failed!" else if [[ ${args[--output]} ]]; then - log "export of ${args[project]} successful" "file:${args[--output]}" "rows:$(cat "${args[--output]}" | wc -l )" + log "exported ${args[project]}" "file: ${args[--output]}" "rows: $(wc -l <"${args[--output]}")" fi fi } @@ -633,6 +791,13 @@ parse_requirements() { -* ) ;; + batch ) + action="batch" + shift + orcli_batch_parse_requirements "$@" + shift $# + ;; + import ) action="import" shift @@ -694,6 +859,87 @@ parse_requirements() { # :command.user_filter } +# :command.parse_requirements +orcli_batch_parse_requirements() { + # :command.fixed_flags_filter + case "${1:-}" in + --help | -h ) + long_usage=yes + orcli_batch_usage + exit + ;; + + esac + # :command.environment_variables_filter + # :command.dependencies_filter + # :command.command_filter + action="batch" + # :command.parse_requirements_while + while [[ $# -gt 0 ]]; do + key="$1" + case "$key" in + # :flag.case + --memory ) + # :flag.conflicts + if [[ -n ${2+x} ]]; then + # :flag.validations + args[--memory]="$2" + shift + shift + else + printf "%s\n" "--memory requires an argument: --memory RAM" + exit 1 + fi + ;; + + # :flag.case + --port ) + # :flag.conflicts + if [[ -n ${2+x} ]]; then + # :flag.validations + args[--port]="$2" + shift + shift + else + printf "%s\n" "--port requires an argument: --port PORT" + exit 1 + fi + ;; + + # :flag.case + --quiet | -q ) + # :flag.conflicts + args[--quiet]=1 + shift + ;; + + -?* ) + other_args+=("$1") + shift + ;; + + * ) + # :command.parse_requirements_case + other_args+=("$1") + shift + ;; + + esac + done + # :command.required_args_filter + # :command.required_flags_filter + # :command.catch_all_filter + if [[ ${#other_args[@]} -eq 0 ]]; then + printf "missing required argument: ORCLI COMMANDS...\nusage: orcli batch [options] ORCLI COMMANDS...\n" + exit 1 + fi + # :command.default_assignments + [[ -n ${args[--memory]:-} ]] || args[--memory]="2048M" + [[ -n ${args[--port]:-} ]] || args[--port]="3333" + # :command.whitelist_filter + # :command.user_filter +} + # :command.parse_requirements orcli_import_parse_requirements() { # :command.fixed_flags_filter @@ -822,6 +1068,13 @@ orcli_import_csv_parse_requirements() { fi ;; + # :flag.case + --quiet | -q ) + # :flag.conflicts + args[--quiet]=1 + shift + ;; + -?* ) printf "invalid option: %s\n" "$key" exit 1 @@ -870,6 +1123,12 @@ orcli_list_parse_requirements() { while [[ $# -gt 0 ]]; do key="$1" case "$key" in + # :flag.case + --quiet | -q ) + # :flag.conflicts + args[--quiet]=1 + shift + ;; -?* ) printf "invalid option: %s\n" "$key" @@ -911,6 +1170,12 @@ orcli_info_parse_requirements() { while [[ $# -gt 0 ]]; do key="$1" case "$key" in + # :flag.case + --quiet | -q ) + # :flag.conflicts + args[--quiet]=1 + shift + ;; -?* ) printf "invalid option: %s\n" "$key" @@ -933,7 +1198,7 @@ orcli_info_parse_requirements() { done # :command.required_args_filter if [[ -z ${args[project]+x} ]]; then - printf "missing required argument: PROJECT\nusage: orcli info PROJECT\n" + printf "missing required argument: PROJECT\nusage: orcli info PROJECT [options]\n" exit 1 fi # :command.required_flags_filter @@ -1050,6 +1315,13 @@ orcli_export_tsv_parse_requirements() { fi ;; + # :flag.case + --quiet | -q ) + # :flag.conflicts + args[--quiet]=1 + shift + ;; + -?* ) printf "invalid option: %s\n" "$key" exit 1 @@ -1099,7 +1371,15 @@ run() { normalize_input "$@" parse_requirements "${input[@]}" - if [[ $action == "import" ]]; then + if [[ $action == "batch" ]]; then + if [[ ${args[--help]:-} ]]; then + long_usage=yes + orcli_batch_usage + else + orcli_batch_command + fi + + elif [[ $action == "import" ]]; then if [[ ${args[--help]:-} ]]; then long_usage=yes orcli_import_usage diff --git a/src/bashly.yml b/src/bashly.yml index 790c03d..a231a57 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -13,14 +13,55 @@ environment_variables: default: "http://localhost:3333" examples: - - orcli import csv file - - orcli import csv "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv" + - orcli import csv "https://git.io/fj5hF" --projectName "duplicates" - orcli list - - orcli info "doaj article sample csv" - - orcli export tsv "doaj article sample csv" - - orcli export tsv "doaj article sample csv" --output doaj.tsv + - orcli info "duplicates" + - orcli export tsv "duplicates" + - orcli export tsv "duplicates" --output "duplicates.tsv" + - |- + orcli batch \\\\ + import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\ + info "duplicates" \\\\ + export tsv "duplicates" commands: + - name: batch + help: start tmp OpenRefine workspace and run multiple orcli commands + catch_all: + label: orcli commands + help: |- + provide orcli commands without further separators (see examples below) + avoid "import" "info" "list" "transform" "export" in file or project names + use bash -c to execute custom commands + required: true + flags: + - long: --memory + help: maximum RAM for OpenRefine java heap space + arg: ram + default: "2048M" + - long: --port + help: PORT on which OpenRefine should listen + arg: port + default: "3333" + - long: --quiet + short: -q + help: suppress log output, print errors only + examples: + - |- + orcli batch \\\\ + import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\ + info "duplicates" \\\\ + export tsv "duplicates" + - |- + orcli batch --memory "2000M" --port "3334" \\\\ + import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\ + export tsv "duplicates" + - |- + orcli batch --quiet \\\\ + import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\ + export tsv "duplicates" --output "output/duplicates.tsv" \\\\ + bash -c 'wc -l output/*; echo "finished" in \$SECONDS seconds' + - name: import help: import commands @@ -45,14 +86,27 @@ commands: - long: --projectName arg: projectName help: set a name for the OpenRefine project + - long: --quiet + short: -q + help: suppress log output, print errors only examples: - - orcli import csv file - - cat file | orcli import csv - - orcli import csv file --separator ; --encoding ISO-8859-1 --trimStrings --projectName example - - orcli import csv "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv" + - orcli import csv "file" + - orcli import csv "file1" "file2" + - cat "file" | orcli import csv + - orcli import csv "https://git.io/fj5hF" + - |- + orcli import csv "file" \\\\ + --separator ";" \\\\ + --encoding "ISO-8859-1" \\\\ + --trimStrings \\\\ + --projectName "duplicates" - name: list help: list projects on OpenRefine server + flags: + - long: --quiet + short: -q + help: suppress log output, print errors only - name: info help: show project metadata @@ -60,8 +114,12 @@ commands: - name: project help: project name or id required: true + flags: + - long: --quiet + short: -q + help: suppress log output, print errors only examples: - - info Clipboard + - info "duplicates" - info 1234567890123 - name: export @@ -82,6 +140,9 @@ commands: help: set character encoding arg: encoding default: "UTF-8" + - long: --quiet + short: -q + help: suppress log output, print errors only examples: - - orcli export tsv Clipboard - - orcli export tsv Clipboard --output clipboard.tsv + - orcli export tsv "duplicates" + - orcli export tsv "duplicates" --output "duplicates.tsv" diff --git a/src/batch_command.sh b/src/batch_command.sh new file mode 100644 index 0000000..8d2e0d1 --- /dev/null +++ b/src/batch_command.sh @@ -0,0 +1,64 @@ +# shellcheck shell=bash disable=SC2154 + +# locate orcli and OpenRefine +if command -v orcli &>/dev/null; then + orcli="orcli" +elif [[ -x "orcli" ]]; then + orcli="./orcli" +else + error "orcli is not executable!" "Try: chmod + ./orcli" +fi +if [[ -x "refine" ]]; then + openrefine="./refine" +else + error "OpenRefine's startup script (refine) not found!" "Did you put orcli in your OpenRefine app dir?" +fi + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "$tmpdir"; }' 0 2 3 15 + +# update OPENREFINE_URL env +OPENREFINE_URL="http://localhost:${args[--port]}" + +# check if OpenRefine is already running +if curl -fs "${OPENREFINE_URL}" &>/dev/null; then + error "OpenRefine is already running on port ${args[--port]}." "Hint: Stop the other process or use another port." +fi + +# start OpenRefine with tmp workspace and autosave period 25 hours +$openrefine -d "$tmpdir" -m "${args[--memory]}" -p "${args[--port]}" -x refine.autosave=1440 -v warn &>"$tmpdir/openrefine.log" & +openrefine_pid="$!" + +# update trap to kill OpenRefine on error or exit +trap '{ rm -rf "$tmpdir"; kill -9 "$openrefine_pid"; }' 0 2 3 15 + +# wait until OpenRefine is running (timeout 20s) +if ! curl -fs --retry 20 --retry-connrefused --retry-delay 1 "${OPENREFINE_URL}/command/core/get-version" &>/dev/null; then + error "starting OpenRefine server failed!" +else + log "started OpenRefine" "port: ${args[--port]}" "memory: ${args[--memory]}" "tmpdir: ${tmpdir}" "pid: ${openrefine_pid}" +fi + +# assemble command groups from catch-all +i=0 +for arg in "${other_args[@]}"; do + if [[ $arg =~ ^(bash|import|info|list|transform|export)$ ]]; then + ((i = i + 1)) + groups+=("group$i") + fi + declare -a group${i}+="(\"$arg\")" +done + +# call command for each group +for group in "${groups[@]}"; do + declare arrayRef="${group}[@]" + command=("${!arrayRef}") + if [[ ${command[0]} == "bash" ]]; then + "${command[@]}" + elif [[ ${args[--quiet]} ]]; then + "$orcli" "${command[@]}" --quiet + else + "$orcli" "${command[@]}" + fi +done diff --git a/src/export_tsv_command.sh b/src/export_tsv_command.sh index 635c704..b92d3c3 100644 --- a/src/export_tsv_command.sh +++ b/src/export_tsv_command.sh @@ -27,9 +27,9 @@ if [[ ${args[--output]} ]]; then curloptions+=("${args[--output]}") fi if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then - error "export of ${args[project]} failed!" + error "exporting ${args[project]} failed!" else if [[ ${args[--output]} ]]; then - log "export of ${args[project]} successful" "file:${args[--output]}" "rows:$(cat "${args[--output]}" | wc -l )" + log "exported ${args[project]}" "file: ${args[--output]}" "rows: $(wc -l <"${args[--output]}")" fi fi diff --git a/src/lib/logging.sh b/src/lib/logging.sh index a577df0..a2a0a1f 100644 --- a/src/lib/logging.sh +++ b/src/lib/logging.sh @@ -3,11 +3,13 @@ function error() { echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] ERROR: $1" shift - for msg in "$@"; do echo >&2 "$msg"; done + for msg in "$@"; do echo >&2 " $msg"; done exit 1 } function log() { - echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] $1" - shift - for msg in "$@"; do echo >&2 "$msg"; done + if ! [[ ${args[--quiet]} ]]; then + echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] $1" + shift + for msg in "$@"; do echo >&2 " $msg"; done + fi } diff --git a/src/lib/post_import.sh b/src/lib/post_import.sh index aac2bbc..245f8f5 100644 --- a/src/lib/post_import.sh +++ b/src/lib/post_import.sh @@ -11,19 +11,19 @@ function post_import() { echo "$d" done) if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then - error "import of ${args[file]} failed!" + error "importing ${args[file]} failed!" fi # validate projectid=$(cut -d '=' -f 2 <<<"$redirect_url") if [[ ${#projectid} != 13 ]]; then - error "import of ${args[file]} failed!" + error "importing ${args[file]} failed!" fi projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2) projectname="${projectname:1:${#projectname}-2}" rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2) if [[ "$rows" = "0" ]]; then - error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}" + error "import of ${args[file]} contains 0 rows!" else - log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}" + log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}" fi } diff --git a/src/list_command.sh b/src/list_command.sh index 6b26e36..27d8c33 100644 --- a/src/list_command.sh +++ b/src/list_command.sh @@ -4,7 +4,7 @@ if ! response="$(curl -fs --get "${OPENREFINE_URL}/command/core/get-all-project- error "no OpenRefine reachable/running at ${OPENREFINE_URL}" else if [[ "${response}" == '{"projects":{}}' ]]; then - log "${OPENREFINE_URL} contains zero projects" + log "${OPENREFINE_URL} does not contain any projects yet." else echo "$response" | jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"' fi