first draft batch processing

This commit is contained in:
felixlohmeier 2022-04-20 10:27:53 +00:00
parent a2b921895b
commit 6979c41e8a
8 changed files with 476 additions and 65 deletions

View File

@ -56,9 +56,11 @@ Usage:
orcli --version | -v orcli --version | -v
Commands: Commands:
info show project metadata batch start tmp OpenRefine workspace and run multiple orcli commands
import import commands import import commands
list list projects on OpenRefine server list list projects on OpenRefine server
info show project metadata
export export commands
Options: Options:
--help, -h --help, -h
@ -73,12 +75,15 @@ Environment Variables:
Default: http://localhost:3333 Default: http://localhost:3333
Examples: Examples:
orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
orcli list orcli list
orcli import csv file orcli info "duplicates"
orcli import csv orcli export tsv "duplicates"
"https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv" orcli export tsv "duplicates" --output "duplicates.tsv"
orcli info Clipboard orcli batch \
orcli info 1234567890123 import csv "https://git.io/fj5hF" --projectName "duplicates" \
info "duplicates" \
export tsv "duplicates"
https://github.com/opencultureconsulting/orcli https://github.com/opencultureconsulting/orcli
``` ```
@ -95,9 +100,8 @@ gem install bashly
2. Edit code in [src](src) directory 2. Edit code in [src](src) directory
3. Validate and generate script 3. Generate script
```sh ```sh
bashly validate
bashly generate bashly generate
``` ```

348
orcli
View File

@ -34,6 +34,7 @@ orcli_usage() {
echo echo
# :command.usage_commands # :command.usage_commands
printf "Commands:\n" printf "Commands:\n"
echo " batch start tmp OpenRefine workspace and run multiple orcli commands"
echo " import import commands" echo " import import commands"
echo " list list projects on OpenRefine server" echo " list list projects on OpenRefine server"
echo " info show project metadata" echo " info show project metadata"
@ -61,12 +62,12 @@ orcli_usage() {
# :command.usage_examples # :command.usage_examples
printf "Examples:\n" printf "Examples:\n"
printf " orcli import csv file\n" printf " orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n"
printf " orcli import csv\n \"https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv\"\n"
printf " orcli list\n" printf " orcli list\n"
printf " orcli info \"doaj article sample csv\"\n" printf " orcli info \"duplicates\"\n"
printf " orcli export tsv \"doaj article sample csv\"\n" printf " orcli export tsv \"duplicates\"\n"
printf " orcli export tsv \"doaj article sample csv\" --output doaj.tsv\n" printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n"
printf " orcli batch \\\\\n import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" \\\\\n info \"duplicates\" \\\\\n export tsv \"duplicates\"\n"
echo echo
# :command.footer # :command.footer
printf "https://github.com/opencultureconsulting/orcli\n" printf "https://github.com/opencultureconsulting/orcli\n"
@ -75,6 +76,64 @@ orcli_usage() {
fi fi
} }
# :command.usage
orcli_batch_usage() {
  # Print the help text of `orcli batch` to stdout.
  # Globals: long_usage (read) - non-empty selects the long variant that
  #          additionally lists options, arguments and examples.
  # Returns: 0

  # The headline and the usage synopsis are shared by both variants.
  printf "orcli batch - start tmp OpenRefine workspace and run multiple orcli commands\n"
  printf "\n"
  printf "Usage:\n"
  printf " orcli batch [options] ORCLI COMMANDS...\n"
  printf " orcli batch --help | -h\n"
  printf "\n"

  # Short usage ends here.
  if [[ -z $long_usage ]]; then
    return 0
  fi

  printf "Options:\n"

  # fixed flag: help
  echo " --help, -h"
  printf " Show this help\n"
  printf "\n"

  # flag: --memory
  echo " --memory RAM"
  printf " maximum RAM for OpenRefine java heap space\n"
  printf " Default: 2048M\n"
  printf "\n"

  # flag: --port
  echo " --port PORT"
  printf " PORT on which OpenRefine should listen\n"
  printf " Default: 3333\n"
  printf "\n"

  # flag: --quiet
  echo " --quiet, -q"
  printf " suppress log output, print errors only\n"
  printf "\n"

  # catch-all argument
  printf "Arguments:\n"
  echo " ORCLI COMMANDS..."
  printf " provide orcli commands without further separators (see examples below)\n avoid \"import\" \"info\" \"list\" \"transform\" \"export\" in file or project names\n use bash -c to execute custom commands\n"
  printf "\n"

  # examples; every "\\\\" ends up as one literal backslash in the output
  printf "Examples:\n"
  printf " orcli batch \\\\\n import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" \\\\\n info \"duplicates\" \\\\\n export tsv \"duplicates\"\n"
  printf " orcli batch --memory \"2000M\" --port \"3334\" \\\\\n import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" \\\\\n export tsv \"duplicates\"\n"
  printf " orcli batch --quiet \\\\\n import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" \\\\\n export tsv \"duplicates\" --output \"output/duplicates.tsv\" \\\\\n bash -c 'wc -l output/*; echo \"finished\" in \$SECONDS seconds'\n"
  printf "\n"
}
# :command.usage # :command.usage
orcli_import_usage() { orcli_import_usage() {
if [[ -n $long_usage ]]; then if [[ -n $long_usage ]]; then
@ -150,6 +209,11 @@ orcli_import_csv_usage() {
echo " --projectName PROJECTNAME" echo " --projectName PROJECTNAME"
printf " set a name for the OpenRefine project\n" printf " set a name for the OpenRefine project\n"
echo echo
# :flag.usage
echo " --quiet, -q"
printf " suppress log output, print errors only\n"
echo
# :command.usage_args # :command.usage_args
printf "Arguments:\n" printf "Arguments:\n"
@ -162,10 +226,11 @@ orcli_import_csv_usage() {
# :command.usage_examples # :command.usage_examples
printf "Examples:\n" printf "Examples:\n"
printf " orcli import csv file\n" printf " orcli import csv \"file\"\n"
printf " cat file | orcli import csv\n" printf " orcli import csv \"file1\" \"file2\"\n"
printf " orcli import csv file --separator ; --encoding ISO-8859-1 --trimStrings\n --projectName example\n" printf " cat \"file\" | orcli import csv\n"
printf " orcli import csv\n \"https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv\"\n" printf " orcli import csv \"https://git.io/fj5hF\"\n"
printf " orcli import csv \"file\" \\\\\n --separator \";\" \\\\\n --encoding \"ISO-8859-1\" \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n"
echo echo
fi fi
@ -184,7 +249,7 @@ orcli_list_usage() {
fi fi
printf "Usage:\n" printf "Usage:\n"
printf " orcli list\n" printf " orcli list [options]\n"
printf " orcli list --help | -h\n" printf " orcli list --help | -h\n"
echo echo
@ -194,6 +259,11 @@ orcli_list_usage() {
echo " --help, -h" echo " --help, -h"
printf " Show this help\n" printf " Show this help\n"
echo echo
# :command.usage_flags
# :flag.usage
echo " --quiet, -q"
printf " suppress log output, print errors only\n"
echo
fi fi
} }
@ -211,7 +281,7 @@ orcli_info_usage() {
fi fi
printf "Usage:\n" printf "Usage:\n"
printf " orcli info PROJECT\n" printf " orcli info PROJECT [options]\n"
printf " orcli info --help | -h\n" printf " orcli info --help | -h\n"
echo echo
@ -221,7 +291,11 @@ orcli_info_usage() {
echo " --help, -h" echo " --help, -h"
printf " Show this help\n" printf " Show this help\n"
echo echo
# :command.usage_flags
# :flag.usage
echo " --quiet, -q"
printf " suppress log output, print errors only\n"
echo
# :command.usage_args # :command.usage_args
printf "Arguments:\n" printf "Arguments:\n"
@ -233,7 +307,7 @@ orcli_info_usage() {
# :command.usage_examples # :command.usage_examples
printf "Examples:\n" printf "Examples:\n"
printf " info Clipboard\n" printf " info \"duplicates\"\n"
printf " info 1234567890123\n" printf " info 1234567890123\n"
echo echo
@ -305,6 +379,11 @@ orcli_export_tsv_usage() {
printf " set character encoding\n" printf " set character encoding\n"
printf " Default: UTF-8\n" printf " Default: UTF-8\n"
echo echo
# :flag.usage
echo " --quiet, -q"
printf " suppress log output, print errors only\n"
echo
# :command.usage_args # :command.usage_args
printf "Arguments:\n" printf "Arguments:\n"
@ -316,8 +395,8 @@ orcli_export_tsv_usage() {
# :command.usage_examples # :command.usage_examples
printf "Examples:\n" printf "Examples:\n"
printf " orcli export tsv Clipboard\n" printf " orcli export tsv \"duplicates\"\n"
printf " orcli export tsv Clipboard --output clipboard.tsv\n" printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n"
echo echo
fi fi
@ -409,9 +488,7 @@ function get_id() {
# common import tasks to support multiple files and URLs # common import tasks to support multiple files and URLs
# shellcheck shell=bash # shellcheck shell=bash
function init_import() { function init_import() {
local files local files file tmpdir
local file
local tmpdir
# catch args, convert the space delimited string to an array # catch args, convert the space delimited string to an array
files=() files=()
eval "files=(${args[file]})" eval "files=(${args[file]})"
@ -421,16 +498,27 @@ function init_import() {
# download files if name starts with http:// or https:// # download files if name starts with http:// or https://
for i in "${!files[@]}"; do for i in "${!files[@]}"; do
if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then
if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]##*/}"; then if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then
error "download of ${files[$i]} failed!" error "download of ${files[$i]} failed!"
fi fi
files[$i]="${tmpdir}/${files[$i]##*/}" files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
fi
done
# read pipes if name starts with /dev/fd
for i in "${!files[@]}"; do
if [[ ${files[$i]} == "/dev/fd"* ]]; then
if ! cat "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then
error "reading of ${files[$i]} failed!"
fi
files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
fi fi
done done
# create a zip archive if there are multiple files # create a zip archive if there are multiple files
if [[ ${#files[@]} -gt 1 ]]; then if [[ ${#files[@]} -gt 1 ]]; then
file="$tmpdir/Untitled.zip" file="$tmpdir/Untitled.zip"
zip "$file" "${files[@]}" if ! zip --quiet --must-match "$file" "${files[@]}"; then
error "creating zip archive with ${files[*]} failed!"
fi
else else
file="${files[0]}" file="${files[0]}"
fi fi
@ -461,13 +549,15 @@ function init_import() {
function error() { function error() {
echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] ERROR: $1" echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] ERROR: $1"
shift shift
for msg in "$@"; do echo >&2 "$msg"; done for msg in "$@"; do echo >&2 " $msg"; done
exit 1 exit 1
} }
function log() { function log() {
echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] $1" if ! [[ ${args[--quiet]} ]]; then
shift echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] $1"
for msg in "$@"; do echo >&2 "$msg"; done shift
for msg in "$@"; do echo >&2 " $msg"; done
fi
} }
# src/lib/post_import.sh # src/lib/post_import.sh
@ -484,24 +574,92 @@ function post_import() {
echo "$d" echo "$d"
done) done)
if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then
error "import of ${args[file]} failed!" error "importing ${args[file]} failed!"
fi fi
# validate # validate
projectid=$(cut -d '=' -f 2 <<<"$redirect_url") projectid=$(cut -d '=' -f 2 <<<"$redirect_url")
if [[ ${#projectid} != 13 ]]; then if [[ ${#projectid} != 13 ]]; then
error "import of ${args[file]} failed!" error "importing ${args[file]} failed!"
fi fi
projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2) projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2)
projectname="${projectname:1:${#projectname}-2}" projectname="${projectname:1:${#projectname}-2}"
rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2) rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2)
if [[ "$rows" = "0" ]]; then if [[ "$rows" = "0" ]]; then
error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}" error "import of ${args[file]} contains 0 rows!"
else else
log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}" log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}"
fi fi
} }
# :command.command_functions # :command.command_functions
# :command.function
orcli_batch_command() {
  # src/batch_command.sh
  # shellcheck shell=bash disable=SC2154
  #
  # Start an OpenRefine server on a temporary workspace, run every orcli
  # command (or `bash ...` command) given in the catch-all arguments against
  # it, then kill the server and delete the workspace.
  # Globals: args (read: --port, --memory, --quiet), other_args (read),
  #          OPENREFINE_URL (written), tmpdir and openrefine_pid (written,
  #          needed by the cleanup trap)
  # Uses the helpers error (prints and exits) and log.

  # locate orcli and OpenRefine
  if command -v orcli &>/dev/null; then
    orcli="orcli"
  elif [[ -x "orcli" ]]; then
    orcli="./orcli"
  else
    error "orcli is not executable!" "Try: chmod +x ./orcli"
  fi
  if [[ -x "refine" ]]; then
    openrefine="./refine"
  else
    error "OpenRefine's startup script (refine) not found!" "Did you put orcli in your OpenRefine app dir?"
  fi

  # create tmp directory; without it there is no workspace to start with
  if ! tmpdir="$(mktemp -d)"; then
    error "creating temporary workspace failed!"
  fi
  trap '{ rm -rf -- "$tmpdir"; }' 0 2 3 15

  # update OPENREFINE_URL env
  OPENREFINE_URL="http://localhost:${args[--port]}"

  # check if OpenRefine is already running
  if curl -fs "${OPENREFINE_URL}" &>/dev/null; then
    error "OpenRefine is already running on port ${args[--port]}." "Hint: Stop the other process or use another port."
  fi

  # start OpenRefine with tmp workspace and autosave period 1440
  # (refine.autosave is given in minutes per the OpenRefine docs, i.e. 24 hours)
  "$openrefine" -d "$tmpdir" -m "${args[--memory]}" -p "${args[--port]}" -x refine.autosave=1440 -v warn &>"$tmpdir/openrefine.log" &
  openrefine_pid="$!"

  # update trap to kill OpenRefine on error or exit; the server is stopped
  # before its workspace is removed, and a server that is already gone must
  # not prevent the removal
  trap '{ kill -9 "$openrefine_pid" 2>/dev/null || true; rm -rf -- "$tmpdir"; }' 0 2 3 15

  # wait until OpenRefine is running (timeout 20s)
  if ! curl -fs --retry 20 --retry-connrefused --retry-delay 1 "${OPENREFINE_URL}/command/core/get-version" &>/dev/null; then
    error "starting OpenRefine server failed!"
  else
    log "started OpenRefine" "port: ${args[--port]}" "memory: ${args[--memory]}" "tmpdir: ${tmpdir}" "pid: ${openrefine_pid}"
  fi

  # assemble command groups from catch-all: a new group starts at every
  # keyword, all following words belong to that group
  # NOTE(review): words in front of the first keyword land in group0, which
  # is never executed.
  # NOTE(review): the string form of the compound assignment below is parsed
  # by the shell again, so quotes and $-expansions inside an argument are
  # evaluated here; the documented `bash -c '... $SECONDS ...'` example
  # appears to rely on that. Never feed untrusted input to `orcli batch`.
  i=0
  groups=()
  for arg in "${other_args[@]}"; do
    if [[ $arg =~ ^(bash|import|info|list|transform|export)$ ]]; then
      ((i = i + 1))
      groups+=("group$i")
    fi
    declare -a group${i}+="(\"$arg\")"
  done

  # call command for each group; bash groups run as they are, everything
  # else is handed to orcli (with --quiet passed on if requested)
  for group in "${groups[@]}"; do
    declare arrayRef="${group}[@]"
    command=("${!arrayRef}")
    if [[ ${command[0]} == "bash" ]]; then
      "${command[@]}"
    elif [[ ${args[--quiet]} ]]; then
      "$orcli" "${command[@]}" --quiet
    else
      "$orcli" "${command[@]}"
    fi
  done
}
# :command.function # :command.function
orcli_import_csv_command() { orcli_import_csv_command() {
@ -545,7 +703,7 @@ orcli_list_command() {
error "no OpenRefine reachable/running at ${OPENREFINE_URL}" error "no OpenRefine reachable/running at ${OPENREFINE_URL}"
else else
if [[ "${response}" == '{"projects":{}}' ]]; then if [[ "${response}" == '{"projects":{}}' ]]; then
log "${OPENREFINE_URL} contains zero projects" log "${OPENREFINE_URL} does not contain any projects yet."
else else
echo "$response" | jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"' echo "$response" | jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"'
fi fi
@ -591,10 +749,10 @@ orcli_export_tsv_command() {
curloptions+=("${args[--output]}") curloptions+=("${args[--output]}")
fi fi
if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then
error "export of ${args[project]} failed!" error "exporting ${args[project]} failed!"
else else
if [[ ${args[--output]} ]]; then if [[ ${args[--output]} ]]; then
log "export of ${args[project]} successful" "file:${args[--output]}" "rows:$(cat "${args[--output]}" | wc -l )" log "exported ${args[project]}" "file: ${args[--output]}" "rows: $(wc -l <"${args[--output]}")"
fi fi
fi fi
} }
@ -633,6 +791,13 @@ parse_requirements() {
-* ) -* )
;; ;;
batch )
action="batch"
shift
orcli_batch_parse_requirements "$@"
shift $#
;;
import ) import )
action="import" action="import"
shift shift
@ -694,6 +859,87 @@ parse_requirements() {
# :command.user_filter # :command.user_filter
} }
# :command.parse_requirements
orcli_batch_parse_requirements() {
  # Parse the command line of `orcli batch`.
  # Arguments: everything that followed the word "batch"
  # Globals:   args (written: --memory, --port, --quiet),
  #            other_args (appended: every word that is not a batch flag),
  #            action, key, long_usage (written)
  # Exits 1 on a flag without its value or when no command was given;
  # prints the long usage and exits when the first word is --help or -h.

  # help is only recognized as the very first word
  if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
    long_usage=yes
    orcli_batch_usage
    exit
  fi

  action="batch"

  # batch flags are picked up wherever they occur; all other words, including
  # unknown flags, are collected for the sub-commands
  while (($# > 0)); do
    key="$1"
    case "$key" in
      --memory)
        if (($# < 2)); then
          printf "%s\n" "--memory requires an argument: --memory RAM"
          exit 1
        fi
        args[--memory]="$2"
        shift 2
        ;;
      --port)
        if (($# < 2)); then
          printf "%s\n" "--port requires an argument: --port PORT"
          exit 1
        fi
        args[--port]="$2"
        shift 2
        ;;
      --quiet | -q)
        args[--quiet]=1
        shift
        ;;
      *)
        other_args+=("$1")
        shift
        ;;
    esac
  done

  # at least one command word is mandatory
  if ((${#other_args[@]} == 0)); then
    printf "missing required argument: ORCLI COMMANDS...\nusage: orcli batch [options] ORCLI COMMANDS...\n"
    exit 1
  fi

  # defaults for flags that were not given (or given empty)
  args[--memory]="${args[--memory]:-2048M}"
  args[--port]="${args[--port]:-3333}"
}
# :command.parse_requirements # :command.parse_requirements
orcli_import_parse_requirements() { orcli_import_parse_requirements() {
# :command.fixed_flags_filter # :command.fixed_flags_filter
@ -822,6 +1068,13 @@ orcli_import_csv_parse_requirements() {
fi fi
;; ;;
# :flag.case
--quiet | -q )
# :flag.conflicts
args[--quiet]=1
shift
;;
-?* ) -?* )
printf "invalid option: %s\n" "$key" printf "invalid option: %s\n" "$key"
exit 1 exit 1
@ -870,6 +1123,12 @@ orcli_list_parse_requirements() {
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
key="$1" key="$1"
case "$key" in case "$key" in
# :flag.case
--quiet | -q )
# :flag.conflicts
args[--quiet]=1
shift
;;
-?* ) -?* )
printf "invalid option: %s\n" "$key" printf "invalid option: %s\n" "$key"
@ -911,6 +1170,12 @@ orcli_info_parse_requirements() {
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
key="$1" key="$1"
case "$key" in case "$key" in
# :flag.case
--quiet | -q )
# :flag.conflicts
args[--quiet]=1
shift
;;
-?* ) -?* )
printf "invalid option: %s\n" "$key" printf "invalid option: %s\n" "$key"
@ -933,7 +1198,7 @@ orcli_info_parse_requirements() {
done done
# :command.required_args_filter # :command.required_args_filter
if [[ -z ${args[project]+x} ]]; then if [[ -z ${args[project]+x} ]]; then
printf "missing required argument: PROJECT\nusage: orcli info PROJECT\n" printf "missing required argument: PROJECT\nusage: orcli info PROJECT [options]\n"
exit 1 exit 1
fi fi
# :command.required_flags_filter # :command.required_flags_filter
@ -1050,6 +1315,13 @@ orcli_export_tsv_parse_requirements() {
fi fi
;; ;;
# :flag.case
--quiet | -q )
# :flag.conflicts
args[--quiet]=1
shift
;;
-?* ) -?* )
printf "invalid option: %s\n" "$key" printf "invalid option: %s\n" "$key"
exit 1 exit 1
@ -1099,7 +1371,15 @@ run() {
normalize_input "$@" normalize_input "$@"
parse_requirements "${input[@]}" parse_requirements "${input[@]}"
if [[ $action == "import" ]]; then if [[ $action == "batch" ]]; then
if [[ ${args[--help]:-} ]]; then
long_usage=yes
orcli_batch_usage
else
orcli_batch_command
fi
elif [[ $action == "import" ]]; then
if [[ ${args[--help]:-} ]]; then if [[ ${args[--help]:-} ]]; then
long_usage=yes long_usage=yes
orcli_import_usage orcli_import_usage

View File

@ -13,14 +13,55 @@ environment_variables:
default: "http://localhost:3333" default: "http://localhost:3333"
examples: examples:
- orcli import csv file - orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
- orcli import csv "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv"
- orcli list - orcli list
- orcli info "doaj article sample csv" - orcli info "duplicates"
- orcli export tsv "doaj article sample csv" - orcli export tsv "duplicates"
- orcli export tsv "doaj article sample csv" --output doaj.tsv - orcli export tsv "duplicates" --output "duplicates.tsv"
- |-
orcli batch \\\\
import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\
info "duplicates" \\\\
export tsv "duplicates"
commands: commands:
- name: batch
help: start tmp OpenRefine workspace and run multiple orcli commands
catch_all:
label: orcli commands
help: |-
provide orcli commands without further separators (see examples below)
avoid "import" "info" "list" "transform" "export" in file or project names
use bash -c to execute custom commands
required: true
flags:
- long: --memory
help: maximum RAM for OpenRefine java heap space
arg: ram
default: "2048M"
- long: --port
help: PORT on which OpenRefine should listen
arg: port
default: "3333"
- long: --quiet
short: -q
help: suppress log output, print errors only
examples:
- |-
orcli batch \\\\
import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\
info "duplicates" \\\\
export tsv "duplicates"
- |-
orcli batch --memory "2000M" --port "3334" \\\\
import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\
export tsv "duplicates"
- |-
orcli batch --quiet \\\\
import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\
export tsv "duplicates" --output "output/duplicates.tsv" \\\\
bash -c 'wc -l output/*; echo "finished" in \$SECONDS seconds'
- name: import - name: import
help: import commands help: import commands
@ -45,14 +86,27 @@ commands:
- long: --projectName - long: --projectName
arg: projectName arg: projectName
help: set a name for the OpenRefine project help: set a name for the OpenRefine project
- long: --quiet
short: -q
help: suppress log output, print errors only
examples: examples:
- orcli import csv file - orcli import csv "file"
- cat file | orcli import csv - orcli import csv "file1" "file2"
- orcli import csv file --separator ; --encoding ISO-8859-1 --trimStrings --projectName example - cat "file" | orcli import csv
- orcli import csv "https://github.com/LibraryCarpentry/lc-open-refine/raw/gh-pages/data/doaj-article-sample.csv" - orcli import csv "https://git.io/fj5hF"
- |-
orcli import csv "file" \\\\
--separator ";" \\\\
--encoding "ISO-8859-1" \\\\
--trimStrings \\\\
--projectName "duplicates"
- name: list - name: list
help: list projects on OpenRefine server help: list projects on OpenRefine server
flags:
- long: --quiet
short: -q
help: suppress log output, print errors only
- name: info - name: info
help: show project metadata help: show project metadata
@ -60,8 +114,12 @@ commands:
- name: project - name: project
help: project name or id help: project name or id
required: true required: true
flags:
- long: --quiet
short: -q
help: suppress log output, print errors only
examples: examples:
- info Clipboard - info "duplicates"
- info 1234567890123 - info 1234567890123
- name: export - name: export
@ -82,6 +140,9 @@ commands:
help: set character encoding help: set character encoding
arg: encoding arg: encoding
default: "UTF-8" default: "UTF-8"
- long: --quiet
short: -q
help: suppress log output, print errors only
examples: examples:
- orcli export tsv Clipboard - orcli export tsv "duplicates"
- orcli export tsv Clipboard --output clipboard.tsv - orcli export tsv "duplicates" --output "duplicates.tsv"

64
src/batch_command.sh Normal file
View File

@ -0,0 +1,64 @@
# shellcheck shell=bash disable=SC2154
#
# Start an OpenRefine server on a temporary workspace, run every orcli
# command (or `bash ...` command) given in the catch-all arguments against
# it, then kill the server and delete the workspace.
# Reads args[--port], args[--memory], args[--quiet] and other_args; uses the
# helpers error (prints and exits) and log.

# locate orcli and OpenRefine
if command -v orcli &>/dev/null; then
  orcli="orcli"
elif [[ -x "orcli" ]]; then
  orcli="./orcli"
else
  error "orcli is not executable!" "Try: chmod +x ./orcli"
fi
if [[ -x "refine" ]]; then
  openrefine="./refine"
else
  error "OpenRefine's startup script (refine) not found!" "Did you put orcli in your OpenRefine app dir?"
fi

# create tmp directory; without it there is no workspace to start with
if ! tmpdir="$(mktemp -d)"; then
  error "creating temporary workspace failed!"
fi
trap '{ rm -rf -- "$tmpdir"; }' 0 2 3 15

# update OPENREFINE_URL env
OPENREFINE_URL="http://localhost:${args[--port]}"

# check if OpenRefine is already running
if curl -fs "${OPENREFINE_URL}" &>/dev/null; then
  error "OpenRefine is already running on port ${args[--port]}." "Hint: Stop the other process or use another port."
fi

# start OpenRefine with tmp workspace and autosave period 1440
# (refine.autosave is given in minutes per the OpenRefine docs, i.e. 24 hours)
"$openrefine" -d "$tmpdir" -m "${args[--memory]}" -p "${args[--port]}" -x refine.autosave=1440 -v warn &>"$tmpdir/openrefine.log" &
openrefine_pid="$!"

# update trap to kill OpenRefine on error or exit; the server is stopped
# before its workspace is removed, and a server that is already gone must
# not prevent the removal
trap '{ kill -9 "$openrefine_pid" 2>/dev/null || true; rm -rf -- "$tmpdir"; }' 0 2 3 15

# wait until OpenRefine is running (timeout 20s)
if ! curl -fs --retry 20 --retry-connrefused --retry-delay 1 "${OPENREFINE_URL}/command/core/get-version" &>/dev/null; then
  error "starting OpenRefine server failed!"
else
  log "started OpenRefine" "port: ${args[--port]}" "memory: ${args[--memory]}" "tmpdir: ${tmpdir}" "pid: ${openrefine_pid}"
fi

# assemble command groups from catch-all: a new group starts at every
# keyword, all following words belong to that group
# NOTE(review): words in front of the first keyword land in group0, which
# is never executed.
# NOTE(review): the string form of the compound assignment below is parsed
# by the shell again, so quotes and $-expansions inside an argument are
# evaluated here; the documented `bash -c '... $SECONDS ...'` example
# appears to rely on that. Never feed untrusted input to `orcli batch`.
i=0
groups=()
for arg in "${other_args[@]}"; do
  if [[ $arg =~ ^(bash|import|info|list|transform|export)$ ]]; then
    ((i = i + 1))
    groups+=("group$i")
  fi
  declare -a group${i}+="(\"$arg\")"
done

# call command for each group; bash groups run as they are, everything
# else is handed to orcli (with --quiet passed on if requested)
for group in "${groups[@]}"; do
  declare arrayRef="${group}[@]"
  command=("${!arrayRef}")
  if [[ ${command[0]} == "bash" ]]; then
    "${command[@]}"
  elif [[ ${args[--quiet]} ]]; then
    "$orcli" "${command[@]}" --quiet
  else
    "$orcli" "${command[@]}"
  fi
done

View File

@ -27,9 +27,9 @@ if [[ ${args[--output]} ]]; then
curloptions+=("${args[--output]}") curloptions+=("${args[--output]}")
fi fi
if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then
error "export of ${args[project]} failed!" error "exporting ${args[project]} failed!"
else else
if [[ ${args[--output]} ]]; then if [[ ${args[--output]} ]]; then
log "export of ${args[project]} successful" "file:${args[--output]}" "rows:$(cat "${args[--output]}" | wc -l )" log "exported ${args[project]}" "file: ${args[--output]}" "rows: $(wc -l <"${args[--output]}")"
fi fi
fi fi

View File

@ -3,11 +3,13 @@
function error() { function error() {
echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] ERROR: $1" echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] ERROR: $1"
shift shift
for msg in "$@"; do echo >&2 "$msg"; done for msg in "$@"; do echo >&2 " $msg"; done
exit 1 exit 1
} }
function log() { function log() {
echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] $1" if ! [[ ${args[--quiet]} ]]; then
shift echo >&2 "[$(date +'%Y-%m-%dT%H:%M:%S')] $1"
for msg in "$@"; do echo >&2 "$msg"; done shift
for msg in "$@"; do echo >&2 " $msg"; done
fi
} }

View File

@ -11,19 +11,19 @@ function post_import() {
echo "$d" echo "$d"
done) done)
if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then
error "import of ${args[file]} failed!" error "importing ${args[file]} failed!"
fi fi
# validate # validate
projectid=$(cut -d '=' -f 2 <<<"$redirect_url") projectid=$(cut -d '=' -f 2 <<<"$redirect_url")
if [[ ${#projectid} != 13 ]]; then if [[ ${#projectid} != 13 ]]; then
error "import of ${args[file]} failed!" error "importing ${args[file]} failed!"
fi fi
projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2) projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | tr "," "\n" | grep name | cut -d ":" -f 2)
projectname="${projectname:1:${#projectname}-2}" projectname="${projectname:1:${#projectname}-2}"
rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2) rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | tr "," "\n" | grep total | cut -d ":" -f 2)
if [[ "$rows" = "0" ]]; then if [[ "$rows" = "0" ]]; then
error "import of ${args[file]} contains 0 rows!" "${redirect_url}" "name:${projectname}" "rows:${rows}" error "import of ${args[file]} contains 0 rows!"
else else
log "import of ${args[file]} successful" "${redirect_url}" "name:${projectname}" "rows:${rows}" log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}"
fi fi
} }

View File

@ -4,7 +4,7 @@ if ! response="$(curl -fs --get "${OPENREFINE_URL}/command/core/get-all-project-
error "no OpenRefine reachable/running at ${OPENREFINE_URL}" error "no OpenRefine reachable/running at ${OPENREFINE_URL}"
else else
if [[ "${response}" == '{"projects":{}}' ]]; then if [[ "${response}" == '{"projects":{}}' ]]; then
log "${OPENREFINE_URL} contains zero projects" log "${OPENREFINE_URL} does not contain any projects yet."
else else
echo "$response" | jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"' echo "$response" | jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"'
fi fi