From 26feff650af10b4fc1c74ad114ea3f0e13134d01 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Thu, 3 Nov 2022 21:07:08 +0000 Subject: [PATCH 1/5] first draft --- README.md | 6 +- orcli | 176 +++++++++++++++++++++++++++++++++++- src/bashly.yml | 21 ++++- src/lib/send_completions.sh | 6 +- src/transform_command.sh | 36 ++++++++ 5 files changed, 238 insertions(+), 7 deletions(-) create mode 100644 src/transform_command.sh diff --git a/README.md b/README.md index a4a05de..cbd141a 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,11 @@ Optional: orcli import csv "https://git.io/fj5hF" --projectName "duplicates" ``` -3. Remove duplicates (coming soon) +3. Remove duplicates by applying an undo/redo JSON file + + ```sh + orcli transform "duplicates" "https://git.io/fj5hF" + ``` 4. Export data from OpenRefine project to tab-separated-values (TSV) file `duplicates.tsv` diff --git a/orcli b/orcli index 136fc88..fd7055b 100755 --- a/orcli +++ b/orcli @@ -38,6 +38,7 @@ orcli_usage() { echo " import commands to create OpenRefine projects from files or URLs" echo " list list projects on OpenRefine server" echo " info show OpenRefine project's metadata" + echo " transform apply undo/redo JSON file(s) to an OpenRefine project" echo " export commands to export data from OpenRefine projects to files" echo " run run tmp OpenRefine workspace and execute shell script(s)" echo @@ -74,6 +75,7 @@ orcli_usage() { printf " orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n" printf " orcli list\n" printf " orcli info \"duplicates\"\n" + printf " orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n" printf " orcli export tsv \"duplicates\"\n" printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n" printf " orcli run --interactive\n" @@ -286,8 +288,59 @@ orcli_info_usage() { # :command.usage_examples printf "Examples:\n" - printf " info \"duplicates\"\n" - printf " info 1234567890123\n" + printf " orcli info \"duplicates\"\n" + printf " orcli info 1234567890123\n" + echo + + fi +} + +# :command.usage +orcli_transform_usage() { + if [[ -n $long_usage ]]; then + printf "orcli transform - apply undo/redo JSON file(s) to an OpenRefine project\n" + echo + + else + printf "orcli transform - apply undo/redo JSON file(s) to an OpenRefine project\n" + echo + + fi + + printf "Usage:\n" + printf " orcli transform PROJECT [FILE...]\n" + printf " orcli transform --help | -h\n" + echo + + # :command.long_usage + if [[ -n $long_usage ]]; then + printf "Options:\n" + + # :command.usage_fixed_flags + echo " --help, -h" + printf " Show this help\n" + echo + + # :command.usage_args + printf "Arguments:\n" + + # :argument.usage + echo " PROJECT" + printf " project name or id\n" + echo + + # :argument.usage + echo " FILE..." + printf " Path to one or more files or URLs containing OpenRefine's undo/redo\n operation history in JSON format. When FILE is -, read standard input.\n" + printf " Default: -\n" + echo + + # :command.usage_examples + printf "Examples:\n" + printf " orcli transform \"duplicates\" \"history.json\"\n" + printf " cat \"history.json\" | orcli transform \"duplicates\"\n" + printf " orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n" + printf " orcli transform 1234567890123 \"history.json\"\n" echo fi @@ -693,6 +746,10 @@ send_completions() { echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --output -h")" -- "$cur" )' echo $' ;;' echo $'' + echo $' \'transform\'*)' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )' + echo $' ;;' + echo $'' echo $' \'import\'*)' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv")" -- "$cur" )' echo $' ;;' @@ -714,7 +771,7 @@ send_completions() { echo $' ;;' echo $'' echo $' *)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet --version -h -q -v completions export import info list run")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet --version -h -q -v completions export import info list run transform")" -- "$cur" )' echo $' ;;' echo $'' echo $' esac' @@ -796,6 +853,47 @@ orcli_info_command() { } +# :command.function +orcli_transform_command() { + # src/transform_command.sh + # shellcheck shell=bash disable=SC2154 + #get_id "${args[project]}" + + # check if stdin is present if selected + if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then + if ! read -u 0 -t 0; then + orcli_transform_usage + exit 1 + fi + fi + # catch args, convert the space delimited string to an array + files=() + eval "files=(${args[file]})" + # create tmp directory + tmpdir="$(mktemp -d)" + trap 'rm -rf "$tmpdir"' 0 2 3 15 + # download files if name starts with http:// or https:// + for i in "${!files[@]}"; do + if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then + if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then + error "download of ${files[$i]} failed!" + fi + files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" + fi + done + + # support multiple files and stdin + readarray -t jsonlines < <(cat "${files[@]}" | jq --slurp --compact-output 'add | .[]') + for line in "${jsonlines[@]}"; do + declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" + echo "${data[op]#core/}" + unset "data[op]" + unset "data[description]" + for K in "${!data[@]}"; do echo "$K" --- "${data[$K]}"; done + unset data + done +} + # :command.function orcli_export_tsv_command() { # src/export_tsv_command.sh @@ -1010,6 +1108,13 @@ parse_requirements() { shift $# ;; + transform ) + action="transform" + shift + orcli_transform_parse_requirements "$@" + shift $# + ;; + export ) action="export" shift @@ -1348,6 +1453,63 @@ orcli_info_parse_requirements() { } +# :command.parse_requirements +orcli_transform_parse_requirements() { + # :command.fixed_flags_filter + case "${1:-}" in + --help | -h ) + long_usage=yes + orcli_transform_usage + exit + ;; + + esac + + # :command.command_filter + action="transform" + + # :command.parse_requirements_while + while [[ $# -gt 0 ]]; do + key="$1" + case "$key" in + + -?* ) + printf "invalid option: %s\n" "$key" >&2 + exit 1 + ;; + + * ) + # :command.parse_requirements_case + # :command.parse_requirements_case_repeatable + if [[ -z ${args[project]+x} ]]; then + + args[project]=$1 + shift + elif [[ -z ${args[file]+x} ]]; then + + args[file]="\"$1\"" + shift + else + args[file]="${args[file]} \"$1\"" + shift + fi + + ;; + + esac + done + + # :command.required_args_filter + if [[ -z ${args[project]+x} ]]; then + printf "missing required argument: PROJECT\nusage: orcli transform PROJECT [FILE...]\n" >&2 + exit 1 + fi + + # :command.default_assignments + [[ -n ${args[file]:-} ]] || args[file]="-" + +} + # :command.parse_requirements orcli_export_parse_requirements() { # :command.fixed_flags_filter @@ -1639,6 +1801,14 @@ run() { orcli_info_command fi + elif [[ $action == "transform" ]]; then + if [[ ${args[--help]:-} ]]; then + long_usage=yes + orcli_transform_usage + else + orcli_transform_command + fi + elif [[ $action == "export" ]]; then if [[ ${args[--help]:-} ]]; then long_usage=yes diff --git a/src/bashly.yml b/src/bashly.yml index 320b860..28f1cae 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -16,6 +16,7 @@ examples: - orcli import csv "https://git.io/fj5hF" --projectName "duplicates" - orcli list - orcli info "duplicates" + - orcli transform "duplicates" "https://git.io/fj5ju" - orcli export tsv "duplicates" - orcli export tsv "duplicates" --output "duplicates.tsv" - orcli run --interactive @@ -83,8 +84,24 @@ commands: help: project name or id required: true examples: - - info "duplicates" - - info 1234567890123 + - orcli info "duplicates" + - orcli info 1234567890123 + + - name: transform + help: apply undo/redo JSON file(s) to an OpenRefine project + args: + - name: project + help: project name or id + required: true + - name: file + help: Path to one or more files or URLs containing OpenRefine's undo/redo operation history in JSON format. When FILE is -, read standard input. + default: "-" + repeatable: true + examples: + - orcli transform "duplicates" "history.json" + - cat "history.json" | orcli transform "duplicates" + - orcli transform "duplicates" "https://git.io/fj5ju" + - orcli transform 1234567890123 "history.json" - name: export help: commands to export data from OpenRefine projects to files diff --git a/src/lib/send_completions.sh b/src/lib/send_completions.sh index 4b5f5ed..3771a63 100644 --- a/src/lib/send_completions.sh +++ b/src/lib/send_completions.sh @@ -42,6 +42,10 @@ send_completions() { echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --output -h")" -- "$cur" )' echo $' ;;' echo $'' + echo $' \'transform\'*)' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )' + echo $' ;;' + echo $'' echo $' \'import\'*)' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv")" -- "$cur" )' echo $' ;;' @@ -63,7 +67,7 @@ send_completions() { echo $' ;;' echo $'' echo $' *)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet --version -h -q -v completions export import info list run")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet --version -h -q -v completions export import info list run transform")" -- "$cur" )' echo $' ;;' echo $'' echo $' esac' diff --git a/src/transform_command.sh b/src/transform_command.sh new file mode 100644 index 0000000..31cfd31 --- /dev/null +++ b/src/transform_command.sh @@ -0,0 +1,36 @@ +# shellcheck shell=bash disable=SC2154 +#get_id "${args[project]}" + +# check if stdin is present if selected +if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then + if ! read -u 0 -t 0; then + orcli_transform_usage + exit 1 + fi +fi +# catch args, convert the space delimited string to an array +files=() +eval "files=(${args[file]})" +# create tmp directory +tmpdir="$(mktemp -d)" +trap 'rm -rf "$tmpdir"' 0 2 3 15 +# download files if name starts with http:// or https:// +for i in "${!files[@]}"; do + if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then + if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then + error "download of ${files[$i]} failed!" + fi + files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" + fi +done + +# support multiple files and stdin +readarray -t jsonlines < <(cat "${files[@]}" | jq --slurp --compact-output 'add | .[]') +for line in "${jsonlines[@]}"; do + declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" + echo "${data[op]#core/}" + unset "data[op]" + unset "data[description]" + for K in "${!data[@]}"; do echo "$K" --- "${data[$K]}"; done + unset data +done \ No newline at end of file From cf83aac6928d2c7faee9665dcecec81241091d7a Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Sun, 6 Nov 2022 21:39:40 +0000 Subject: [PATCH 2/5] first try (untested) --- src/transform_command.sh | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/transform_command.sh b/src/transform_command.sh index 31cfd31..8ab6573 100644 --- a/src/transform_command.sh +++ b/src/transform_command.sh @@ -1,5 +1,4 @@ # shellcheck shell=bash disable=SC2154 -#get_id "${args[project]}" # check if stdin is present if selected if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then @@ -8,12 +7,15 @@ if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then exit 1 fi fi + # catch args, convert the space delimited string to an array files=() eval "files=(${args[file]})" + # create tmp directory tmpdir="$(mktemp -d)" trap 'rm -rf "$tmpdir"' 0 2 3 15 + # download files if name starts with http:// or https:// for i in "${!files[@]}"; do if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then @@ -24,13 +26,24 @@ for i in "${!files[@]}"; do fi done -# support multiple files and stdin -readarray -t jsonlines < <(cat "${files[@]}" | jq --slurp --compact-output 'add | .[]') -for line in "${jsonlines[@]}"; do - declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" - echo "${data[op]#core/}" - unset "data[op]" - unset "data[description]" - for K in "${!data[@]}"; do echo "$K" --- "${data[$K]}"; done - unset data -done \ No newline at end of file +# support multiple files +for i in "${!files[@]}"; do + # read each operation into one line + mapfile -t jsonlines < <(jq -c '.[]' "${files[$i]}") + for line in "${jsonlines[@]}"; do + # parse operation into curl options + declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" + op="${data[op]#core/}" + unset "data[op]" + unset "data[description]" + mapfile -t curloptions < <(for K in "${!data[@]}"; do + echo "--data-urlencode" + echo "$K={data[$K]}" + done) + # get project id and csrf token; post data to it's individual endpoint + if ! curl -fs --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${op}$(get_csrf)"; then + error "applying ${op} from ${files[$i]} failed!" + fi + unset data + done +done From 51c15ea0e06e2820251cb614e2bfca181d305b02 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Mon, 7 Nov 2022 22:03:20 +0000 Subject: [PATCH 3/5] map operation names to commands --- orcli | 45 ++++++++++++++++++++++++++++++---------- src/transform_command.sh | 19 +++++++++++------ 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/orcli b/orcli index fd7055b..fe993e1 100755 --- a/orcli +++ b/orcli @@ -856,8 +856,7 @@ orcli_info_command() { # :command.function orcli_transform_command() { # src/transform_command.sh - # shellcheck shell=bash disable=SC2154 - #get_id "${args[project]}" + # shellcheck shell=bash disable=SC2154 disable=SC2155 # check if stdin is present if selected if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then @@ -866,12 +865,15 @@ orcli_transform_command() { exit 1 fi fi + # catch args, convert the space delimited string to an array files=() eval "files=(${args[file]})" + # create tmp directory tmpdir="$(mktemp -d)" trap 'rm -rf "$tmpdir"' 0 2 3 15 + # download files if name starts with http:// or https:// for i in "${!files[@]}"; do if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then @@ -882,16 +884,37 @@ orcli_transform_command() { fi done - # support multiple files and stdin - readarray -t jsonlines < <(cat "${files[@]}" | jq --slurp --compact-output 'add | .[]') - for line in "${jsonlines[@]}"; do - declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" - echo "${data[op]#core/}" - unset "data[op]" - unset "data[description]" - for K in "${!data[@]}"; do echo "$K" --- "${data[$K]}"; done - unset data + # support multiple files + for i in "${!files[@]}"; do + # read each operation into one line + mapfile -t jsonlines < <(jq -c '.[]' "${files[$i]}") + for line in "${jsonlines[@]}"; do + # parse operation into curl options + declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" + # map operation name to command + com="${data[op]#core/}" + if [[ $com == "row-reorder" ]]; then com="reorder-rows"; fi + unset "data[op]" + # rename engineConfig to engine + data[engine]="${data[engineConfig]}" + unset "data[engineConfig]" + # drop description + unset "data[description]" + mapfile -t curloptions < <(for K in "${!data[@]}"; do + echo "--data" + echo "$K=${data[$K]}" + done) + # get project id and csrf token; post data to it's individual endpoint + set -x + # debug: remove -fs + if ! curl --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)"; then + error "applying ${op} from ${files[$i]} failed!" + fi + set +x + unset data + done done + } # :command.function diff --git a/src/transform_command.sh b/src/transform_command.sh index 8ab6573..927676a 100644 --- a/src/transform_command.sh +++ b/src/transform_command.sh @@ -1,4 +1,4 @@ -# shellcheck shell=bash disable=SC2154 +# shellcheck shell=bash disable=SC2154 disable=SC2155 # check if stdin is present if selected if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then @@ -33,15 +33,22 @@ for i in "${!files[@]}"; do for line in "${jsonlines[@]}"; do # parse operation into curl options declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" - op="${data[op]#core/}" + # map operation name to command + com="${data[op]#core/}" + if [[ $com == "row-reorder" ]]; then com="reorder-rows"; fi unset "data[op]" + # rename engineConfig to engine + data[engine]="${data[engineConfig]}" + unset "data[engineConfig]" + # drop description unset "data[description]" mapfile -t curloptions < <(for K in "${!data[@]}"; do - echo "--data-urlencode" - echo "$K={data[$K]}" + echo "--data" + echo "$K=${data[$K]}" done) - # get project id and csrf token; post data to it's individual endpoint - if ! curl -fs --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${op}$(get_csrf)"; then + # get project id and csrf token; post data to it's individual endpoint + # debug: remove -fs option temporarily + if ! curl --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)"; then error "applying ${op} from ${files[$i]} failed!" fi unset data From c7d5273b1cd10a5cd10f27e451e0db4c9faa1551 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Tue, 8 Nov 2022 13:10:10 +0000 Subject: [PATCH 4/5] parse historyResponse --- orcli | 14 +++++++------- src/transform_command.sh | 12 +++++++----- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/orcli b/orcli index fe993e1..b07d801 100755 --- a/orcli +++ b/orcli @@ -889,9 +889,9 @@ orcli_transform_command() { # read each operation into one line mapfile -t jsonlines < <(jq -c '.[]' "${files[$i]}") for line in "${jsonlines[@]}"; do - # parse operation into curl options + # parse one line/operation into array declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" - # map operation name to command + # map operation names to command endpoints com="${data[op]#core/}" if [[ $com == "row-reorder" ]]; then com="reorder-rows"; fi unset "data[op]" @@ -900,17 +900,17 @@ orcli_transform_command() { unset "data[engineConfig]" # drop description unset "data[description]" + # prepare curl options mapfile -t curloptions < <(for K in "${!data[@]}"; do echo "--data" echo "$K=${data[$K]}" done) # get project id and csrf token; post data to it's individual endpoint - set -x - # debug: remove -fs - if ! curl --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)"; then - error "applying ${op} from ${files[$i]} failed!" + if response="$(curl -fs --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)")"; then + log "applied ${com} to ${args[project]}" "Response: $(jq '.historyEntry.description' <<< "$response")" + else + error "applying ${com} from ${files[$i]} to ${args[project]} failed!" fi - set +x unset data done done diff --git a/src/transform_command.sh b/src/transform_command.sh index 927676a..51aa9d6 100644 --- a/src/transform_command.sh +++ b/src/transform_command.sh @@ -31,9 +31,9 @@ for i in "${!files[@]}"; do # read each operation into one line mapfile -t jsonlines < <(jq -c '.[]' "${files[$i]}") for line in "${jsonlines[@]}"; do - # parse operation into curl options + # parse one line/operation into array declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" - # map operation name to command + # map operation names to command endpoints com="${data[op]#core/}" if [[ $com == "row-reorder" ]]; then com="reorder-rows"; fi unset "data[op]" @@ -42,14 +42,16 @@ for i in "${!files[@]}"; do unset "data[engineConfig]" # drop description unset "data[description]" + # prepare curl options mapfile -t curloptions < <(for K in "${!data[@]}"; do echo "--data" echo "$K=${data[$K]}" done) # get project id and csrf token; post data to it's individual endpoint - # debug: remove -fs option temporarily - if ! curl --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)"; then - error "applying ${op} from ${files[$i]} failed!" + if response="$(curl -fs --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)")"; then + log "applied ${com} to ${args[project]}" "Response: $(jq '.historyEntry.description' <<< "$response")" + else + error "applying ${com} from ${files[$i]} to ${args[project]} failed!" fi unset data done From cdd92fc1811d1908fba21e6f68004e56874a04cf Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Sun, 13 Nov 2022 21:43:04 +0000 Subject: [PATCH 5/5] add mappings and improve error handling --- orcli | 28 ++++++++++++++++++++++++---- src/bashly.yml | 4 ++-- src/transform_command.sh | 24 ++++++++++++++++++++++-- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/orcli b/orcli index b07d801..8287b81 100755 --- a/orcli +++ b/orcli @@ -79,7 +79,7 @@ orcli_usage() { printf " orcli export tsv \"duplicates\"\n" printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n" printf " orcli run --interactive\n" - printf " orcli run << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n orcli info \"duplicates\"\n orcli export tsv \"duplicates\"\n EOF\n" + printf " orcli run << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n orcli export tsv \"duplicates\"\n EOF\n" echo # :command.footer @@ -490,7 +490,7 @@ orcli_run_usage() { # :command.usage_examples printf "Examples:\n" printf " orcli run --interactive\n" - printf " orcli run << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n orcli info \"duplicates\"\n orcli export tsv \"duplicates\"\n EOF\n" + printf " orcli run << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n orcli export tsv \"duplicates\"\n EOF\n" printf " orcli run --memory \"2000M\" --port \"3334\" << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" &\n orcli import csv \"https://git.io/fj5hF\" --projectName \"copy\" &\n wait\n echo \"finished import\"\n orcli export csv \"duplicates\" --output duplicates.csv &\n orcli export tsv \"duplicates\" --output duplicates.tsv &\n wait\n wc duplicates*\n EOF\n" printf " orcli run --interactive \"file1.sh\" \"file2.sh\" - << EOF\n echo \"finished in \$SECONDS seconds\"\n EOF\n" echo @@ -892,7 +892,22 @@ orcli_transform_command() { # parse one line/operation into array declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" # map operation names to command endpoints + # https://github.com/OpenRefine/OpenRefine/blob/master/main/webapp/modules/core/MOD-INF/controller.js com="${data[op]#core/}" + if [[ $com == "multivalued-cell-join" ]]; then com="join-multi-value-cells"; fi + if [[ $com == "multivalued-cell-split" ]]; then com="split-multi-value-cells"; fi + if [[ $com == "column-addition" ]]; then com="add-column"; fi + if [[ $com == "column-addition-by-fetching-urls" ]]; then com="add-column-by-fetching-urls"; fi + if [[ $com == "column-removal" ]]; then com="remove-column"; fi + if [[ $com == "column-rename" ]]; then com="rename-column"; fi + if [[ $com == "column-move" ]]; then com="move-column"; fi + if [[ $com == "column-split" ]]; then com="split-column"; fi + if [[ $com == "column-reorder" ]]; then com="reorder-columns"; fi + if [[ $com == "recon" ]]; then com="reconcile"; fi + if [[ $com == "extend-reconciled-data" ]]; then com="extend-data"; fi + if [[ $com == "row-star" ]]; then com="annotate-rows"; fi + if [[ $com == "row-flag" ]]; then com="annotate-rows"; fi + if [[ $com == "row-removal" ]]; then com="remove-rows"; fi if [[ $com == "row-reorder" ]]; then com="reorder-rows"; fi unset "data[op]" # rename engineConfig to engine @@ -907,9 +922,14 @@ orcli_transform_command() { done) # get project id and csrf token; post data to it's individual endpoint if response="$(curl -fs --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)")"; then - log "applied ${com} to ${args[project]}" "Response: $(jq '.historyEntry.description' <<< "$response")" + response_code="$(jq -r '.code' <<<"$response")" + if [[ $response_code == "ok" ]]; then + log "transformed ${args[project]} with ${com}" "Response: $(jq -r '.historyEntry.description' <<<"$response")" + else + error "transforming ${args[project]} with ${com} from ${files[$i]} failed!" "Response: $(jq -r '.message' <<<"$response")" + fi else - error "applying ${com} from ${files[$i]} to ${args[project]} failed!" + error "transforming ${args[project]} with ${com} from ${files[$i]} failed!" fi unset data done diff --git a/src/bashly.yml b/src/bashly.yml index 28f1cae..4f05d8b 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -23,7 +23,7 @@ examples: - |- orcli run << EOF orcli import csv "https://git.io/fj5hF" --projectName "duplicates" - orcli info "duplicates" + orcli transform "duplicates" "https://git.io/fj5ju" orcli export tsv "duplicates" EOF @@ -148,7 +148,7 @@ commands: - |- orcli run << EOF orcli import csv "https://git.io/fj5hF" --projectName "duplicates" - orcli info "duplicates" + orcli transform "duplicates" "https://git.io/fj5ju" orcli export tsv "duplicates" EOF - |- diff --git a/src/transform_command.sh b/src/transform_command.sh index 51aa9d6..fde2047 100644 --- a/src/transform_command.sh +++ b/src/transform_command.sh @@ -34,7 +34,22 @@ for i in "${!files[@]}"; do # parse one line/operation into array declare -A data="($(echo "$line" | jq -r 'to_entries | map("[\(.key)]=" + @sh "\(.value|tostring)") | .[]'))" # map operation names to command endpoints + # https://github.com/OpenRefine/OpenRefine/blob/master/main/webapp/modules/core/MOD-INF/controller.js com="${data[op]#core/}" + if [[ $com == "multivalued-cell-join" ]]; then com="join-multi-value-cells"; fi + if [[ $com == "multivalued-cell-split" ]]; then com="split-multi-value-cells"; fi + if [[ $com == "column-addition" ]]; then com="add-column"; fi + if [[ $com == "column-addition-by-fetching-urls" ]]; then com="add-column-by-fetching-urls"; fi + if [[ $com == "column-removal" ]]; then com="remove-column"; fi + if [[ $com == "column-rename" ]]; then com="rename-column"; fi + if [[ $com == "column-move" ]]; then com="move-column"; fi + if [[ $com == "column-split" ]]; then com="split-column"; fi + if [[ $com == "column-reorder" ]]; then com="reorder-columns"; fi + if [[ $com == "recon" ]]; then com="reconcile"; fi + if [[ $com == "extend-reconciled-data" ]]; then com="extend-data"; fi + if [[ $com == "row-star" ]]; then com="annotate-rows"; fi + if [[ $com == "row-flag" ]]; then com="annotate-rows"; fi + if [[ $com == "row-removal" ]]; then com="remove-rows"; fi if [[ $com == "row-reorder" ]]; then com="reorder-rows"; fi unset "data[op]" # rename engineConfig to engine @@ -49,9 +64,14 @@ for i in "${!files[@]}"; do done) # get project id and csrf token; post data to it's individual endpoint if response="$(curl -fs --data "project=$(get_id "${args[project]}")" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)")"; then - log "applied ${com} to ${args[project]}" "Response: $(jq '.historyEntry.description' <<< "$response")" + response_code="$(jq -r '.code' <<<"$response")" + if [[ $response_code == "ok" ]]; then + log "transformed ${args[project]} with ${com}" "Response: $(jq -r '.historyEntry.description' <<<"$response")" + else + error "transforming ${args[project]} with ${com} from ${files[$i]} failed!" "Response: $(jq -r '.message' <<<"$response")" + fi else - error "applying ${com} from ${files[$i]} to ${args[project]} failed!" + error "transforming ${args[project]} with ${com} from ${files[$i]} failed!" fi unset data done