first draft

This commit is contained in:
felixlohmeier 2022-11-03 21:07:08 +00:00
parent 600a06e7bd
commit 26feff650a
5 changed files with 238 additions and 7 deletions

View File

@ -73,7 +73,11 @@ Optional:
orcli import csv "https://git.io/fj5hF" --projectName "duplicates" orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
``` ```
3. Remove duplicates (coming soon) 3. Remove duplicates by applying an undo/redo JSON file
```sh
orcli transform "duplicates" "https://git.io/fj5ju"
```
4. Export data from OpenRefine project to tab-separated-values (TSV) file `duplicates.tsv` 4. Export data from OpenRefine project to tab-separated-values (TSV) file `duplicates.tsv`

176
orcli
View File

@ -38,6 +38,7 @@ orcli_usage() {
echo " import commands to create OpenRefine projects from files or URLs" echo " import commands to create OpenRefine projects from files or URLs"
echo " list list projects on OpenRefine server" echo " list list projects on OpenRefine server"
echo " info show OpenRefine project's metadata" echo " info show OpenRefine project's metadata"
echo " transform apply undo/redo JSON file(s) to an OpenRefine project"
echo " export commands to export data from OpenRefine projects to files" echo " export commands to export data from OpenRefine projects to files"
echo " run run tmp OpenRefine workspace and execute shell script(s)" echo " run run tmp OpenRefine workspace and execute shell script(s)"
echo echo
@ -74,6 +75,7 @@ orcli_usage() {
printf " orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n" printf " orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n"
printf " orcli list\n" printf " orcli list\n"
printf " orcli info \"duplicates\"\n" printf " orcli info \"duplicates\"\n"
printf " orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n"
printf " orcli export tsv \"duplicates\"\n" printf " orcli export tsv \"duplicates\"\n"
printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n" printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n"
printf " orcli run --interactive\n" printf " orcli run --interactive\n"
@ -286,8 +288,59 @@ orcli_info_usage() {
# :command.usage_examples # :command.usage_examples
printf "Examples:\n" printf "Examples:\n"
printf " info \"duplicates\"\n" printf " orcli info \"duplicates\"\n"
printf " info 1234567890123\n" printf " orcli info 1234567890123\n"
echo
fi
}
# :command.usage
orcli_transform_usage() {
if [[ -n $long_usage ]]; then
printf "orcli transform - apply undo/redo JSON file(s) to an OpenRefine project\n"
echo
else
printf "orcli transform - apply undo/redo JSON file(s) to an OpenRefine project\n"
echo
fi
printf "Usage:\n"
printf " orcli transform PROJECT [FILE...]\n"
printf " orcli transform --help | -h\n"
echo
# :command.long_usage
if [[ -n $long_usage ]]; then
printf "Options:\n"
# :command.usage_fixed_flags
echo " --help, -h"
printf " Show this help\n"
echo
# :command.usage_args
printf "Arguments:\n"
# :argument.usage
echo " PROJECT"
printf " project name or id\n"
echo
# :argument.usage
echo " FILE..."
printf " Path to one or more files or URLs containing OpenRefine's undo/redo\n operation history in JSON format. When FILE is -, read standard input.\n"
printf " Default: -\n"
echo
# :command.usage_examples
printf "Examples:\n"
printf " orcli transform \"duplicates\" \"history.json\"\n"
printf " cat \"history.json\" | orcli transform \"duplicates\"\n"
printf " orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n"
printf " orcli transform 1234567890123 \"history.json\"\n"
echo echo
fi fi
@ -693,6 +746,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --output -h")" -- "$cur" )' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --output -h")" -- "$cur" )'
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' \'transform\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import\'*)' echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv")" -- "$cur" )' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv")" -- "$cur" )'
echo $' ;;' echo $' ;;'
@ -714,7 +771,7 @@ send_completions() {
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' *)' echo $' *)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet --version -h -q -v completions export import info list run")" -- "$cur" )' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet --version -h -q -v completions export import info list run transform")" -- "$cur" )'
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' esac' echo $' esac'
@ -796,6 +853,47 @@ orcli_info_command() {
} }
# :command.function
orcli_transform_command() {
  # src/transform_command.sh
  # shellcheck shell=bash disable=SC2154
  #
  # First draft of `orcli transform`: gathers OpenRefine undo/redo JSON from
  # the FILE arguments (local paths, http(s) URLs, or "-" for standard input)
  # and, for every operation found, prints the operation name followed by its
  # remaining key/value pairs. Nothing is sent to OpenRefine yet.
  #
  # Reads:  args[file]  space-delimited, shell-quoted list built by
  #                     orcli_transform_parse_requirements
  # Sets:   files, tmpdir (global; tmpdir must outlive this function because
  #         the cleanup trap expands it when the shell exits)
  #get_id "${args[project]}"

  # check if stdin is present if selected
  # (`read -t 0` only tests whether input is available, it consumes nothing)
  if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then
    if ! read -u 0 -t 0; then
      orcli_transform_usage
      exit 1
    fi
  fi

  # catch args, convert the space delimited string to an array
  # NOTE(review): eval re-parses args[file] as shell code, so this is only as
  # safe as the quoting applied by the argument parser - confirm that the
  # parser escapes every FILE value before relying on this.
  files=()
  eval "files=(${args[file]})"

  # create tmp directory; stop here if that fails so that downloads are never
  # written to "/<name>" through an empty $tmpdir
  tmpdir="$(mktemp -d)" || exit 1
  trap 'rm -rf -- "$tmpdir"' 0 2 3 15

  # download files if name starts with http:// or https://
  for i in "${!files[@]}"; do
    if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then
      # name the download after its position in the argument list: distinct
      # URLs can never collide and long URLs cannot exceed the file name limit
      download="${tmpdir}/download_${i}.json"
      if ! curl -fs --location "${files[$i]}" >"$download"; then
        error "download of ${files[$i]} failed!"
      fi
      files[$i]="$download"
    fi
  done

  # support multiple files and stdin
  # --slurp collects all inputs into one array, `add` concatenates them and
  # `.[]` emits one compact JSON object per line (assumes every input is a
  # JSON array of operations - TODO confirm)
  readarray -t jsonlines < <(cat "${files[@]}" | jq --slurp --compact-output 'add | .[]')
  for line in "${jsonlines[@]}"; do
    # turn the JSON object into a bash associative array; keys AND values are
    # quoted with @sh so that nothing coming from the JSON is expanded when
    # bash parses the compound assignment
    declare -A data="($(printf '%s\n' "$line" | jq -r 'to_entries | map("[" + (.key | @sh) + "]=" + (.value | tostring | @sh)) | .[]'))"
    printf '%s\n' "${data[op]#core/}"
    unset "data[op]"
    unset "data[description]"
    for K in "${!data[@]}"; do printf '%s --- %s\n' "$K" "${data[$K]}"; done
    unset data
  done
}
# :command.function # :command.function
orcli_export_tsv_command() { orcli_export_tsv_command() {
# src/export_tsv_command.sh # src/export_tsv_command.sh
@ -1010,6 +1108,13 @@ parse_requirements() {
shift $# shift $#
;; ;;
transform )
action="transform"
shift
orcli_transform_parse_requirements "$@"
shift $#
;;
export ) export )
action="export" action="export"
shift shift
@ -1348,6 +1453,63 @@ orcli_info_parse_requirements() {
} }
# :command.parse_requirements
orcli_transform_parse_requirements() {
  # Parse the command line of `orcli transform PROJECT [FILE...]`.
  #
  # Arguments: "$@" - everything after the `transform` word
  # Sets:      action, key, long_usage (only for --help),
  #            args[project] - first positional argument, stored verbatim
  #            args[file]    - remaining positional arguments as one
  #                            space-delimited string in which every value is
  #                            shell-escaped, so that the command can rebuild
  #                            the list with `eval "files=(${args[file]})"`;
  #                            defaults to "-" (standard input)
  # Exits:     0 after printing help; 1 on an unknown option or when PROJECT
  #            is missing

  # :command.fixed_flags_filter
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_transform_usage
      exit
      ;;
  esac

  # :command.command_filter
  action="transform"

  # :command.parse_requirements_while
  local escaped
  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      -?*)
        # a lone "-" does not match this pattern and is accepted as FILE
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # :command.parse_requirements_case
        # :command.parse_requirements_case_repeatable
        if [[ -z ${args[project]+x} ]]; then
          args[project]=$1
        else
          # %q escapes quotes, spaces, $ and backticks; wrapping the raw
          # value in double quotes would let such characters break or
          # hijack the later eval
          printf -v escaped '%q' "$1"
          if [[ -z ${args[file]+x} ]]; then
            args[file]="$escaped"
          else
            args[file]="${args[file]} $escaped"
          fi
        fi
        shift
        ;;
    esac
  done

  # :command.required_args_filter
  if [[ -z ${args[project]+x} ]]; then
    printf "missing required argument: PROJECT\nusage: orcli transform PROJECT [FILE...]\n" >&2
    exit 1
  fi

  # :command.default_assignments
  [[ -n ${args[file]:-} ]] || args[file]="-"
}
# :command.parse_requirements # :command.parse_requirements
orcli_export_parse_requirements() { orcli_export_parse_requirements() {
# :command.fixed_flags_filter # :command.fixed_flags_filter
@ -1639,6 +1801,14 @@ run() {
orcli_info_command orcli_info_command
fi fi
elif [[ $action == "transform" ]]; then
if [[ ${args[--help]:-} ]]; then
long_usage=yes
orcli_transform_usage
else
orcli_transform_command
fi
elif [[ $action == "export" ]]; then elif [[ $action == "export" ]]; then
if [[ ${args[--help]:-} ]]; then if [[ ${args[--help]:-} ]]; then
long_usage=yes long_usage=yes

View File

@ -16,6 +16,7 @@ examples:
- orcli import csv "https://git.io/fj5hF" --projectName "duplicates" - orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
- orcli list - orcli list
- orcli info "duplicates" - orcli info "duplicates"
- orcli transform "duplicates" "https://git.io/fj5ju"
- orcli export tsv "duplicates" - orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv" - orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli run --interactive - orcli run --interactive
@ -83,8 +84,24 @@ commands:
help: project name or id help: project name or id
required: true required: true
examples: examples:
- info "duplicates" - orcli info "duplicates"
- info 1234567890123 - orcli info 1234567890123
# Definition of the `transform` subcommand. The help strings and examples
# below match the usage text printed by orcli_transform_usage in `orcli`.
- name: transform
help: apply undo/redo JSON file(s) to an OpenRefine project
args:
# PROJECT: the only mandatory argument
- name: project
help: project name or id
required: true
# FILE: optional and repeatable; the usage line shows it as [FILE...]
- name: file
help: Path to one or more files or URLs containing OpenRefine's undo/redo operation history in JSON format. When FILE is -, read standard input.
# "-" selects standard input, as described in the help text above
default: "-"
repeatable: true
examples:
- orcli transform "duplicates" "history.json"
- cat "history.json" | orcli transform "duplicates"
- orcli transform "duplicates" "https://git.io/fj5ju"
- orcli transform 1234567890123 "history.json"
- name: export - name: export
help: commands to export data from OpenRefine projects to files help: commands to export data from OpenRefine projects to files

View File

@ -42,6 +42,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --output -h")" -- "$cur" )' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --output -h")" -- "$cur" )'
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' \'transform\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import\'*)' echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv")" -- "$cur" )' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv")" -- "$cur" )'
echo $' ;;' echo $' ;;'
@ -63,7 +67,7 @@ send_completions() {
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' *)' echo $' *)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet --version -h -q -v completions export import info list run")" -- "$cur" )' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet --version -h -q -v completions export import info list run transform")" -- "$cur" )'
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' esac' echo $' esac'

36
src/transform_command.sh Normal file
View File

@ -0,0 +1,36 @@
# shellcheck shell=bash disable=SC2154
#
# First draft of `orcli transform`: gathers OpenRefine undo/redo JSON from the
# FILE arguments (local paths, http(s) URLs, or "-" for standard input) and,
# for every operation found, prints the operation name followed by its
# remaining key/value pairs. Nothing is sent to OpenRefine yet.
#
# Reads: args[file]  space-delimited, shell-quoted list of FILE arguments
# Sets:  files, tmpdir (tmpdir must stay set until the shell exits because the
#        cleanup trap expands it at that point)
#get_id "${args[project]}"

# check if stdin is present if selected
# (`read -t 0` only tests whether input is available, it consumes nothing)
if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then
  if ! read -u 0 -t 0; then
    orcli_transform_usage
    exit 1
  fi
fi

# catch args, convert the space delimited string to an array
# NOTE(review): eval re-parses args[file] as shell code, so this is only as
# safe as the quoting applied by the argument parser - confirm that the parser
# escapes every FILE value before relying on this.
files=()
eval "files=(${args[file]})"

# create tmp directory; stop here if that fails so that downloads are never
# written to "/<name>" through an empty $tmpdir
tmpdir="$(mktemp -d)" || exit 1
trap 'rm -rf -- "$tmpdir"' 0 2 3 15

# download files if name starts with http:// or https://
for i in "${!files[@]}"; do
  if [[ ${files[$i]} == "http://"* ]] || [[ ${files[$i]} == "https://"* ]]; then
    # name the download after its position in the argument list: distinct URLs
    # can never collide and long URLs cannot exceed the file name limit
    download="${tmpdir}/download_${i}.json"
    if ! curl -fs --location "${files[$i]}" >"$download"; then
      error "download of ${files[$i]} failed!"
    fi
    files[$i]="$download"
  fi
done

# support multiple files and stdin
# --slurp collects all inputs into one array, `add` concatenates them and
# `.[]` emits one compact JSON object per line (assumes every input is a JSON
# array of operations - TODO confirm)
readarray -t jsonlines < <(cat "${files[@]}" | jq --slurp --compact-output 'add | .[]')
for line in "${jsonlines[@]}"; do
  # turn the JSON object into a bash associative array; keys AND values are
  # quoted with @sh so that nothing coming from the JSON is expanded when bash
  # parses the compound assignment
  declare -A data="($(printf '%s\n' "$line" | jq -r 'to_entries | map("[" + (.key | @sh) + "]=" + (.value | tostring | @sh)) | .[]'))"
  printf '%s\n' "${data[op]#core/}"
  unset "data[op]"
  unset "data[description]"
  for K in "${!data[@]}"; do printf '%s --- %s\n' "$K" "${data[$K]}"; done
  unset data
done