Merge pull request #114 from opencultureconsulting:28-import-json

28-import-json
This commit is contained in:
Felix Lohmeier 2023-10-28 23:37:30 +02:00 committed by GitHub
commit 206bbf60f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 1248 additions and 30 deletions

View File

@ -2,7 +2,7 @@
"image": "mcr.microsoft.com/devcontainers/universal:2",
"features": {
},
"postCreateCommand": "gem install --silent bashly && wget -q -O openrefine.tar.gz https://github.com/OpenRefine/OpenRefine/releases/download/3.7.6/openrefine-linux-3.7.6.tar.gz && tar --exclude 'licenses' --exclude 'LICENSE.txt' --exclude 'licenses.xml' --exclude 'README.md' -xzf openrefine.tar.gz --strip 1 && rm openrefine.tar.gz",
"postCreateCommand": "gem install --silent bashly && wget -q -O openrefine.tar.gz https://github.com/OpenRefine/OpenRefine/releases/download/3.7.6/openrefine-linux-3.7.6.tar.gz && tar --exclude 'licenses' --exclude 'LICENSE.txt' --exclude 'licenses.xml' --exclude 'README.md' -xzf openrefine.tar.gz --strip 1 && rm openrefine.tar.gz && ln -s \"${PWD}/orcli\" ~/.local/bin/",
"forwardPorts": [3333],
"portsAttributes": {
"3333": {

View File

@ -12,7 +12,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
* allows execution of arbitrary bash scripts
* interactive mode for playing around and debugging
* your existing OpenRefine data will not be touched
* import CSV, TSV, ~~line-based TXT, fixed-width TXT, JSON or XML~~
* import CSV, TSV, JSON, JSONL, ~~line-based TXT, fixed-width TXT or XML~~
* supports stdin, multiple files and URLs
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
* orcli calls specific endpoints for each operation to provide improved error handling and logging

View File

@ -7,6 +7,8 @@
- [export template](export_template.md)
- [export tsv](export_tsv.md)
- [import csv](import_csv.md)
- [import json](import_json.md)
- [import jsonl](import_jsonl.md)
- [import tsv](import_tsv.md)
- [info](info.md)
- [list](list.md)

68
help/import_json.md Normal file
View File

@ -0,0 +1,68 @@
# orcli import json
```
orcli import json - import JSON
Usage:
orcli import json [FILE...] [OPTIONS]
orcli import json --help | -h
Options:
--recordPath JSON
specify record path elements in JSON array
Default: [ "_" , "_" ]
--rename
rename columns after import to remove record path fragments
--guessCellValueTypes
attempt to parse cell text into numbers
--includeFileSources
add column with file source
--includeArchiveFileName
add column with archive file name
--limit LIMIT
load at most x row(s) of data
Default: -1
--storeEmptyStrings
preserve empty strings
--trimStrings
trim leading & trailing whitespace from strings
--projectName PROJECTNAME
set a name for the OpenRefine project
--projectTags PROJECTTAGS
set project tags (comma separated)
--quiet, -q
suppress log output, print errors only
--help, -h
Show this help
Arguments:
FILE...
Path to one or more files or URLs. When FILE is -, read standard input.
Default: -
Examples:
orcli import json "file"
orcli import json "file1" "file2"
orcli import json "https://example.com/file.json"
orcli import json "file" \
--recordPath '[ "_", "rows", "_" ]' \
--rename \
--storeEmptyStrings \
--trimStrings \
--projectName "duplicates" \
--projectTags "test,urgent"
```
code: [src/import_json_command.sh](../src/import_json_command.sh)

63
help/import_jsonl.md Normal file
View File

@ -0,0 +1,63 @@
# orcli import jsonl
```
orcli import jsonl - import JSON Lines / newline-delimited JSON
Usage:
orcli import jsonl [FILE...] [OPTIONS]
orcli import jsonl --help | -h
Options:
--rename
rename columns after import to remove record path fragments
--guessCellValueTypes
attempt to parse cell text into numbers
--includeFileSources
add column with file source
--includeArchiveFileName
add column with archive file name
--limit LIMIT
load at most x row(s) of data
Default: -1
--storeEmptyStrings
preserve empty strings
--trimStrings
trim leading & trailing whitespace from strings
--projectName PROJECTNAME
set a name for the OpenRefine project
--projectTags PROJECTTAGS
set project tags (comma separated)
--quiet, -q
suppress log output, print errors only
--help, -h
Show this help
Arguments:
FILE...
Path to one or more files or URLs. When FILE is -, read standard input.
Default: -
Examples:
orcli import jsonl "file"
orcli import jsonl "file1" "file2"
orcli import jsonl "https://example.com/file.json"
orcli import jsonl "file" \
--rename \
--storeEmptyStrings \
--trimStrings \
--projectName "duplicates" \
--projectTags "test,urgent"
```
code: [src/import_jsonl_command.sh](../src/import_jsonl_command.sh)

711
orcli
View File

@ -194,8 +194,10 @@ orcli_import_usage() {
echo
# :command.usage_commands
printf "%s\n" "Commands:"
printf " %s import character-separated values (CSV)\n" "csv"
printf " %s import tab-separated values (TSV)\n" "tsv"
printf " %s import character-separated values (CSV)\n" "csv "
printf " %s import tab-separated values (TSV)\n" "tsv "
printf " %s import JSON\n" "json "
printf " %s import JSON Lines / newline-delimited JSON\n" "jsonl"
echo
# :command.long_usage
@ -492,6 +494,208 @@ orcli_import_tsv_usage() {
fi
}
# :command.usage
orcli_import_json_usage() {
  # Print the usage text of `orcli import json` to stdout.
  #
  # Globals:
  #   long_usage (read) - when non-empty, the options, arguments and examples
  #                       sections are printed after the short synopsis.
  local verbose=$long_usage
  local idx

  # headline and synopsis are the same for the short and the long form
  printf "orcli import json - import JSON\n"
  echo
  printf "%s\n" "Usage:"
  printf " orcli import json [FILE...] [OPTIONS]\n"
  printf " orcli import json --help | -h\n"
  echo

  # the short form stops after the synopsis
  if [[ -z $verbose ]]; then
    return 0
  fi

  # one triplet per flag: name, description, optional "Default: ..." line
  # (an empty third field means the flag has no default to show)
  local -a flag_rows=(
    "--recordPath JSON" "specify record path elements in JSON array" "Default: [ \"_\" , \"_\" ]"
    "--rename" "rename columns after import to remove record path fragments" ""
    "--guessCellValueTypes" "attempt to parse cell text into numbers" ""
    "--includeFileSources" "add column with file source" ""
    "--includeArchiveFileName" "add column with archive file name" ""
    "--limit LIMIT" "load at most x row(s) of data" "Default: -1"
    "--storeEmptyStrings" "preserve empty strings" ""
    "--trimStrings" "trim leading & trailing whitespace from strings" ""
    "--projectName PROJECTNAME" "set a name for the OpenRefine project" ""
    "--projectTags PROJECTTAGS" "set project tags (comma separated)" ""
    "--quiet, -q" "suppress log output, print errors only" ""
    "--help, -h" "Show this help" ""
  )

  printf "%s\n" "Options:"
  for ((idx = 0; idx < ${#flag_rows[@]}; idx += 3)); do
    # the format is reused for both arguments: flag name, then description
    printf " %s\n" "${flag_rows[idx]}" "${flag_rows[idx + 1]}"
    [[ -z ${flag_rows[idx + 2]} ]] || printf " %s\n" "${flag_rows[idx + 2]}"
    echo
  done

  # positional arguments
  printf "%s\n" "Arguments:"
  printf " %s\n" "FILE..."
  printf " Path to one or more files or URLs. When FILE is -, read standard input.\n"
  printf " Default: -\n"
  echo

  # examples (the last one is a multi-line command with trailing backslashes)
  printf "%s\n" "Examples:"
  printf " orcli import json \"file\"\n"
  printf " orcli import json \"file1\" \"file2\"\n"
  printf " orcli import json \"https://example.com/file.json\"\n"
  printf " orcli import json \"file\" \\\\\n --recordPath '[ \"_\", \"rows\", \"_\" ]' \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
  echo
}
# :command.usage
orcli_import_jsonl_usage() {
  # Print the usage text of `orcli import jsonl` to stdout.
  #
  # Globals:
  #   long_usage (read) - when non-empty, the options, arguments and examples
  #                       sections are printed after the short synopsis.
  local verbose=$long_usage
  local idx

  # headline and synopsis are the same for the short and the long form
  printf "orcli import jsonl - import JSON Lines / newline-delimited JSON\n"
  echo
  printf "%s\n" "Usage:"
  printf " orcli import jsonl [FILE...] [OPTIONS]\n"
  printf " orcli import jsonl --help | -h\n"
  echo

  # the short form stops after the synopsis
  if [[ -z $verbose ]]; then
    return 0
  fi

  # one triplet per flag: name, description, optional "Default: ..." line
  # (an empty third field means the flag has no default to show)
  local -a flag_rows=(
    "--rename" "rename columns after import to remove record path fragments" ""
    "--guessCellValueTypes" "attempt to parse cell text into numbers" ""
    "--includeFileSources" "add column with file source" ""
    "--includeArchiveFileName" "add column with archive file name" ""
    "--limit LIMIT" "load at most x row(s) of data" "Default: -1"
    "--storeEmptyStrings" "preserve empty strings" ""
    "--trimStrings" "trim leading & trailing whitespace from strings" ""
    "--projectName PROJECTNAME" "set a name for the OpenRefine project" ""
    "--projectTags PROJECTTAGS" "set project tags (comma separated)" ""
    "--quiet, -q" "suppress log output, print errors only" ""
    "--help, -h" "Show this help" ""
  )

  printf "%s\n" "Options:"
  for ((idx = 0; idx < ${#flag_rows[@]}; idx += 3)); do
    # the format is reused for both arguments: flag name, then description
    printf " %s\n" "${flag_rows[idx]}" "${flag_rows[idx + 1]}"
    [[ -z ${flag_rows[idx + 2]} ]] || printf " %s\n" "${flag_rows[idx + 2]}"
    echo
  done

  # positional arguments
  printf "%s\n" "Arguments:"
  printf " %s\n" "FILE..."
  printf " Path to one or more files or URLs. When FILE is -, read standard input.\n"
  printf " Default: -\n"
  echo

  # examples (the last one is a multi-line command with trailing backslashes)
  printf "%s\n" "Examples:"
  printf " orcli import jsonl \"file\"\n"
  printf " orcli import jsonl \"file1\" \"file2\"\n"
  printf " orcli import jsonl \"https://example.com/file.json\"\n"
  printf " orcli import jsonl \"file\" \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
  echo
}
# :command.usage
orcli_list_usage() {
if [[ -n $long_usage ]]; then
@ -1236,6 +1440,17 @@ function post_import() {
else
log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}"
fi
# json / jsonl --rename
if [[ ${args[--rename]} ]]; then
csrf="$(get_csrf)"
readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')
for c in "${columns[@]}"; do
if ! curl -fs -o /dev/null --data project="$projectid" --data "oldColumnName=${c}" --data "newColumnName=${c##_ - }" "${OPENREFINE_URL}/command/core/rename-column${csrf}"; then
error "renaming columns in ${projectname} failed!"
fi
done
log "renamed columns in ${projectname}"
fi
}
# src/lib/send_completions.sh
@ -1278,10 +1493,18 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'completions\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import json\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
@ -1303,7 +1526,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv json jsonl tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'search\'*)'
@ -1440,10 +1663,6 @@ orcli_import_csv_command() {
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
@ -1456,15 +1675,19 @@ orcli_import_csv_command() {
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
if [[ ${args[--projectName]} ]]; then
options+=', '
options+='"trimStrings": true'
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")
@ -1532,10 +1755,6 @@ orcli_import_tsv_command() {
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
@ -1548,15 +1767,137 @@ orcli_import_tsv_command() {
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
if [[ ${args[--projectName]} ]]; then
options+=', '
options+='"trimStrings": true'
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")
# call post_import function to post data and validate results
post_import "${data[@]}"
}
# :command.function
orcli_import_json_command() {
  # src/import_json_command.sh
  # shellcheck shell=bash disable=SC2154
  #
  # Handler for `orcli import json`: assembles the importer options as a JSON
  # object and hands the collected post data to post_import.
  #
  # Globals:
  #   args    (read)    - parsed flags of the command
  #   file    (read)    - '-' selects standard input
  #   data    (written) - post data; "format" and "options" are appended here
  #   options (written) - the JSON options object as a string
  # Exits:
  #   1 (after printing the usage) when stdin was selected but has no input

  # call init_import function to eval args and to set basic post data
  init_import

  # check if stdin is present if selected
  # (read -t 0 only tests whether input is available, it consumes nothing)
  if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
    orcli_import_json_usage
    exit 1
  fi

  # assemble specific post data (some options require json format)
  data+=("format=text/json")
  options='{ '
  # --recordPath is already a JSON array and is inserted verbatim
  options+="\"recordPath\": ${args[--recordPath]}"
  if [[ ${args[--guessCellValueTypes]} ]]; then
    options+=', '
    options+='"guessCellValueTypes": true'
  fi
  if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    # the member name must be quoted, otherwise the options object is not valid JSON
    options+='"includeFileSources": true'
  fi
  if [[ ${args[--includeArchiveFileName]} ]]; then
    options+=', '
    options+='"includeArchiveFileName": true'
  fi
  if [[ ${args[--limit]} ]]; then
    options+=', '
    options+="\"limit\": ${args[--limit]}"
  fi
  if [[ ${args[--storeEmptyStrings]} ]]; then
    options+=', '
    options+='"storeEmptyStrings": true'
  fi
  if [[ ${args[--projectName]} ]]; then
    # NOTE(review): the name is interpolated without JSON escaping, so a name
    # containing a double quote or backslash would break the object - confirm
    # whether that needs handling
    options+=', '
    options+="\"projectName\": \"${args[--projectName]}\""
  fi
  if [[ ${args[--projectTags]} ]]; then
    # split the comma separated list and render it as a JSON array of strings;
    # printf emits ,"tag" per element and cut drops the leading comma
    IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
    options+=', '
    options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
  fi
  if [[ ${args[--trimStrings]} ]]; then
    options+=', '
    options+='"trimStrings": true'
  fi
  options+=' }'
  data+=("options=${options}")

  # call post_import function to post data and validate results
  post_import "${data[@]}"
}
# :command.function
orcli_import_jsonl_command() {
# src/import_jsonl_command.sh
# shellcheck shell=bash disable=SC2154
# call init_import function to eval args and to set basic post data
init_import
# check if stdin is present if selected
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
orcli_import_jsonl_usage
exit 1
fi
# assemble specific post data (some options require json format)
data+=("format=text/json")
options='{ '
options+="\"recordPath\": [\"_\"]"
if [[ ${args[--guessCellValueTypes]} ]]; then
options+=', '
options+='"guessCellValueTypes": true'
fi
if [[ ${args[--includeFileSources]} ]]; then
options+=', '
options+='includeFileSources: true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
options+=', '
options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--storeEmptyStrings]} ]]; then
options+=', '
options+='"storeEmptyStrings": true'
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")
@ -2331,6 +2672,20 @@ orcli_import_parse_requirements() {
shift $#
;;
json)
action="json"
shift
orcli_import_json_parse_requirements "$@"
shift $#
;;
jsonl)
action="jsonl"
shift
orcli_import_jsonl_parse_requirements "$@"
shift $#
;;
# :command.command_fallback
"")
orcli_import_usage >&2
@ -2915,6 +3270,330 @@ orcli_import_tsv_parse_requirements() {
}
# :command.parse_requirements
orcli_import_json_parse_requirements() {
  # Parse the arguments of `orcli import json`.
  #
  # Globals:
  #   args       (written) - receives the flags and the quoted FILE list
  #   action     (written) - set to "import json"
  #   key        (written) - the argument currently being inspected
  #   long_usage (written) - set to "yes" when help is requested
  # Arguments:
  #   "$@" - the words following `import json` on the command line
  # Exits:
  #   0 after printing the long usage when the first word is --help / -h
  #   1 on an unknown option or a value flag that has no value

  # placeholder shown in the "requires an argument" message of each value flag
  local -A value_name=(
    [--recordPath]="JSON"
    [--limit]="LIMIT"
    [--projectName]="PROJECTNAME"
    [--projectTags]="PROJECTTAGS"
  )

  # help is only honoured as the very first word
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_import_json_usage
      exit
      ;;
  esac

  action="import json"

  while (($# > 0)); do
    key="$1"
    case "$key" in
      # flags that take the following word as their value
      --recordPath | --limit | --projectName | --projectTags)
        if [[ -z ${2+x} ]]; then
          printf "%s\n" "$key requires an argument: $key ${value_name[$key]}" >&2
          exit 1
        fi
        args["$key"]="$2"
        shift 2
        ;;
      # plain switches
      --rename | --guessCellValueTypes | --includeFileSources | \
        --includeArchiveFileName | --storeEmptyStrings | --trimStrings)
        args["$key"]=1
        shift
        ;;
      --quiet | -q)
        args['--quiet']=1
        shift
        ;;
      # anything else that looks like an option is rejected
      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;
      # repeatable positional FILE: collected as a space separated list of
      # double-quoted words
      *)
        if [[ -n ${args['file']+x} ]]; then
          args['file']+=" \"$1\""
        else
          args['file']="\"$1\""
        fi
        shift
        ;;
    esac
  done

  # defaults for everything that is still empty
  if [[ -z ${args['file']:-} ]]; then
    args['file']="-"
  fi
  if [[ -z ${args['--recordPath']:-} ]]; then
    args['--recordPath']="[ \"_\" , \"_\" ]"
  fi
  if [[ -z ${args['--limit']:-} ]]; then
    args['--limit']="-1"
  fi
}
# :command.parse_requirements
orcli_import_jsonl_parse_requirements() {
  # Parse the arguments of `orcli import jsonl`.
  #
  # Globals:
  #   args       (written) - receives the flags and the quoted FILE list
  #   action     (written) - set to "import jsonl"
  #   key        (written) - the argument currently being inspected
  #   long_usage (written) - set to "yes" when help is requested
  # Arguments:
  #   "$@" - the words following `import jsonl` on the command line
  # Exits:
  #   0 after printing the long usage when the first word is --help / -h
  #   1 on an unknown option or a value flag that has no value

  # placeholder shown in the "requires an argument" message of each value flag
  local -A value_name=(
    [--limit]="LIMIT"
    [--projectName]="PROJECTNAME"
    [--projectTags]="PROJECTTAGS"
  )

  # help is only honoured as the very first word
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_import_jsonl_usage
      exit
      ;;
  esac

  action="import jsonl"

  while (($# > 0)); do
    key="$1"
    case "$key" in
      # flags that take the following word as their value
      --limit | --projectName | --projectTags)
        if [[ -z ${2+x} ]]; then
          printf "%s\n" "$key requires an argument: $key ${value_name[$key]}" >&2
          exit 1
        fi
        args["$key"]="$2"
        shift 2
        ;;
      # plain switches
      --rename | --guessCellValueTypes | --includeFileSources | \
        --includeArchiveFileName | --storeEmptyStrings | --trimStrings)
        args["$key"]=1
        shift
        ;;
      --quiet | -q)
        args['--quiet']=1
        shift
        ;;
      # anything else that looks like an option is rejected
      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;
      # repeatable positional FILE: collected as a space separated list of
      # double-quoted words
      *)
        if [[ -n ${args['file']+x} ]]; then
          args['file']+=" \"$1\""
        else
          args['file']="\"$1\""
        fi
        shift
        ;;
    esac
  done

  # defaults for everything that is still empty
  if [[ -z ${args['file']:-} ]]; then
    args['file']="-"
  fi
  if [[ -z ${args['--limit']:-} ]]; then
    args['--limit']="-1"
  fi
}
# :command.parse_requirements
orcli_list_parse_requirements() {
# :command.fixed_flags_filter
@ -3701,6 +4380,8 @@ run() {
"import") orcli_import_command ;;
"import csv") orcli_import_csv_command ;;
"import tsv") orcli_import_tsv_command ;;
"import json") orcli_import_json_command ;;
"import jsonl") orcli_import_jsonl_command ;;
"list") orcli_list_command ;;
"info") orcli_info_command ;;
"search") orcli_search_command ;;

View File

@ -195,6 +195,70 @@ commands:
--projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: json
help: import JSON
args:
- *file
flags:
- &recordPath
long: --recordPath
help: specify record path elements in JSON array
arg: json
default: "[ \\\"_\\\" , \\\"_\\\" ]"
- &rename
long: --rename
help: rename columns after import to remove record path fragments
- *guessCellValueTypes
- *includeFileSources
- *includeArchiveFileName
- *limit
- &storeEmptyStrings
long: --storeEmptyStrings
help: preserve empty strings
- *trimStrings
- *projectName
- *projectTags
- *quiet
examples:
- orcli import json "file"
- orcli import json "file1" "file2"
- orcli import json "https://example.com/file.json"
- |-
orcli import json "file" \\\\
--recordPath '[ "_", "rows", "_" ]' \\\\
--rename \\\\
--storeEmptyStrings \\\\
--trimStrings \\\\
--projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: jsonl
help: import JSON Lines / newline-delimited JSON
args:
- *file
flags:
- *rename
- *guessCellValueTypes
- *includeFileSources
- *includeArchiveFileName
- *limit
- *storeEmptyStrings
- *trimStrings
- *projectName
- *projectTags
- *quiet
examples:
- orcli import jsonl "file"
- orcli import jsonl "file1" "file2"
- orcli import jsonl "https://example.com/file.json"
- |-
orcli import jsonl "file" \\\\
--rename \\\\
--storeEmptyStrings \\\\
--trimStrings \\\\
--projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: list
help: list projects on OpenRefine server

View File

@ -54,10 +54,6 @@ if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
@ -70,15 +66,19 @@ if [[ ${args[--skipDataLines]} ]]; then
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
if [[ ${args[--projectName]} ]]; then
options+=', '
options+='"trimStrings": true'
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")

View File

@ -0,0 +1,53 @@
# shellcheck shell=bash disable=SC2154
#
# Body of the `orcli import json` command: assembles the importer options as
# a JSON object and hands the collected post data to post_import.
# Reads the `args` map and `file`; appends "format" and "options" to `data`.

# call init_import function to eval args and to set basic post data
init_import

# check if stdin is present if selected
# (read -t 0 only tests whether input is available, it consumes nothing)
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
  orcli_import_json_usage
  exit 1
fi

# assemble specific post data (some options require json format)
data+=("format=text/json")
options='{ '
# --recordPath is already a JSON array and is inserted verbatim
options+="\"recordPath\": ${args[--recordPath]}"
if [[ ${args[--guessCellValueTypes]} ]]; then
  options+=', '
  options+='"guessCellValueTypes": true'
fi
if [[ ${args[--includeFileSources]} ]]; then
  options+=', '
  # the member name must be quoted, otherwise the options object is not valid JSON
  options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
  options+=', '
  options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
  options+=', '
  options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--storeEmptyStrings]} ]]; then
  options+=', '
  options+='"storeEmptyStrings": true'
fi
if [[ ${args[--projectName]} ]]; then
  options+=', '
  options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
  # split the comma separated list and render it as a JSON array of strings;
  # printf emits ,"tag" per element and cut drops the leading comma
  IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
  options+=', '
  options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
  options+=', '
  options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")

# call post_import function to post data and validate results
post_import "${data[@]}"

View File

@ -0,0 +1,53 @@
# shellcheck shell=bash disable=SC2154
#
# Body of the `orcli import jsonl` command: assembles the importer options as
# a JSON object and hands the collected post data to post_import.
# Reads the `args` map and `file`; appends "format" and "options" to `data`.

# call init_import function to eval args and to set basic post data
init_import

# check if stdin is present if selected
# (read -t 0 only tests whether input is available, it consumes nothing)
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
  orcli_import_jsonl_usage
  exit 1
fi

# assemble specific post data (some options require json format)
data+=("format=text/json")
options='{ '
# the record path is fixed for this command; there is no --recordPath flag
options+="\"recordPath\": [\"_\"]"
if [[ ${args[--guessCellValueTypes]} ]]; then
  options+=', '
  options+='"guessCellValueTypes": true'
fi
if [[ ${args[--includeFileSources]} ]]; then
  options+=', '
  # the member name must be quoted, otherwise the options object is not valid JSON
  options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
  options+=', '
  options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
  options+=', '
  options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--storeEmptyStrings]} ]]; then
  options+=', '
  options+='"storeEmptyStrings": true'
fi
if [[ ${args[--projectName]} ]]; then
  options+=', '
  options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
  # split the comma separated list and render it as a JSON array of strings;
  # printf emits ,"tag" per element and cut drops the leading comma
  IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
  options+=', '
  options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
  options+=', '
  options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")

# call post_import function to post data and validate results
post_import "${data[@]}"

View File

@ -54,10 +54,6 @@ if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
@ -70,15 +66,19 @@ if [[ ${args[--skipDataLines]} ]]; then
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
if [[ ${args[--projectName]} ]]; then
options+=', '
options+='"trimStrings": true'
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")

View File

@ -42,4 +42,15 @@ function post_import() {
else
log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}"
fi
# json / jsonl --rename
if [[ ${args[--rename]} ]]; then
csrf="$(get_csrf)"
readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')
for c in "${columns[@]}"; do
if ! curl -fs -o /dev/null --data project="$projectid" --data "oldColumnName=${c}" --data "newColumnName=${c##_ - }" "${OPENREFINE_URL}/command/core/rename-column${csrf}"; then
error "renaming columns in ${projectname} failed!"
fi
done
log "renamed columns in ${projectname}"
fi
}

View File

@ -38,10 +38,18 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'completions\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import json\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
@ -63,7 +71,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv json jsonl tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'search\'*)'

17
tests/data/example.json Normal file
View File

@ -0,0 +1,17 @@
[
{
"a": 1,
"b": 2,
"c": 3
},
{
"a": 0,
"b": 0,
"c": 0
},
{
"a": "$",
"b": "\\",
"c": "'"
}
]

3
tests/data/example.jsonl Normal file
View File

@ -0,0 +1,3 @@
{"a":1, "b":2, "c":3}
{"a":0, "b":0, "c":0}
{"a":"$", "b":"\\", "c":"'"}

View File

@ -0,0 +1,42 @@
#!/bin/bash
# Test: import a JSON document whose records sit under a top-level "rows"
# key by passing an explicit --recordPath, export the project as TSV and
# compare it with the expected table. The script's exit status is that of
# the final diff.
# NOTE(review): whitespace inside the sed expression below is significant for
# a TSV comparison - confirm the separators in the real file are tabs.
t="import-json-recordPath"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals 2, 3 and 15
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.json"
{
"rows":[
{
"a":1,
"b":2,
"c":3
},
{
"a":0,
"b":0,
"c":0
},
{
"a":"$",
"b":"\\",
"c":"'"
}
]
}
DATA
# assertion
cp data/example.tsv "${tmpdir}/${t}.assert"
# without --rename the expected header carries the "_ - " prefix on each column
sed -i 's/a b c/_ - a _ - b _ - c/' "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import json "${t}.json" --projectName "${t}" --recordPath '["_", "rows", "_"]'
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,21 @@
#!/bin/bash
# Test: import the shared example JSON with --rename and check that the TSV
# export equals data/example.tsv unchanged (i.e. with its original header).
# The script's exit status is that of the final diff.
t="import-json-rename"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals 2, 3 and 15
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.json "${tmpdir}/${t}.json"
# assertion
cp data/example.tsv "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import json "${t}.json" --projectName "${t}" --rename
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,44 @@
#!/bin/bash
# Test: import JSON in which one record holds only empty strings, using
# --storeEmptyStrings, export the project as TSV and compare it with the
# expected table. The script's exit status is that of the final diff.
# NOTE(review): the expected table below is whitespace-sensitive (TSV) and
# shows no row for the empty-string record - confirm against the real file
# that tabs and any blank row are intact.
t="import-json-storeEmptyStrings"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals 2, 3 and 15
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.json"
[
{
"a": 1,
"b": 2,
"c": 3
},
{
"a": "",
"b": "",
"c": ""
},
{
"a": "$",
"b": "\\",
"c": "'"
}
]
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
_ - a _ - b _ - c
1 2 3
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import json "${t}.json" --projectName "${t}" --storeEmptyStrings
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,44 @@
#!/bin/bash
# Test: import JSON containing string values with leading (" 0") and
# trailing ("0 ") whitespace, using --trimStrings, export the project as TSV
# and compare it with the expected table, where those cells read "0".
# The script's exit status is that of the final diff.
# NOTE(review): the expected table below is whitespace-sensitive (TSV) -
# confirm the separators in the real file are tabs.
t="import-json-trimStrings"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals 2, 3 and 15
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.json"
[
{
"a": 1,
"b": 2,
"c": 3
},
{
"a": 0,
"b": " 0",
"c": "0 "
},
{
"a": "$",
"b": "\\",
"c": "'"
}
]
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
_ - a _ - b _ - c
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import json "${t}.json" --projectName "${t}" --trimStrings
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

22
tests/import-json.sh Normal file
View File

@ -0,0 +1,22 @@
#!/bin/bash
# Test: import the shared example JSON with default options, export the
# project as TSV and compare it with data/example.tsv after adjusting the
# header. The script's exit status is that of the final diff.
# NOTE(review): whitespace inside the sed expression below is significant for
# a TSV comparison - confirm the separators in the real file are tabs.
t="import-json"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals 2, 3 and 15
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.json "${tmpdir}/${t}.json"
# assertion
cp data/example.tsv "${tmpdir}/${t}.assert"
# without --rename the expected header carries the "_ - " prefix on each column
sed -i 's/a b c/_ - a _ - b _ - c/' "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import json "${t}.json" --projectName "${t}"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

22
tests/import-jsonl.sh Normal file
View File

@ -0,0 +1,22 @@
#!/bin/bash
# Test: import the shared example JSON Lines file with default options,
# export the project as TSV and compare it with data/example.tsv after
# adjusting the header. The script's exit status is that of the final diff.
# NOTE(review): whitespace inside the sed expression below is significant for
# a TSV comparison - confirm the separators in the real file are tabs.
t="import-jsonl"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals 2, 3 and 15
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.jsonl "${tmpdir}/${t}.jsonl"
# assertion
cp data/example.tsv "${tmpdir}/${t}.assert"
# without --rename the expected header carries the "_ - " prefix on each column
sed -i 's/a b c/_ - a _ - b _ - c/' "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import jsonl "${t}.jsonl" --projectName "${t}"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"