import json and jsonl: first draft

This commit is contained in:
Felix Lohmeier 2023-10-28 16:07:08 +00:00
parent aad08917b2
commit d19031a04a
16 changed files with 1111 additions and 29 deletions

View File

@ -12,7 +12,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
* allows execution of arbitrary bash scripts
* interactive mode for playing around and debugging
* your existing OpenRefine data will not be touched
* import CSV, TSV, ~~line-based TXT, fixed-width TXT, JSON or XML~~
* import CSV, TSV, JSON, JSONL, ~~line-based TXT, fixed-width TXT or XML~~
* supports stdin, multiple files and URLs
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
* orcli calls specific endpoints for each operation to provide improved error handling and logging

View File

@ -7,6 +7,8 @@
- [export template](export_template.md)
- [export tsv](export_tsv.md)
- [import csv](import_csv.md)
- [import json](import_json.md)
- [import jsonl](import_jsonl.md)
- [import tsv](import_tsv.md)
- [info](info.md)
- [list](list.md)

64
help/import_json.md Normal file
View File

@ -0,0 +1,64 @@
# orcli import json
```
orcli import json - import JSON
Usage:
orcli import json [FILE...] [OPTIONS]
orcli import json --help | -h
Options:
--recordPath JSON
specify record path elements in JSON array
Default: [ "_" , "_" ]
--guessCellValueTypes
attempt to parse cell text into numbers
--includeFileSources
add column with file source
--includeArchiveFileName
add column with archive file name
--limit LIMIT
load at most x row(s) of data
Default: -1
--storeEmptyStrings
preserve empty strings
--trimStrings
trim leading & trailing whitespace from strings
--projectName PROJECTNAME
set a name for the OpenRefine project
--projectTags PROJECTTAGS
set project tags (comma separated)
--quiet, -q
suppress log output, print errors only
--help, -h
Show this help
Arguments:
FILE...
Path to one or more files or URLs. When FILE is -, read standard input.
Default: -
Examples:
orcli import json "file"
orcli import json "file1" "file2"
orcli import json "https://example.com/file.json"
orcli import json "file" \
--recordPath '[ "_", "rows", "_" ]' \
--limit 100 \
--trimStrings \
--projectName "duplicates" \
--projectTags "test,urgent"
```
code: [src/import_json_command.sh](../src/import_json_command.sh)

64
help/import_jsonl.md Normal file
View File

@ -0,0 +1,64 @@
# orcli import jsonl
```
orcli import jsonl - import JSON Lines / newline-delimited JSON
Usage:
orcli import jsonl [FILE...] [OPTIONS]
orcli import jsonl --help | -h
Options:
--recordPath JSON
specify record path elements in JSON array
Default: [ "_" ]
--guessCellValueTypes
attempt to parse cell text into numbers
--includeFileSources
add column with file source
--includeArchiveFileName
add column with archive file name
--limit LIMIT
load at most x row(s) of data
Default: -1
--storeEmptyStrings
preserve empty strings
--trimStrings
trim leading & trailing whitespace from strings
--projectName PROJECTNAME
set a name for the OpenRefine project
--projectTags PROJECTTAGS
set project tags (comma separated)
--quiet, -q
suppress log output, print errors only
--help, -h
Show this help
Arguments:
FILE...
Path to one or more files or URLs. When FILE is -, read standard input.
Default: -
Examples:
orcli import jsonl "file"
orcli import jsonl "file1" "file2"
orcli import jsonl "https://example.com/file.json"
orcli import jsonl "file" \
--recordPath '[ "_" ]' \
--limit 100 \
--trimStrings \
--projectName "duplicates" \
--projectTags "test,urgent"
```
code: [src/import_jsonl_command.sh](../src/import_jsonl_command.sh)

696
orcli
View File

@ -194,8 +194,10 @@ orcli_import_usage() {
echo
# :command.usage_commands
printf "%s\n" "Commands:"
printf " %s import character-separated values (CSV)\n" "csv"
printf " %s import tab-separated values (TSV)\n" "tsv"
printf " %s import character-separated values (CSV)\n" "csv "
printf " %s import tab-separated values (TSV)\n" "tsv "
printf " %s import JSON\n" "json "
printf " %s import JSON Lines / newline-delimited JSON\n" "jsonl"
echo
# :command.long_usage
@ -492,6 +494,204 @@ orcli_import_tsv_usage() {
fi
}
# :command.usage
orcli_import_json_usage() {
  # Print the help text for "orcli import json" on stdout.
  # Globals: long_usage (read) - when non-empty, the options, arguments and
  #          examples sections follow the synopsis.

  # header and synopsis are the same for short and long usage
  local -a synopsis=(
    "orcli import json - import JSON"
    ""
    "Usage:"
    " orcli import json [FILE...] [OPTIONS]"
    " orcli import json --help | -h"
    ""
  )
  printf '%s\n' "${synopsis[@]}"

  # short usage ends after the synopsis
  if [[ -z $long_usage ]]; then
    return 0
  fi

  # :command.long_usage
  local -a details=(
    "Options:"
    # :command.usage_flags
    # :flag.usage
    " --recordPath JSON"
    " specify record path elements in JSON array"
    ' Default: [ "_" , "_" ]'
    ""
    # :flag.usage
    " --guessCellValueTypes"
    " attempt to parse cell text into numbers"
    ""
    # :flag.usage
    " --includeFileSources"
    " add column with file source"
    ""
    # :flag.usage
    " --includeArchiveFileName"
    " add column with archive file name"
    ""
    # :flag.usage
    " --limit LIMIT"
    " load at most x row(s) of data"
    " Default: -1"
    ""
    # :flag.usage
    " --storeEmptyStrings"
    " preserve empty strings"
    ""
    # :flag.usage
    " --trimStrings"
    " trim leading & trailing whitespace from strings"
    ""
    # :flag.usage
    " --projectName PROJECTNAME"
    " set a name for the OpenRefine project"
    ""
    # :flag.usage
    " --projectTags PROJECTTAGS"
    " set project tags (comma separated)"
    ""
    # :flag.usage
    " --quiet, -q"
    " suppress log output, print errors only"
    ""
    # :command.usage_fixed_flags
    " --help, -h"
    " Show this help"
    ""
    # :command.usage_args
    "Arguments:"
    # :argument.usage
    " FILE..."
    " Path to one or more files or URLs. When FILE is -, read standard input."
    " Default: -"
    ""
    # :command.usage_examples
    "Examples:"
    ' orcli import json "file"'
    ' orcli import json "file1" "file2"'
    ' orcli import json "https://example.com/file.json"'
    ' orcli import json "file" \'
    " --recordPath '[ \"_\", \"rows\", \"_\" ]' \\"
    ' --limit 100 \'
    ' --trimStrings \'
    ' --projectName "duplicates" \'
    ' --projectTags "test,urgent"'
    ""
  )
  printf '%s\n' "${details[@]}"
}
# :command.usage
orcli_import_jsonl_usage() {
  # Print the help text for "orcli import jsonl" on stdout.
  # Globals: long_usage (read) - when non-empty, the options, arguments and
  #          examples sections follow the synopsis.

  # header and synopsis are the same for short and long usage
  local -a synopsis=(
    "orcli import jsonl - import JSON Lines / newline-delimited JSON"
    ""
    "Usage:"
    " orcli import jsonl [FILE...] [OPTIONS]"
    " orcli import jsonl --help | -h"
    ""
  )
  printf '%s\n' "${synopsis[@]}"

  # short usage ends after the synopsis
  if [[ -z $long_usage ]]; then
    return 0
  fi

  # :command.long_usage
  local -a details=(
    "Options:"
    # :command.usage_flags
    # :flag.usage
    " --recordPath JSON"
    " specify record path elements in JSON array"
    ' Default: [ "_" ]'
    ""
    # :flag.usage
    " --guessCellValueTypes"
    " attempt to parse cell text into numbers"
    ""
    # :flag.usage
    " --includeFileSources"
    " add column with file source"
    ""
    # :flag.usage
    " --includeArchiveFileName"
    " add column with archive file name"
    ""
    # :flag.usage
    " --limit LIMIT"
    " load at most x row(s) of data"
    " Default: -1"
    ""
    # :flag.usage
    " --storeEmptyStrings"
    " preserve empty strings"
    ""
    # :flag.usage
    " --trimStrings"
    " trim leading & trailing whitespace from strings"
    ""
    # :flag.usage
    " --projectName PROJECTNAME"
    " set a name for the OpenRefine project"
    ""
    # :flag.usage
    " --projectTags PROJECTTAGS"
    " set project tags (comma separated)"
    ""
    # :flag.usage
    " --quiet, -q"
    " suppress log output, print errors only"
    ""
    # :command.usage_fixed_flags
    " --help, -h"
    " Show this help"
    ""
    # :command.usage_args
    "Arguments:"
    # :argument.usage
    " FILE..."
    " Path to one or more files or URLs. When FILE is -, read standard input."
    " Default: -"
    ""
    # :command.usage_examples
    "Examples:"
    ' orcli import jsonl "file"'
    ' orcli import jsonl "file1" "file2"'
    ' orcli import jsonl "https://example.com/file.json"'
    ' orcli import jsonl "file" \'
    " --recordPath '[ \"_\" ]' \\"
    ' --limit 100 \'
    ' --trimStrings \'
    ' --projectName "duplicates" \'
    ' --projectTags "test,urgent"'
    ""
  )
  printf '%s\n' "${details[@]}"
}
# :command.usage
orcli_list_usage() {
if [[ -n $long_usage ]]; then
@ -1278,10 +1478,18 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'completions\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import json\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
@ -1303,7 +1511,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv json jsonl tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'search\'*)'
@ -1440,10 +1648,6 @@ orcli_import_csv_command() {
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
@ -1456,15 +1660,19 @@ orcli_import_csv_command() {
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
if [[ ${args[--projectName]} ]]; then
options+=', '
options+='"trimStrings": true'
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")
@ -1532,10 +1740,6 @@ orcli_import_tsv_command() {
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
@ -1548,15 +1752,137 @@ orcli_import_tsv_command() {
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
if [[ ${args[--projectName]} ]]; then
options+=', '
options+='"trimStrings": true'
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")
# call post_import function to post data and validate results
post_import "${data[@]}"
}
# :command.function
orcli_import_json_command() {
  # src/import_json_command.sh
  # shellcheck shell=bash disable=SC2154
  #
  # Handler for "orcli import json": assembles the importer options as a JSON
  # object string, appends it to the post data and passes everything to
  # post_import.
  # Globals: args (read)    - parsed flags
  #          file (read)    - '-' selects standard input
  #          data (appended), options and projectTags (written)
  # Exits:   1 (after printing the usage) when stdin was selected but no
  #          input is available on it.

  # call init_import function to eval args and to set basic post data
  init_import

  # check if stdin is present if selected
  # (read -t 0 consumes nothing; it only reports whether fd 0 has input)
  if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
    orcli_import_json_usage
    exit 1
  fi

  # assemble specific post data (some options require json format)
  data+=("format=text/json")
  options='{ '
  # recordPath is inserted verbatim, i.e. it must already be a JSON array
  options+="\"recordPath\": ${args[--recordPath]}"
  if [[ ${args[--guessCellValueTypes]} ]]; then
    options+=', '
    options+='"guessCellValueTypes": true'
  fi
  if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    # the key has to be double-quoted, otherwise the options object is not
    # valid JSON
    options+='"includeFileSources": true'
  fi
  if [[ ${args[--includeArchiveFileName]} ]]; then
    options+=', '
    options+='"includeArchiveFileName": true'
  fi
  if [[ ${args[--limit]} ]]; then
    options+=', '
    options+="\"limit\": ${args[--limit]}"
  fi
  if [[ ${args[--storeEmptyStrings]} ]]; then
    options+=', '
    options+='"storeEmptyStrings": true'
  fi
  if [[ ${args[--projectName]} ]]; then
    options+=', '
    options+="\"projectName\": \"${args[--projectName]}\""
  fi
  if [[ ${args[--projectTags]} ]]; then
    # split the comma separated list and emit it as a JSON array of strings;
    # printf prefixes every element with a comma, cut drops the first one
    IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
    options+=', '
    options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
  fi
  if [[ ${args[--trimStrings]} ]]; then
    options+=', '
    options+='"trimStrings": true'
  fi
  options+=' }'
  data+=("options=${options}")

  # call post_import function to post data and validate results
  post_import "${data[@]}"
}
# :command.function
orcli_import_jsonl_command() {
# src/import_jsonl_command.sh
# shellcheck shell=bash disable=SC2154
# call init_import function to eval args and to set basic post data
init_import
# check if stdin is present if selected
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
orcli_import_jsonl_usage
exit 1
fi
# assemble specific post data (some options require json format)
data+=("format=text/json")
options='{ '
options+="\"recordPath\": ${args[--recordPath]}"
if [[ ${args[--guessCellValueTypes]} ]]; then
options+=', '
options+='"guessCellValueTypes": true'
fi
if [[ ${args[--includeFileSources]} ]]; then
  options+=', '
  # the key has to be double-quoted, otherwise the options object is not
  # valid JSON
  options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
options+=', '
options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--storeEmptyStrings]} ]]; then
options+=', '
options+='"storeEmptyStrings": true'
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")
@ -2331,6 +2657,20 @@ orcli_import_parse_requirements() {
shift $#
;;
json)
action="json"
shift
orcli_import_json_parse_requirements "$@"
shift $#
;;
jsonl)
action="jsonl"
shift
orcli_import_jsonl_parse_requirements "$@"
shift $#
;;
# :command.command_fallback
"")
orcli_import_usage >&2
@ -2915,6 +3255,330 @@ orcli_import_tsv_parse_requirements() {
}
# :command.parse_requirements
orcli_import_json_parse_requirements() {
  # Parse the command line of "orcli import json".
  # Arguments: "$@" - the words following "import json"
  # Globals:   args (written) - flag values, plus 'file' holding every FILE
  #                             wrapped in double quotes and joined by spaces
  #            action, key (written); long_usage (written on --help/-h)
  # Exits:     after printing the long usage for a leading --help/-h;
  #            with status 1 on an unknown option or a missing flag value.

  # :command.fixed_flags_filter
  if [[ ${1:-} == "--help" || ${1:-} == "-h" ]]; then
    long_usage=yes
    orcli_import_json_usage
    exit
  fi

  # :command.command_filter
  action="import json"

  # :command.parse_requirements_while
  while (( $# > 0 )); do
    key="$1"
    case "$key" in
      # :flag.case (flags that take a value)
      --recordPath)
        if (( $# < 2 )); then
          printf "%s\n" "--recordPath requires an argument: --recordPath JSON" >&2
          exit 1
        fi
        args['--recordPath']="$2"
        shift 2
        ;;

      --limit)
        if (( $# < 2 )); then
          printf "%s\n" "--limit requires an argument: --limit LIMIT" >&2
          exit 1
        fi
        args['--limit']="$2"
        shift 2
        ;;

      --projectName)
        if (( $# < 2 )); then
          printf "%s\n" "--projectName requires an argument: --projectName PROJECTNAME" >&2
          exit 1
        fi
        args['--projectName']="$2"
        shift 2
        ;;

      --projectTags)
        if (( $# < 2 )); then
          printf "%s\n" "--projectTags requires an argument: --projectTags PROJECTTAGS" >&2
          exit 1
        fi
        args['--projectTags']="$2"
        shift 2
        ;;

      # :flag.case_no_arg (switches are stored under their own name)
      --guessCellValueTypes | --includeFileSources | --includeArchiveFileName | --storeEmptyStrings | --trimStrings)
        args["$key"]=1
        shift
        ;;

      # the short form is stored under the long name
      --quiet | -q)
        args['--quiet']=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # :command.parse_requirements_case
        # :command.parse_requirements_case_repeatable
        if [[ -n ${args['file']+x} ]]; then
          args['file']="${args[file]} \"$1\""
        else
          args['file']="\"$1\""
        fi
        shift
        ;;
    esac
  done

  # :command.default_assignments
  if [[ -z ${args['file']:-} ]]; then
    args['file']="-"
  fi
  if [[ -z ${args['--recordPath']:-} ]]; then
    args['--recordPath']="[ \"_\" , \"_\" ]"
  fi
  if [[ -z ${args['--limit']:-} ]]; then
    args['--limit']="-1"
  fi
}
# :command.parse_requirements
orcli_import_jsonl_parse_requirements() {
  # Parse the command line of "orcli import jsonl".
  # Arguments: "$@" - the words following "import jsonl"
  # Globals:   args (written) - flag values, plus 'file' holding every FILE
  #                             wrapped in double quotes and joined by spaces
  #            action, key (written); long_usage (written on --help/-h)
  # Exits:     after printing the long usage for a leading --help/-h;
  #            with status 1 on an unknown option or a missing flag value.

  # :command.fixed_flags_filter
  if [[ ${1:-} == "--help" || ${1:-} == "-h" ]]; then
    long_usage=yes
    orcli_import_jsonl_usage
    exit
  fi

  # :command.command_filter
  action="import jsonl"

  # :command.parse_requirements_while
  while (( $# > 0 )); do
    key="$1"
    case "$key" in
      # :flag.case (flags that take a value)
      --recordPath)
        if (( $# < 2 )); then
          printf "%s\n" "--recordPath requires an argument: --recordPath JSON" >&2
          exit 1
        fi
        args['--recordPath']="$2"
        shift 2
        ;;

      --limit)
        if (( $# < 2 )); then
          printf "%s\n" "--limit requires an argument: --limit LIMIT" >&2
          exit 1
        fi
        args['--limit']="$2"
        shift 2
        ;;

      --projectName)
        if (( $# < 2 )); then
          printf "%s\n" "--projectName requires an argument: --projectName PROJECTNAME" >&2
          exit 1
        fi
        args['--projectName']="$2"
        shift 2
        ;;

      --projectTags)
        if (( $# < 2 )); then
          printf "%s\n" "--projectTags requires an argument: --projectTags PROJECTTAGS" >&2
          exit 1
        fi
        args['--projectTags']="$2"
        shift 2
        ;;

      # :flag.case_no_arg (switches are stored under their own name)
      --guessCellValueTypes | --includeFileSources | --includeArchiveFileName | --storeEmptyStrings | --trimStrings)
        args["$key"]=1
        shift
        ;;

      # the short form is stored under the long name
      --quiet | -q)
        args['--quiet']=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # :command.parse_requirements_case
        # :command.parse_requirements_case_repeatable
        if [[ -n ${args['file']+x} ]]; then
          args['file']="${args[file]} \"$1\""
        else
          args['file']="\"$1\""
        fi
        shift
        ;;
    esac
  done

  # :command.default_assignments
  if [[ -z ${args['file']:-} ]]; then
    args['file']="-"
  fi
  if [[ -z ${args['--recordPath']:-} ]]; then
    args['--recordPath']="[ \"_\" ]"
  fi
  if [[ -z ${args['--limit']:-} ]]; then
    args['--limit']="-1"
  fi
}
# :command.parse_requirements
orcli_list_parse_requirements() {
# :command.fixed_flags_filter
@ -3701,6 +4365,8 @@ run() {
"import") orcli_import_command ;;
"import csv") orcli_import_csv_command ;;
"import tsv") orcli_import_tsv_command ;;
"import json") orcli_import_json_command ;;
"import jsonl") orcli_import_jsonl_command ;;
"list") orcli_list_command ;;
"info") orcli_info_command ;;
"search") orcli_search_command ;;

View File

@ -195,6 +195,72 @@ commands:
--projectName "duplicates" \\\\
--projectTags "test,urgent"
# "import json" subcommand. Entries written as *name are aliases to anchors
# declared elsewhere in this file; entries written as &name declare a new
# anchor at this point.
- name: json
help: import JSON
args:
- *file
flags:
# anchored as "recordPath"; the default is a two-element record path
- &recordPath
long: --recordPath
help: specify record path elements in JSON array
arg: json
default: "[ \\\"_\\\" , \\\"_\\\" ]"
- *guessCellValueTypes
- *includeFileSources
- *includeArchiveFileName
- *limit
# switch without a value, anchored as "storeEmptyStrings"
- &storeEmptyStrings
long: --storeEmptyStrings
help: preserve empty strings
- *trimStrings
- *projectName
- *projectTags
- *quiet
examples:
- orcli import json "file"
- orcli import json "file1" "file2"
- orcli import json "https://example.com/file.json"
- |-
orcli import json "file" \\\\
--recordPath '[ "_", "rows", "_" ]' \\\\
--limit 100 \\\\
--trimStrings \\\\
--projectName "duplicates" \\\\
--projectTags "test,urgent"
# "import jsonl" subcommand; same flag set as "import json", but the default
# record path has a single element.
- name: jsonl
help: import JSON Lines / newline-delimited JSON
args:
- *file
flags:
# NOTE(review): the anchor name "recordPath" is already declared by the
# "json" command; YAML allows re-declaring an anchor (an alias resolves to the
# most recent declaration), but a distinct name would be less surprising.
- &recordPath
long: --recordPath
help: specify record path elements in JSON array
arg: json
default: "[ \\\"_\\\" ]"
- *guessCellValueTypes
- *includeFileSources
- *includeArchiveFileName
- *limit
# NOTE(review): same definition as the "storeEmptyStrings" anchor declared by
# the "json" command; presumably this could be the alias *storeEmptyStrings -
# confirm.
- &storeEmptyStrings
long: --storeEmptyStrings
help: preserve empty strings
- *trimStrings
- *projectName
- *projectTags
- *quiet
examples:
- orcli import jsonl "file"
- orcli import jsonl "file1" "file2"
- orcli import jsonl "https://example.com/file.json"
- |-
orcli import jsonl "file" \\\\
--recordPath '[ "_" ]' \\\\
--limit 100 \\\\
--trimStrings \\\\
--projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: list
help: list projects on OpenRefine server

View File

@ -54,10 +54,6 @@ if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
@ -70,15 +66,19 @@ if [[ ${args[--skipDataLines]} ]]; then
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
if [[ ${args[--projectName]} ]]; then
options+=', '
options+='"trimStrings": true'
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")

View File

@ -0,0 +1,53 @@
# shellcheck shell=bash disable=SC2154
# Body of the "import json" command: assembles the importer options as a JSON
# object string, appends it to the post data and passes everything to
# post_import.

# call init_import function to eval args and to set basic post data
init_import

# check if stdin is present if selected
# (read -t 0 consumes nothing; it only reports whether fd 0 has input)
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
  orcli_import_json_usage
  exit 1
fi

# assemble specific post data (some options require json format)
data+=("format=text/json")
options='{ '
# recordPath is inserted verbatim, i.e. it must already be a JSON array
options+="\"recordPath\": ${args[--recordPath]}"
if [[ ${args[--guessCellValueTypes]} ]]; then
  options+=', '
  options+='"guessCellValueTypes": true'
fi
if [[ ${args[--includeFileSources]} ]]; then
  options+=', '
  # the key has to be double-quoted, otherwise the options object is not
  # valid JSON
  options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
  options+=', '
  options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
  options+=', '
  options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--storeEmptyStrings]} ]]; then
  options+=', '
  options+='"storeEmptyStrings": true'
fi
if [[ ${args[--projectName]} ]]; then
  options+=', '
  options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
  # split the comma separated list and emit it as a JSON array of strings;
  # printf prefixes every element with a comma, cut drops the first one
  IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
  options+=', '
  options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
  options+=', '
  options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")

# call post_import function to post data and validate results
post_import "${data[@]}"

View File

@ -0,0 +1,53 @@
# shellcheck shell=bash disable=SC2154
# Body of the "import jsonl" command: assembles the importer options as a JSON
# object string, appends it to the post data and passes everything to
# post_import.

# call init_import function to eval args and to set basic post data
init_import

# check if stdin is present if selected
# (read -t 0 consumes nothing; it only reports whether fd 0 has input)
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
  orcli_import_jsonl_usage
  exit 1
fi

# assemble specific post data (some options require json format)
data+=("format=text/json")
options='{ '
# recordPath is inserted verbatim, i.e. it must already be a JSON array
options+="\"recordPath\": ${args[--recordPath]}"
if [[ ${args[--guessCellValueTypes]} ]]; then
  options+=', '
  options+='"guessCellValueTypes": true'
fi
if [[ ${args[--includeFileSources]} ]]; then
  options+=', '
  # the key has to be double-quoted, otherwise the options object is not
  # valid JSON
  options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
  options+=', '
  options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
  options+=', '
  options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--storeEmptyStrings]} ]]; then
  options+=', '
  options+='"storeEmptyStrings": true'
fi
if [[ ${args[--projectName]} ]]; then
  options+=', '
  options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
  # split the comma separated list and emit it as a JSON array of strings;
  # printf prefixes every element with a comma, cut drops the first one
  IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
  options+=', '
  options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
  options+=', '
  options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")

# call post_import function to post data and validate results
post_import "${data[@]}"

View File

@ -54,10 +54,6 @@ if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
@ -70,15 +66,19 @@ if [[ ${args[--skipDataLines]} ]]; then
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
if [[ ${args[--projectName]} ]]; then
options+=', '
options+='"trimStrings": true'
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+='"trimStrings": true'
fi
options+=' }'
data+=("options=${options}")

View File

@ -38,10 +38,18 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'completions\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import json\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
@ -63,7 +71,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv json jsonl tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'search\'*)'

17
tests/data/example.json Normal file
View File

@ -0,0 +1,17 @@
[
{
"a": 1,
"b": 2,
"c": 3
},
{
"a": 0,
"b": 0,
"c": 0
},
{
"a": "$",
"b": "\\",
"c": "'"
}
]

3
tests/data/example.jsonl Normal file
View File

@ -0,0 +1,3 @@
{"a":1, "b":2, "c":3}
{"a":0, "b":0, "c":0}
{"a":"$", "b":"\\", "c":"'"}

View File

@ -0,0 +1,42 @@
#!/bin/bash
# Test: import a JSON document whose records sit below the "rows" key by
# passing an explicit --recordPath, export the project as TSV and compare the
# result with the expected table.
# Paths such as data/example.tsv are relative, so the script has to be started
# from the directory that contains data/.
t="import-json-recordPath"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals INT (2), QUIT (3), TERM (15)
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
# (quoted delimiter: the here-document is written literally, without expansion)
cat << "DATA" > "${tmpdir}/${t}.json"
{
"rows":[
{
"a":1,
"b":2,
"c":3
},
{
"a":0,
"b":0,
"c":0
},
{
"a":"$",
"b":"\\",
"c":"'"
}
]
}
DATA
# assertion
# the expected output is the shared example table with its header rewritten to
# the "_ - <key>" column names
# NOTE(review): example.tsv is presumably tab-separated, so the separators in
# the sed expression need to be literal tabs - confirm
cp data/example.tsv "${tmpdir}/${t}.assert"
sed -i 's/a b c/_ - a _ - b _ - c/' "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import json "${t}.json" --projectName "${t}" --recordPath '["_", "rows", "_"]'
orcli export tsv "${t}" > "${t}.output"
# test
# diff is the last command, so its exit status is the exit status of the script
diff -u "${t}.assert" "${t}.output"

22
tests/import-json.sh Normal file
View File

@ -0,0 +1,22 @@
#!/bin/bash
# Test: import data/example.json with the default record path, export the
# project as TSV and compare the result with the expected table.
# Paths such as data/example.json are relative, so the script has to be started
# from the directory that contains data/.
t="import-json"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals INT (2), QUIT (3), TERM (15)
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.json "${tmpdir}/${t}.json"
# assertion
# the expected output is the shared example table with its header rewritten to
# the "_ - <key>" column names
# NOTE(review): example.tsv is presumably tab-separated, so the separators in
# the sed expression need to be literal tabs - confirm
cp data/example.tsv "${tmpdir}/${t}.assert"
sed -i 's/a b c/_ - a _ - b _ - c/' "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import json "${t}.json" --projectName "${t}"
orcli export tsv "${t}" > "${t}.output"
# test
# diff is the last command, so its exit status is the exit status of the script
diff -u "${t}.assert" "${t}.output"

22
tests/import-jsonl.sh Normal file
View File

@ -0,0 +1,22 @@
#!/bin/bash
# Test: import data/example.jsonl (one JSON object per line) with the default
# record path, export the project as TSV and compare the result with the
# expected table.
# Paths such as data/example.jsonl are relative, so the script has to be
# started from the directory that contains data/.
t="import-jsonl"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on exit (0) and on signals INT (2), QUIT (3), TERM (15)
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.jsonl "${tmpdir}/${t}.jsonl"
# assertion
# the expected output is the shared example table with its header rewritten to
# the "_ - <key>" column names
# NOTE(review): example.tsv is presumably tab-separated, so the separators in
# the sed expression need to be literal tabs - confirm
cp data/example.tsv "${tmpdir}/${t}.assert"
sed -i 's/a b c/_ - a _ - b _ - c/' "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import jsonl "${t}.jsonl" --projectName "${t}"
orcli export tsv "${t}" > "${t}.output"
# test
# diff is the last command, so its exit status is the exit status of the script
diff -u "${t}.assert" "${t}.output"