export jsonl first draft
This commit is contained in:
parent
206bbf60f6
commit
907cc531ea
|
@ -17,7 +17,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
|
|||
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
|
||||
* orcli calls specific endpoints for each operation to provide improved error handling and logging
|
||||
* supports stdin, multiple files and URLs
|
||||
* export to TSV, ~~CSV, HTML, XLS, XLSX, ODS~~
|
||||
* export to TSV, JSONL, ~~CSV, HTML, XLS, XLSX, ODS~~
|
||||
* [templating export](https://docs.openrefine.org/manual/exporting#templating-exporter) to additional formats like JSON or XML
|
||||
|
||||
## Requirements
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
- [completions](completions.md)
|
||||
- [delete](delete.md)
|
||||
- [export jsonl](export_jsonl.md)
|
||||
- [export template](export_template.md)
|
||||
- [export tsv](export_tsv.md)
|
||||
- [import csv](import_csv.md)
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
# orcli export jsonl
|
||||
|
||||
```
|
||||
orcli export jsonl - export JSON Lines / newline-delimited JSON
|
||||
|
||||
Usage:
|
||||
orcli export jsonl PROJECT [OPTIONS]
|
||||
orcli export jsonl --help | -h
|
||||
|
||||
Options:
|
||||
--mode MODE
|
||||
specify if project contains multi-row records
|
||||
Allowed: rows, records
|
||||
Default: rows
|
||||
|
||||
--separator SEPARATOR
|
||||
character(s) that separates multiple values in one cell (row mode only)
|
||||
|
||||
--facets FACETS
|
||||
filter result set by providing an OpenRefine facets config in json
|
||||
Default: []
|
||||
|
||||
--output FILE
|
||||
Write to file instead of stdout
|
||||
|
||||
--encoding ENCODING
|
||||
set character encoding
|
||||
Default: UTF-8
|
||||
|
||||
--quiet, -q
|
||||
suppress log output, print errors only
|
||||
|
||||
--help, -h
|
||||
Show this help
|
||||
|
||||
Arguments:
|
||||
PROJECT
|
||||
project name or id
|
||||
|
||||
Examples:
|
||||
orcli export jsonl "duplicates"
|
||||
orcli export jsonl "duplicates" --output "duplicates.jsonl"
|
||||
orcli export jsonl "duplicates" --separator ' '
|
||||
orcli export jsonl "duplicates" --mode records
|
||||
orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName":
|
||||
"name", "mode": "regex", "caseSensitive": false, "invert": false, "query":
|
||||
"^Ben" } ]'
|
||||
orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression":
|
||||
"grel:filter([\"gender\",\"purchase\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",
|
||||
"columnName": "", "selection": [{"v": {"v": true}}] }]'
|
||||
|
||||
```
|
||||
|
||||
code: [src/export_jsonl_command.sh](../src/export_jsonl_command.sh)
|
|
@ -18,7 +18,7 @@ Options:
|
|||
insert character(s) at the end of the file
|
||||
|
||||
--mode MODE
|
||||
specify if template shall be applied to each row or record
|
||||
specify if project contains multi-row records
|
||||
Allowed: rows, records
|
||||
Default: rows
|
||||
|
||||
|
|
|
@ -8,6 +8,11 @@ Usage:
|
|||
orcli export tsv --help | -h
|
||||
|
||||
Options:
|
||||
--mode MODE
|
||||
specify if project contains multi-row records
|
||||
Allowed: rows, records
|
||||
Default: rows
|
||||
|
||||
--facets FACETS
|
||||
filter result set by providing an OpenRefine facets config in json
|
||||
Default: []
|
||||
|
|
|
@ -51,6 +51,7 @@ Examples:
|
|||
orcli import jsonl "file"
|
||||
orcli import jsonl "file1" "file2"
|
||||
orcli import jsonl "https://example.com/file.json"
|
||||
orcli import jsonl --rename <(orcli export jsonl "duplicates")
|
||||
orcli import jsonl "file" \
|
||||
--rename \
|
||||
--storeEmptyStrings \
|
||||
|
|
377
orcli
377
orcli
|
@ -690,6 +690,7 @@ orcli_import_jsonl_usage() {
|
|||
printf " orcli import jsonl \"file\"\n"
|
||||
printf " orcli import jsonl \"file1\" \"file2\"\n"
|
||||
printf " orcli import jsonl \"https://example.com/file.json\"\n"
|
||||
printf " orcli import jsonl --rename <(orcli export jsonl \"duplicates\")\n"
|
||||
printf " orcli import jsonl \"file\" \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
|
||||
echo
|
||||
|
||||
|
@ -934,6 +935,7 @@ orcli_export_usage() {
|
|||
echo
|
||||
# :command.usage_commands
|
||||
printf "%s\n" "Commands:"
|
||||
printf " %s export JSON Lines / newline-delimited JSON\n" "jsonl "
|
||||
printf " %s export tab-separated values (TSV)\n" "tsv "
|
||||
printf " %s export to any text format by providing your own GREL template\n" "template"
|
||||
echo
|
||||
|
@ -950,6 +952,88 @@ orcli_export_usage() {
|
|||
fi
|
||||
}
|
||||
|
||||
# :command.usage
|
||||
orcli_export_jsonl_usage() {
  # Print the help text for "orcli export jsonl" to stdout.
  # The headline and the usage synopsis are always printed; options,
  # arguments and examples follow only when $long_usage is non-empty.

  # headline (the same text is used for short and long usage) and synopsis
  printf "%s\n" \
    "orcli export jsonl - export JSON Lines / newline-delimited JSON" \
    "" \
    "Usage:" \
    " orcli export jsonl PROJECT [OPTIONS]" \
    " orcli export jsonl --help | -h" \
    ""

  # :command.long_usage
  if [[ -z $long_usage ]]; then
    return 0
  fi

  # :command.usage_flags
  printf "%s\n" \
    "Options:" \
    " --mode MODE" \
    " specify if project contains multi-row records" \
    " Allowed: rows, records" \
    " Default: rows" \
    "" \
    " --separator SEPARATOR" \
    " character(s) that separates multiple values in one cell (row mode only)" \
    "" \
    " --facets FACETS" \
    " filter result set by providing an OpenRefine facets config in json" \
    " Default: []" \
    "" \
    " --output FILE" \
    " Write to file instead of stdout" \
    "" \
    " --encoding ENCODING" \
    " set character encoding" \
    " Default: UTF-8" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    ""

  # :command.usage_fixed_flags
  printf "%s\n" \
    " --help, -h" \
    " Show this help" \
    ""

  # :command.usage_args
  printf "%s\n" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    ""

  # :command.usage_examples
  # lines that contain a single quote are double-quoted, all others are
  # single-quoted so that backslashes and double quotes stay literal
  printf "%s\n" \
    "Examples:" \
    ' orcli export jsonl "duplicates"' \
    ' orcli export jsonl "duplicates" --output "duplicates.jsonl"' \
    " orcli export jsonl \"duplicates\" --separator ' '" \
    ' orcli export jsonl "duplicates" --mode records' \
    " orcli export jsonl \"duplicates\" --facets '[ { \"type\": \"text\", \"columnName\":" \
    ' "name", "mode": "regex", "caseSensitive": false, "invert": false, "query":' \
    " \"^Ben\" } ]'" \
    " orcli export jsonl \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":" \
    ' "grel:filter([\"gender\",\"purchase\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",' \
    " \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'" \
    ""
}
|
||||
|
||||
# :command.usage
|
||||
orcli_export_tsv_usage() {
|
||||
if [[ -n $long_usage ]]; then
|
||||
|
@ -972,6 +1056,13 @@ orcli_export_tsv_usage() {
|
|||
printf "%s\n" "Options:"
|
||||
|
||||
# :command.usage_flags
|
||||
# :flag.usage
|
||||
printf " %s\n" "--mode MODE"
|
||||
printf " specify if project contains multi-row records\n"
|
||||
printf " Allowed: rows, records\n"
|
||||
printf " Default: rows\n"
|
||||
echo
|
||||
|
||||
# :flag.usage
|
||||
printf " %s\n" "--facets FACETS"
|
||||
printf " filter result set by providing an OpenRefine facets config in json\n"
|
||||
|
@ -1058,7 +1149,7 @@ orcli_export_template_usage() {
|
|||
|
||||
# :flag.usage
|
||||
printf " %s\n" "--mode MODE"
|
||||
printf " specify if template shall be applied to each row or record\n"
|
||||
printf " specify if project contains multi-row records\n"
|
||||
printf " Allowed: rows, records\n"
|
||||
printf " Default: rows\n"
|
||||
echo
|
||||
|
@ -1489,6 +1580,14 @@ send_completions() {
|
|||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export jsonl\'*\'--mode\')'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export tsv\'*\'--mode\')'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export template\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
|
@ -1497,6 +1596,10 @@ send_completions() {
|
|||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export jsonl\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'completions\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
|
@ -1514,7 +1617,7 @@ send_completions() {
|
|||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export tsv\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --output --quiet -h -q")" -- "$cur" )'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet -h -q")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'transform\'*)'
|
||||
|
@ -1534,7 +1637,7 @@ send_completions() {
|
|||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h template tsv")" -- "$cur" )'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'list\'*)'
|
||||
|
@ -2172,6 +2275,75 @@ orcli_transform_command() {
|
|||
|
||||
}
|
||||
|
||||
# :command.function
|
||||
orcli_export_jsonl_command() {
  # src/export_jsonl_command.sh
  # shellcheck shell=bash disable=SC2154 disable=SC2155
  #
  # Export a project as JSON Lines by building a GREL template and passing
  # it to post_export (format=template).
  # Reads:  args[project], args[--separator], args[--mode], OPENREFINE_URL
  # Writes: appends project, format and template to the data array
  # Exits:  1 if the multi-value column lookup against OpenRefine fails
  projectid="$(get_id "${args[project]}")"

  # escape backslashes and double quotes so that the separator can be
  # embedded in a GREL string literal without terminating it
  separator_grel=${args[--separator]//\\/\\\\}
  separator_grel=${separator_grel//\"/\\\"}

  # get columns that contain multiple values
  if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
    if [[ ${args[--separator]} ]]; then
      expression="grel:filter(row.columnNames,cn,cells[cn].value.contains(\"${separator_grel}\"))"
    fi
    if [[ ${args[--mode]} == "records" ]]; then
      expression='grel:filter(row.columnNames,cn,row.record.cells[cn].value.length()>1)'
    fi
    # let jq serialize the engine config so that quotes, backslashes and
    # control characters in the expression end up as valid JSON
    engine="$(jq -cn --arg expression "$expression" \
      '{facets: [{type: "list", columnName: "", expression: $expression, selection: []}], mode: "row-based"}')"

    # --data-urlencode: the engine JSON may contain &, +, % or = which plain
    # --data would send unencoded and thereby corrupt the form field
    if ! facets_json="$(curl -fs --data "project=${projectid}" --data-urlencode "engine=${engine}" "${OPENREFINE_URL}/command/core/compute-facets")"; then
      printf "%s\n" "export jsonl: could not compute multi-valued columns for project ${projectid}" >&2
      exit 1
    fi
    if ! columns_json="$(curl -fs --get --data "project=${projectid}" "${OPENREFINE_URL}/command/core/get-columns-info")"; then
      printf "%s\n" "export jsonl: could not get column names for project ${projectid}" >&2
      exit 1
    fi
    readarray -t columns_mv < <(jq -r '.facets[].choices[].v.v' <<<"$facets_json")
    readarray -t columns < <(jq -r '.[].name' <<<"$columns_json")

    # build the list of all columns as GREL string literals, in project
    # order; the loop runs in a subshell so its variables do not leak
    readarray -t columns_mix < <(
      for i in "${columns[@]}"; do
        # escape the column name for use as a GREL string literal
        name=${i//\\/\\\\}
        name=${name//\"/\\\"}
        suffix=
        for j in "${columns_mv[@]}"; do
          if [[ "$i" == "$j" ]]; then
            suffix='⊌' # add special character that is used in template below
            break
          fi
        done
        printf '"%s%s"\n' "$name" "$suffix"
      done
    )
    multivalued=$(IFS=, ; echo "[${columns_mix[*]}]")
  fi

  # set template
  template='{ {{'
  template+='forEach('
  if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
    template+="$multivalued"
  else
    template+='row.columnNames'
  fi
  template+=', cn, forNonBlank('
  if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
    template+='cells[cn.chomp("⊌")].value, v, if(cn.endsWith("⊌"), "\"" + cn.chomp("⊌") + "\": " +'
    if [[ ${args[--separator]} ]]; then
      template+="v.split(\"${separator_grel}\").jsonize()"
    fi
    if [[ ${args[--mode]} == "records" ]]; then
      # cn still carries the marker in this branch, so strip it before the
      # lookup; .value mirrors the facet expression above
      # NOTE(review): .value is assumed to yield the plain values of the
      # record's cells - confirm against a multi-row record
      template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
    fi
    template+=', "\"" + cn + "\": " + v.jsonize())'
  else
    template+='cells[cn].value, v, "\"" + cn + "\": " + v.jsonize()'
  fi
  template+=', null)'
  template+=').join(", ")'
  template+='}} }'
  template+='{{ "\n" }}'

  # assemble specific post data
  data+=("project=${projectid}")
  data+=("format=template")
  data+=("template=${template}")

  # call post_export function to post data and validate results
  post_export "${data[@]}"

}
|
||||
|
||||
# :command.function
|
||||
orcli_export_tsv_command() {
|
||||
# src/export_tsv_command.sh
|
||||
|
@ -3906,6 +4078,13 @@ orcli_export_parse_requirements() {
|
|||
case $action in
|
||||
-*) ;;
|
||||
|
||||
jsonl)
|
||||
action="jsonl"
|
||||
shift
|
||||
orcli_export_jsonl_parse_requirements "$@"
|
||||
shift $#
|
||||
;;
|
||||
|
||||
tsv)
|
||||
action="tsv"
|
||||
shift
|
||||
|
@ -3956,6 +4135,165 @@ orcli_export_parse_requirements() {
|
|||
|
||||
}
|
||||
|
||||
# :command.parse_requirements
|
||||
orcli_export_jsonl_parse_requirements() {
  # Parse the command line of "orcli export jsonl" into the args array,
  # set action, apply defaults and validate --mode.
  # Prints a message to stderr and exits 1 on any invalid input.
  local rival placeholder

  # :command.fixed_flags_filter
  # a help flag is only honored as the very first argument
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_export_jsonl_usage
      exit
      ;;
  esac

  # :command.command_filter
  action="export jsonl"

  # :command.parse_requirements_while
  while (($# > 0)); do
    key="$1"
    case "$key" in
      # :flag.case (all flags that take a value share one arm)
      --mode | --separator | --facets | --output | --encoding)
        rival=
        case "$key" in
          --mode)
            rival="--separator"
            placeholder="MODE"
            ;;
          --separator)
            rival="--mode"
            placeholder="SEPARATOR"
            ;;
          --facets) placeholder="FACETS" ;;
          --output) placeholder="FILE" ;;
          --encoding) placeholder="ENCODING" ;;
        esac

        # :flag.conflicts
        if [[ -n "$rival" && -n "${args[$rival]:-}" ]]; then
          printf "conflicting options: %s cannot be used with %s\n" "$key" "$rival" >&2
          exit 1
        fi

        # :flag.case_arg
        if [[ -z ${2+x} ]]; then
          printf "%s\n" "$key requires an argument: $key $placeholder" >&2
          exit 1
        fi
        args["$key"]="$2"
        shift 2
        ;;

      # :flag.case_no_arg
      --quiet | -q)
        args['--quiet']=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # :command.parse_requirements_case_simple
        # only one positional argument (PROJECT) is accepted
        if [[ -n ${args['project']+x} ]]; then
          printf "invalid argument: %s\n" "$key" >&2
          exit 1
        fi
        args['project']=$1
        shift
        ;;
    esac
  done

  # :command.required_args_filter
  if [[ -z ${args['project']+x} ]]; then
    printf "missing required argument: PROJECT\nusage: orcli export jsonl PROJECT [OPTIONS]\n" >&2
    exit 1
  fi

  # :command.default_assignments
  if [[ -z ${args['--mode']:-} ]]; then
    args['--mode']="rows"
  fi
  if [[ -z ${args['--facets']:-} ]]; then
    args['--facets']="[]"
  fi
  if [[ -z ${args['--encoding']:-} ]]; then
    args['--encoding']="UTF-8"
  fi

  # :command.whitelist_filter
  case "${args['--mode']}" in
    '' | rows | records) ;;
    *)
      printf "%s\n" "--mode must be one of: rows, records" >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
# :command.parse_requirements
|
||||
orcli_export_tsv_parse_requirements() {
|
||||
# :command.fixed_flags_filter
|
||||
|
@ -3981,6 +4319,26 @@ orcli_export_tsv_parse_requirements() {
|
|||
while [[ $# -gt 0 ]]; do
|
||||
key="$1"
|
||||
case "$key" in
|
||||
# :flag.case
|
||||
--mode)
|
||||
# :flag.conflicts
|
||||
if [[ -n "${args['--separator']:-}" ]]; then
|
||||
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# :flag.case_arg
|
||||
if [[ -n ${2+x} ]]; then
|
||||
|
||||
args['--mode']="$2"
|
||||
shift
|
||||
shift
|
||||
else
|
||||
printf "%s\n" "--mode requires an argument: --mode MODE" >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
|
||||
# :flag.case
|
||||
--facets)
|
||||
|
||||
|
@ -4063,9 +4421,16 @@ orcli_export_tsv_parse_requirements() {
|
|||
fi
|
||||
|
||||
# :command.default_assignments
|
||||
[[ -n ${args['--mode']:-} ]] || args['--mode']="rows"
|
||||
[[ -n ${args['--facets']:-} ]] || args['--facets']="[]"
|
||||
[[ -n ${args['--encoding']:-} ]] || args['--encoding']="UTF-8"
|
||||
|
||||
# :command.whitelist_filter
|
||||
if [[ ${args['--mode']} ]] && [[ ! ${args['--mode']} =~ ^(rows|records)$ ]]; then
|
||||
printf "%s\n" "--mode must be one of: rows, records" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
# :command.parse_requirements
|
||||
|
@ -4140,6 +4505,11 @@ orcli_export_template_parse_requirements() {
|
|||
|
||||
# :flag.case
|
||||
--mode)
|
||||
# :flag.conflicts
|
||||
if [[ -n "${args['--separator']:-}" ]]; then
|
||||
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# :flag.case_arg
|
||||
if [[ -n ${2+x} ]]; then
|
||||
|
@ -4388,6 +4758,7 @@ run() {
|
|||
"test") orcli_test_command ;;
|
||||
"transform") orcli_transform_command ;;
|
||||
"export") orcli_export_command ;;
|
||||
"export jsonl") orcli_export_jsonl_command ;;
|
||||
"export tsv") orcli_export_tsv_command ;;
|
||||
"export template") orcli_export_template_command ;;
|
||||
"run") orcli_run_command ;;
|
||||
|
|
|
@ -251,6 +251,7 @@ commands:
|
|||
- orcli import jsonl "file"
|
||||
- orcli import jsonl "file1" "file2"
|
||||
- orcli import jsonl "https://example.com/file.json"
|
||||
- orcli import jsonl --rename <(orcli export jsonl "duplicates")
|
||||
- |-
|
||||
orcli import jsonl "file" \\\\
|
||||
--rename \\\\
|
||||
|
@ -316,11 +317,22 @@ commands:
|
|||
help: commands to export data from OpenRefine projects to files
|
||||
|
||||
commands:
|
||||
- name: tsv
|
||||
help: export tab-separated values (TSV)
|
||||
- name: jsonl
|
||||
help: export JSON Lines / newline-delimited JSON
|
||||
args:
|
||||
- *project
|
||||
flags:
|
||||
- &mode
|
||||
long: --mode
|
||||
help: specify if project contains multi-row records
|
||||
arg: mode
|
||||
allowed: [rows, records]
|
||||
default: "rows"
|
||||
conflicts: [--separator]
|
||||
- long: --separator
|
||||
help: character(s) that separates multiple values in one cell (row mode only)
|
||||
arg: separator
|
||||
conflicts: [--mode]
|
||||
- &facets
|
||||
long: --facets
|
||||
help: filter result set by providing an OpenRefine facets config in json
|
||||
|
@ -336,6 +348,25 @@ commands:
|
|||
arg: encoding
|
||||
default: "UTF-8"
|
||||
- *quiet
|
||||
examples:
|
||||
- orcli export jsonl "duplicates"
|
||||
- orcli export jsonl "duplicates" --output "duplicates.jsonl"
|
||||
- orcli export jsonl "duplicates" --separator ' '
|
||||
- orcli export jsonl "duplicates" --mode records
|
||||
- |-
|
||||
orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
|
||||
- |-
|
||||
orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
|
||||
- name: tsv
|
||||
help: export tab-separated values (TSV)
|
||||
args:
|
||||
- *project
|
||||
flags:
|
||||
- *mode
|
||||
- *facets
|
||||
- *output
|
||||
- *encoding_export
|
||||
- *quiet
|
||||
examples:
|
||||
- orcli export tsv "duplicates"
|
||||
- orcli export tsv "duplicates" --output "duplicates.tsv"
|
||||
|
@ -362,11 +393,7 @@ commands:
|
|||
- long: --suffix
|
||||
help: insert character(s) at the end of the file
|
||||
arg: suffix
|
||||
- long: --mode
|
||||
help: specify if template shall be applied to each row or record
|
||||
arg: mode
|
||||
allowed: [rows, records]
|
||||
default: "rows"
|
||||
- *mode
|
||||
- *facets
|
||||
- *output
|
||||
- *encoding_export
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
# shellcheck shell=bash disable=SC2154 disable=SC2155
#
# Export a project as JSON Lines by building a GREL template and passing
# it to post_export (format=template).
# Reads:  args[project], args[--separator], args[--mode], OPENREFINE_URL
# Writes: appends project, format and template to the data array
# Exits:  1 if the multi-value column lookup against OpenRefine fails
projectid="$(get_id "${args[project]}")"

# escape backslashes and double quotes so that the separator can be
# embedded in a GREL string literal without terminating it
separator_grel=${args[--separator]//\\/\\\\}
separator_grel=${separator_grel//\"/\\\"}

# get columns that contain multiple values
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
  if [[ ${args[--separator]} ]]; then
    expression="grel:filter(row.columnNames,cn,cells[cn].value.contains(\"${separator_grel}\"))"
  fi
  if [[ ${args[--mode]} == "records" ]]; then
    expression='grel:filter(row.columnNames,cn,row.record.cells[cn].value.length()>1)'
  fi
  # let jq serialize the engine config so that quotes, backslashes and
  # control characters in the expression end up as valid JSON
  engine="$(jq -cn --arg expression "$expression" \
    '{facets: [{type: "list", columnName: "", expression: $expression, selection: []}], mode: "row-based"}')"

  # --data-urlencode: the engine JSON may contain &, +, % or = which plain
  # --data would send unencoded and thereby corrupt the form field
  if ! facets_json="$(curl -fs --data "project=${projectid}" --data-urlencode "engine=${engine}" "${OPENREFINE_URL}/command/core/compute-facets")"; then
    printf "%s\n" "export jsonl: could not compute multi-valued columns for project ${projectid}" >&2
    exit 1
  fi
  if ! columns_json="$(curl -fs --get --data "project=${projectid}" "${OPENREFINE_URL}/command/core/get-columns-info")"; then
    printf "%s\n" "export jsonl: could not get column names for project ${projectid}" >&2
    exit 1
  fi
  readarray -t columns_mv < <(jq -r '.facets[].choices[].v.v' <<<"$facets_json")
  readarray -t columns < <(jq -r '.[].name' <<<"$columns_json")

  # build the list of all columns as GREL string literals, in project
  # order; the loop runs in a subshell so its variables do not leak
  readarray -t columns_mix < <(
    for i in "${columns[@]}"; do
      # escape the column name for use as a GREL string literal
      name=${i//\\/\\\\}
      name=${name//\"/\\\"}
      suffix=
      for j in "${columns_mv[@]}"; do
        if [[ "$i" == "$j" ]]; then
          suffix='⊌' # add special character that is used in template below
          break
        fi
      done
      printf '"%s%s"\n' "$name" "$suffix"
    done
  )
  multivalued=$(IFS=, ; echo "[${columns_mix[*]}]")
fi

# set template
template='{ {{'
template+='forEach('
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
  template+="$multivalued"
else
  template+='row.columnNames'
fi
template+=', cn, forNonBlank('
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
  template+='cells[cn.chomp("⊌")].value, v, if(cn.endsWith("⊌"), "\"" + cn.chomp("⊌") + "\": " +'
  if [[ ${args[--separator]} ]]; then
    template+="v.split(\"${separator_grel}\").jsonize()"
  fi
  if [[ ${args[--mode]} == "records" ]]; then
    # cn still carries the marker in this branch, so strip it before the
    # lookup; .value mirrors the facet expression above
    # NOTE(review): .value is assumed to yield the plain values of the
    # record's cells - confirm against a multi-row record
    template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
  fi
  template+=', "\"" + cn + "\": " + v.jsonize())'
else
  template+='cells[cn].value, v, "\"" + cn + "\": " + v.jsonize()'
fi
template+=', null)'
template+=').join(", ")'
template+='}} }'
template+='{{ "\n" }}'

# assemble specific post data
data+=("project=${projectid}")
data+=("format=template")
data+=("template=${template}")

# call post_export function to post data and validate results
post_export "${data[@]}"
|
|
@ -34,6 +34,14 @@ send_completions() {
|
|||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export jsonl\'*\'--mode\')'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export tsv\'*\'--mode\')'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export template\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
|
@ -42,6 +50,10 @@ send_completions() {
|
|||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export jsonl\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'completions\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
|
@ -59,7 +71,7 @@ send_completions() {
|
|||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export tsv\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --output --quiet -h -q")" -- "$cur" )'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet -h -q")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'transform\'*)'
|
||||
|
@ -79,7 +91,7 @@ send_completions() {
|
|||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'export\'*)'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h template tsv")" -- "$cur" )'
|
||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
|
||||
echo $' ;;'
|
||||
echo $''
|
||||
echo $' \'list\'*)'
|
||||
|
|
Loading…
Reference in New Issue