export jsonl first draft
This commit is contained in:
parent
206bbf60f6
commit
907cc531ea
|
@ -17,7 +17,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
|
||||||
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
|
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
|
||||||
* orcli calls specific endpoints for each operation to provide improved error handling and logging
|
* orcli calls specific endpoints for each operation to provide improved error handling and logging
|
||||||
* supports stdin, multiple files and URLs
|
* supports stdin, multiple files and URLs
|
||||||
* export to TSV, ~~CSV, HTML, XLS, XLSX, ODS~~
|
* export to TSV, JSONL, ~~CSV, HTML, XLS, XLSX, ODS~~
|
||||||
* [templating export](https://docs.openrefine.org/manual/exporting#templating-exporter) to additional formats like JSON or XML
|
* [templating export](https://docs.openrefine.org/manual/exporting#templating-exporter) to additional formats like JSON or XML
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
- [completions](completions.md)
|
- [completions](completions.md)
|
||||||
- [delete](delete.md)
|
- [delete](delete.md)
|
||||||
|
- [export jsonl](export_jsonl.md)
|
||||||
- [export template](export_template.md)
|
- [export template](export_template.md)
|
||||||
- [export tsv](export_tsv.md)
|
- [export tsv](export_tsv.md)
|
||||||
- [import csv](import_csv.md)
|
- [import csv](import_csv.md)
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
# orcli export jsonl
|
||||||
|
|
||||||
|
```
|
||||||
|
orcli export jsonl - export JSON Lines / newline-delimited JSON
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
orcli export jsonl PROJECT [OPTIONS]
|
||||||
|
orcli export jsonl --help | -h
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--mode MODE
|
||||||
|
specify if project contains multi-row records
|
||||||
|
Allowed: rows, records
|
||||||
|
Default: rows
|
||||||
|
|
||||||
|
--separator SEPARATOR
|
||||||
|
character(s) that separates multiple values in one cell (row mode only)
|
||||||
|
|
||||||
|
--facets FACETS
|
||||||
|
filter result set by providing an OpenRefine facets config in json
|
||||||
|
Default: []
|
||||||
|
|
||||||
|
--output FILE
|
||||||
|
Write to file instead of stdout
|
||||||
|
|
||||||
|
--encoding ENCODING
|
||||||
|
set character encoding
|
||||||
|
Default: UTF-8
|
||||||
|
|
||||||
|
--quiet, -q
|
||||||
|
suppress log output, print errors only
|
||||||
|
|
||||||
|
--help, -h
|
||||||
|
Show this help
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
PROJECT
|
||||||
|
project name or id
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
orcli export jsonl "duplicates"
|
||||||
|
orcli export jsonl "duplicates" --output "duplicates.jsonl"
|
||||||
|
orcli export jsonl "duplicates" --separator ' '
|
||||||
|
orcli export jsonl "duplicates" --mode records
|
||||||
|
orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName":
|
||||||
|
"name", "mode": "regex", "caseSensitive": false, "invert": false, "query":
|
||||||
|
"^Ben" } ]'
|
||||||
|
orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression":
|
||||||
|
"grel:filter([\"gender\",\"purchase\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",
|
||||||
|
"columnName": "", "selection": [{"v": {"v": true}}] }]'
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
code: [src/export_jsonl_command.sh](../src/export_jsonl_command.sh)
|
|
@ -18,7 +18,7 @@ Options:
|
||||||
insert character(s) at the end of the file
|
insert character(s) at the end of the file
|
||||||
|
|
||||||
--mode MODE
|
--mode MODE
|
||||||
specify if template shall be applied to each row or record
|
specify if project contains multi-row records
|
||||||
Allowed: rows, records
|
Allowed: rows, records
|
||||||
Default: rows
|
Default: rows
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,11 @@ Usage:
|
||||||
orcli export tsv --help | -h
|
orcli export tsv --help | -h
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
|
--mode MODE
|
||||||
|
specify if project contains multi-row records
|
||||||
|
Allowed: rows, records
|
||||||
|
Default: rows
|
||||||
|
|
||||||
--facets FACETS
|
--facets FACETS
|
||||||
filter result set by providing an OpenRefine facets config in json
|
filter result set by providing an OpenRefine facets config in json
|
||||||
Default: []
|
Default: []
|
||||||
|
|
|
@ -51,6 +51,7 @@ Examples:
|
||||||
orcli import jsonl "file"
|
orcli import jsonl "file"
|
||||||
orcli import jsonl "file1" "file2"
|
orcli import jsonl "file1" "file2"
|
||||||
orcli import jsonl "https://example.com/file.json"
|
orcli import jsonl "https://example.com/file.json"
|
||||||
|
orcli import jsonl --rename <(orcli export jsonl "duplicates")
|
||||||
orcli import jsonl "file" \
|
orcli import jsonl "file" \
|
||||||
--rename \
|
--rename \
|
||||||
--storeEmptyStrings \
|
--storeEmptyStrings \
|
||||||
|
|
377
orcli
377
orcli
|
@ -690,6 +690,7 @@ orcli_import_jsonl_usage() {
|
||||||
printf " orcli import jsonl \"file\"\n"
|
printf " orcli import jsonl \"file\"\n"
|
||||||
printf " orcli import jsonl \"file1\" \"file2\"\n"
|
printf " orcli import jsonl \"file1\" \"file2\"\n"
|
||||||
printf " orcli import jsonl \"https://example.com/file.json\"\n"
|
printf " orcli import jsonl \"https://example.com/file.json\"\n"
|
||||||
|
printf " orcli import jsonl --rename <(orcli export jsonl \"duplicates\")\n"
|
||||||
printf " orcli import jsonl \"file\" \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
|
printf " orcli import jsonl \"file\" \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
|
||||||
echo
|
echo
|
||||||
|
|
||||||
|
@ -934,6 +935,7 @@ orcli_export_usage() {
|
||||||
echo
|
echo
|
||||||
# :command.usage_commands
|
# :command.usage_commands
|
||||||
printf "%s\n" "Commands:"
|
printf "%s\n" "Commands:"
|
||||||
|
printf " %s export JSON Lines / newline-delimited JSON\n" "jsonl "
|
||||||
printf " %s export tab-separated values (TSV)\n" "tsv "
|
printf " %s export tab-separated values (TSV)\n" "tsv "
|
||||||
printf " %s export to any text format by providing your own GREL template\n" "template"
|
printf " %s export to any text format by providing your own GREL template\n" "template"
|
||||||
echo
|
echo
|
||||||
|
@ -950,6 +952,88 @@ orcli_export_usage() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# :command.usage
|
||||||
|
orcli_export_jsonl_usage() {
  # Print the help text of `orcli export jsonl` to stdout.
  #
  # Globals:
  #   long_usage (read) - when non-empty, the Options, Arguments and Examples
  #                       sections are printed in addition to the summary.
  # Outputs:
  #   The usage text on stdout.

  # The summary line is the same for the short and the long usage, so it is
  # printed unconditionally.
  printf "orcli export jsonl - export JSON Lines / newline-delimited JSON\n"
  echo

  printf "%s\n" "Usage:"
  printf " orcli export jsonl PROJECT [OPTIONS]\n"
  printf " orcli export jsonl --help | -h\n"
  echo

  # :command.long_usage
  # ${long_usage:-} keeps the check safe when the variable was never set
  # (e.g. when the script runs with `set -u`).
  if [[ -n ${long_usage:-} ]]; then
    printf "%s\n" "Options:"

    # :command.usage_flags
    # :flag.usage
    printf " %s\n" "--mode MODE"
    printf " specify if project contains multi-row records\n"
    printf " Allowed: rows, records\n"
    printf " Default: rows\n"
    echo

    # :flag.usage
    printf " %s\n" "--separator SEPARATOR"
    printf " character(s) that separates multiple values in one cell (row mode only)\n"
    echo

    # :flag.usage
    printf " %s\n" "--facets FACETS"
    printf " filter result set by providing an OpenRefine facets config in json\n"
    printf " Default: []\n"
    echo

    # :flag.usage
    printf " %s\n" "--output FILE"
    printf " Write to file instead of stdout\n"
    echo

    # :flag.usage
    printf " %s\n" "--encoding ENCODING"
    printf " set character encoding\n"
    printf " Default: UTF-8\n"
    echo

    # :flag.usage
    printf " %s\n" "--quiet, -q"
    printf " suppress log output, print errors only\n"
    echo

    # :command.usage_fixed_flags
    printf " %s\n" "--help, -h"
    printf " Show this help\n"
    echo

    # :command.usage_args
    printf "%s\n" "Arguments:"

    # :argument.usage
    printf " %s\n" "PROJECT"
    printf " project name or id\n"
    echo

    # :command.usage_examples
    printf "%s\n" "Examples:"
    printf " orcli export jsonl \"duplicates\"\n"
    printf " orcli export jsonl \"duplicates\" --output \"duplicates.jsonl\"\n"
    printf " orcli export jsonl \"duplicates\" --separator ' '\n"
    printf " orcli export jsonl \"duplicates\" --mode records\n"
    printf " orcli export jsonl \"duplicates\" --facets '[ { \"type\": \"text\", \"columnName\":\n \"name\", \"mode\": \"regex\", \"caseSensitive\": false, \"invert\": false, \"query\":\n \"^Ben\" } ]'\n"
    # The doubled backslashes survive both the shell and printf so that the
    # example prints \"gender\" (an escaped quote inside the JSON string).
    printf " orcli export jsonl \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter([\\\\\"gender\\\\\",\\\\\"purchase\\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n"
    echo

  fi
}
|
||||||
|
|
||||||
# :command.usage
|
# :command.usage
|
||||||
orcli_export_tsv_usage() {
|
orcli_export_tsv_usage() {
|
||||||
if [[ -n $long_usage ]]; then
|
if [[ -n $long_usage ]]; then
|
||||||
|
@ -972,6 +1056,13 @@ orcli_export_tsv_usage() {
|
||||||
printf "%s\n" "Options:"
|
printf "%s\n" "Options:"
|
||||||
|
|
||||||
# :command.usage_flags
|
# :command.usage_flags
|
||||||
|
# :flag.usage
|
||||||
|
printf " %s\n" "--mode MODE"
|
||||||
|
printf " specify if project contains multi-row records\n"
|
||||||
|
printf " Allowed: rows, records\n"
|
||||||
|
printf " Default: rows\n"
|
||||||
|
echo
|
||||||
|
|
||||||
# :flag.usage
|
# :flag.usage
|
||||||
printf " %s\n" "--facets FACETS"
|
printf " %s\n" "--facets FACETS"
|
||||||
printf " filter result set by providing an OpenRefine facets config in json\n"
|
printf " filter result set by providing an OpenRefine facets config in json\n"
|
||||||
|
@ -1058,7 +1149,7 @@ orcli_export_template_usage() {
|
||||||
|
|
||||||
# :flag.usage
|
# :flag.usage
|
||||||
printf " %s\n" "--mode MODE"
|
printf " %s\n" "--mode MODE"
|
||||||
printf " specify if template shall be applied to each row or record\n"
|
printf " specify if project contains multi-row records\n"
|
||||||
printf " Allowed: rows, records\n"
|
printf " Allowed: rows, records\n"
|
||||||
printf " Default: rows\n"
|
printf " Default: rows\n"
|
||||||
echo
|
echo
|
||||||
|
@ -1489,6 +1580,14 @@ send_completions() {
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
|
echo $' \'export jsonl\'*\'--mode\')'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
|
echo $' \'export tsv\'*\'--mode\')'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
echo $' \'export template\'*)'
|
echo $' \'export template\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
|
@ -1497,6 +1596,10 @@ send_completions() {
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
|
echo $' \'export jsonl\'*)'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
echo $' \'completions\'*)'
|
echo $' \'completions\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
|
@ -1514,7 +1617,7 @@ send_completions() {
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'export tsv\'*)'
|
echo $' \'export tsv\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --output --quiet -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'transform\'*)'
|
echo $' \'transform\'*)'
|
||||||
|
@ -1534,7 +1637,7 @@ send_completions() {
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'export\'*)'
|
echo $' \'export\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h template tsv")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'list\'*)'
|
echo $' \'list\'*)'
|
||||||
|
@ -2172,6 +2275,75 @@ orcli_transform_command() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# :command.function
|
||||||
|
orcli_export_jsonl_command() {
  # src/export_jsonl_command.sh
  # shellcheck shell=bash disable=SC2154 disable=SC2155
  #
  # Export a project as JSON Lines: builds a GREL export template that emits
  # one JSON object per row/record and hands it to post_export.
  #
  # Globals:
  #   args (read)           - parsed CLI values: project, --mode, --separator
  #   OPENREFINE_URL (read) - base URL used for the helper requests
  #   data (appended)       - form fields passed on to post_export
  # Calls: get_id, post_export (defined elsewhere in this script), curl, jq.
  projectid="$(get_id "${args[project]}")"

  # Multi-valued columns need special treatment either when the project is
  # exported in records mode or when cells are split by a separator.
  # Records mode takes precedence (the two options conflict on the CLI).
  multivalue_mode=
  if [[ ${args[--mode]} == "records" ]]; then
    multivalue_mode="records"
  elif [[ ${args[--separator]} ]]; then
    multivalue_mode="separator"
  fi

  # Escape the separator for use inside a double-quoted GREL string literal.
  separator_grel=${args[--separator]}
  separator_grel=${separator_grel//\\/\\\\}
  separator_grel=${separator_grel//\"/\\\"}

  # get columns that contain multiple values
  if [[ -n $multivalue_mode ]]; then
    if [[ $multivalue_mode == "records" ]]; then
      expression='grel:filter(row.columnNames,cn,row.record.cells[cn].value.length()>1)'
    else
      expression="grel:filter(row.columnNames,cn,cells[cn].value.contains(\"${separator_grel}\"))"
    fi
    # Let jq build the engine config so the expression is JSON-escaped
    # correctly whatever characters the separator contains.
    engine="$(jq -c -n --arg expression "$expression" \
      '{facets: [{type: "list", columnName: "", expression: $expression, selection: []}], mode: "row-based"}')"
    # --data-urlencode keeps characters such as & + % in the engine intact.
    readarray -t columns_mv < <(curl -fs --data project="$projectid" --data-urlencode "engine=${engine}" "${OPENREFINE_URL}/command/core/compute-facets" | jq -r '.facets[].choices[].v.v')
    readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')

    # Build a GREL array literal of all column names; names of multi-valued
    # columns get the special character ⊌ appended, which the template below
    # uses to tell them apart.
    columns_mix=()
    for i in "${columns[@]}"; do
      name_grel=${i//\\/\\\\}
      name_grel=${name_grel//\"/\\\"}
      marker=
      for j in "${columns_mv[@]}"; do
        if [[ "$i" == "$j" ]]; then
          marker='⊌'
          break
        fi
      done
      columns_mix+=("\"${name_grel}${marker}\"")
    done
    multivalued="$(IFS=, ; printf '[%s]' "${columns_mix[*]}")"
  fi

  # set template
  template='{ {{forEach('
  if [[ -n $multivalue_mode ]]; then
    template+="$multivalued"
  else
    template+='row.columnNames'
  fi
  template+=', cn, forNonBlank('
  if [[ -n $multivalue_mode ]]; then
    # Keys are produced with jsonize() so quotes/backslashes in column names
    # are escaped; the marker is stripped before every cell lookup.
    template+='cells[cn.chomp("⊌")].value, v, if(cn.endsWith("⊌"), cn.chomp("⊌").jsonize() + ": " + '
    if [[ $multivalue_mode == "records" ]]; then
      template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
    else
      template+="v.split(\"${separator_grel}\").jsonize()"
    fi
    template+=', cn.jsonize() + ": " + v.jsonize())'
  else
    template+='cells[cn].value, v, cn.jsonize() + ": " + v.jsonize()'
  fi
  template+=', null)'
  template+=').join(", ")'
  template+='}} }'
  template+='{{ "\n" }}'

  # assemble specific post data
  data+=("project=${projectid}")
  data+=("format=template")
  data+=("template=${template}")

  # call post_export function to post data and validate results
  post_export "${data[@]}"

}
|
||||||
|
|
||||||
# :command.function
|
# :command.function
|
||||||
orcli_export_tsv_command() {
|
orcli_export_tsv_command() {
|
||||||
# src/export_tsv_command.sh
|
# src/export_tsv_command.sh
|
||||||
|
@ -3906,6 +4078,13 @@ orcli_export_parse_requirements() {
|
||||||
case $action in
|
case $action in
|
||||||
-*) ;;
|
-*) ;;
|
||||||
|
|
||||||
|
jsonl)
|
||||||
|
action="jsonl"
|
||||||
|
shift
|
||||||
|
orcli_export_jsonl_parse_requirements "$@"
|
||||||
|
shift $#
|
||||||
|
;;
|
||||||
|
|
||||||
tsv)
|
tsv)
|
||||||
action="tsv"
|
action="tsv"
|
||||||
shift
|
shift
|
||||||
|
@ -3956,6 +4135,165 @@ orcli_export_parse_requirements() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# :command.parse_requirements
|
||||||
|
orcli_export_jsonl_parse_requirements() {
  # Parse the command line of `orcli export jsonl`.
  #
  # Arguments: the words following `export jsonl`.
  # Globals:
  #   args (written)       - receives project, --mode, --separator, --facets,
  #                          --output, --encoding and --quiet
  #   action (written)     - set to "export jsonl"
  #   long_usage (written) - set to "yes" when help is requested
  #   key (written)        - the word currently being examined
  # Exits: 0 after printing the help, 1 (message on stderr) on invalid input.

  # :command.fixed_flags_filter
  # Help is only recognised as the leading word.
  while [[ $# -gt 0 ]]; do
    case "${1:-}" in
      --help | -h)
        long_usage=yes
        orcli_export_jsonl_usage
        exit
        ;;

      *)
        break
        ;;

    esac
  done

  # :command.command_filter
  action="export jsonl"

  # :command.parse_requirements_while
  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      # :flag.case
      --mode)
        # :flag.conflicts
        if [[ -n "${args['--separator']:-}" ]]; then
          printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
          exit 1
        fi

        # :flag.case_arg
        # ${2+x} is non-empty whenever a second word exists, even an empty one.
        if [[ -n ${2+x} ]]; then

          args['--mode']="$2"
          shift
          shift
        else
          printf "%s\n" "--mode requires an argument: --mode MODE" >&2
          exit 1
        fi
        ;;

      # :flag.case
      --separator)
        # :flag.conflicts
        if [[ -n "${args['--mode']:-}" ]]; then
          printf "conflicting options: %s cannot be used with %s\n" "$key" "--mode" >&2
          exit 1
        fi

        # :flag.case_arg
        if [[ -n ${2+x} ]]; then

          args['--separator']="$2"
          shift
          shift
        else
          printf "%s\n" "--separator requires an argument: --separator SEPARATOR" >&2
          exit 1
        fi
        ;;

      # :flag.case
      --facets)

        # :flag.case_arg
        if [[ -n ${2+x} ]]; then

          args['--facets']="$2"
          shift
          shift
        else
          printf "%s\n" "--facets requires an argument: --facets FACETS" >&2
          exit 1
        fi
        ;;

      # :flag.case
      --output)

        # :flag.case_arg
        if [[ -n ${2+x} ]]; then

          args['--output']="$2"
          shift
          shift
        else
          printf "%s\n" "--output requires an argument: --output FILE" >&2
          exit 1
        fi
        ;;

      # :flag.case
      --encoding)

        # :flag.case_arg
        if [[ -n ${2+x} ]]; then

          args['--encoding']="$2"
          shift
          shift
        else
          printf "%s\n" "--encoding requires an argument: --encoding ENCODING" >&2
          exit 1
        fi
        ;;

      # :flag.case
      --quiet | -q)

        # :flag.case_no_arg
        args['--quiet']=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # :command.parse_requirements_case
        # :command.parse_requirements_case_simple
        # The first bare word is the project; any further one is an error.
        if [[ -z ${args['project']+x} ]]; then

          args['project']=$1
          shift
        else
          printf "invalid argument: %s\n" "$key" >&2
          exit 1
        fi

        ;;

    esac
  done

  # :command.required_args_filter
  if [[ -z ${args['project']+x} ]]; then
    printf "missing required argument: PROJECT\nusage: orcli export jsonl PROJECT [OPTIONS]\n" >&2
    exit 1
  fi

  # :command.default_assignments
  # Defaults are applied after parsing so that the conflict checks above only
  # see values the user passed explicitly.
  [[ -n ${args['--mode']:-} ]] || args['--mode']="rows"
  [[ -n ${args['--facets']:-} ]] || args['--facets']="[]"
  [[ -n ${args['--encoding']:-} ]] || args['--encoding']="UTF-8"

  # :command.whitelist_filter
  if [[ ${args['--mode']} ]] && [[ ! ${args['--mode']} =~ ^(rows|records)$ ]]; then
    printf "%s\n" "--mode must be one of: rows, records" >&2
    exit 1
  fi

}
|
||||||
|
|
||||||
# :command.parse_requirements
|
# :command.parse_requirements
|
||||||
orcli_export_tsv_parse_requirements() {
|
orcli_export_tsv_parse_requirements() {
|
||||||
# :command.fixed_flags_filter
|
# :command.fixed_flags_filter
|
||||||
|
@ -3981,6 +4319,26 @@ orcli_export_tsv_parse_requirements() {
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
key="$1"
|
key="$1"
|
||||||
case "$key" in
|
case "$key" in
|
||||||
|
# :flag.case
|
||||||
|
--mode)
|
||||||
|
# :flag.conflicts
|
||||||
|
if [[ -n "${args['--separator']:-}" ]]; then
|
||||||
|
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# :flag.case_arg
|
||||||
|
if [[ -n ${2+x} ]]; then
|
||||||
|
|
||||||
|
args['--mode']="$2"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
else
|
||||||
|
printf "%s\n" "--mode requires an argument: --mode MODE" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
# :flag.case
|
# :flag.case
|
||||||
--facets)
|
--facets)
|
||||||
|
|
||||||
|
@ -4063,9 +4421,16 @@ orcli_export_tsv_parse_requirements() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# :command.default_assignments
|
# :command.default_assignments
|
||||||
|
[[ -n ${args['--mode']:-} ]] || args['--mode']="rows"
|
||||||
[[ -n ${args['--facets']:-} ]] || args['--facets']="[]"
|
[[ -n ${args['--facets']:-} ]] || args['--facets']="[]"
|
||||||
[[ -n ${args['--encoding']:-} ]] || args['--encoding']="UTF-8"
|
[[ -n ${args['--encoding']:-} ]] || args['--encoding']="UTF-8"
|
||||||
|
|
||||||
|
# :command.whitelist_filter
|
||||||
|
if [[ ${args['--mode']} ]] && [[ ! ${args['--mode']} =~ ^(rows|records)$ ]]; then
|
||||||
|
printf "%s\n" "--mode must be one of: rows, records" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# :command.parse_requirements
|
# :command.parse_requirements
|
||||||
|
@ -4140,6 +4505,11 @@ orcli_export_template_parse_requirements() {
|
||||||
|
|
||||||
# :flag.case
|
# :flag.case
|
||||||
--mode)
|
--mode)
|
||||||
|
# :flag.conflicts
|
||||||
|
if [[ -n "${args['--separator']:-}" ]]; then
|
||||||
|
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
# :flag.case_arg
|
# :flag.case_arg
|
||||||
if [[ -n ${2+x} ]]; then
|
if [[ -n ${2+x} ]]; then
|
||||||
|
@ -4388,6 +4758,7 @@ run() {
|
||||||
"test") orcli_test_command ;;
|
"test") orcli_test_command ;;
|
||||||
"transform") orcli_transform_command ;;
|
"transform") orcli_transform_command ;;
|
||||||
"export") orcli_export_command ;;
|
"export") orcli_export_command ;;
|
||||||
|
"export jsonl") orcli_export_jsonl_command ;;
|
||||||
"export tsv") orcli_export_tsv_command ;;
|
"export tsv") orcli_export_tsv_command ;;
|
||||||
"export template") orcli_export_template_command ;;
|
"export template") orcli_export_template_command ;;
|
||||||
"run") orcli_run_command ;;
|
"run") orcli_run_command ;;
|
||||||
|
|
|
@ -251,6 +251,7 @@ commands:
|
||||||
- orcli import jsonl "file"
|
- orcli import jsonl "file"
|
||||||
- orcli import jsonl "file1" "file2"
|
- orcli import jsonl "file1" "file2"
|
||||||
- orcli import jsonl "https://example.com/file.json"
|
- orcli import jsonl "https://example.com/file.json"
|
||||||
|
- orcli import jsonl --rename <(orcli export jsonl "duplicates")
|
||||||
- |-
|
- |-
|
||||||
orcli import jsonl "file" \\\\
|
orcli import jsonl "file" \\\\
|
||||||
--rename \\\\
|
--rename \\\\
|
||||||
|
@ -316,11 +317,22 @@ commands:
|
||||||
help: commands to export data from OpenRefine projects to files
|
help: commands to export data from OpenRefine projects to files
|
||||||
|
|
||||||
commands:
|
commands:
|
||||||
- name: tsv
|
- name: jsonl
|
||||||
help: export tab-separated values (TSV)
|
help: export JSON Lines / newline-delimited JSON
|
||||||
args:
|
args:
|
||||||
- *project
|
- *project
|
||||||
flags:
|
flags:
|
||||||
|
- &mode
|
||||||
|
long: --mode
|
||||||
|
help: specify if project contains multi-row records
|
||||||
|
arg: mode
|
||||||
|
allowed: [rows, records]
|
||||||
|
default: "rows"
|
||||||
|
conflicts: [--separator]
|
||||||
|
- long: --separator
|
||||||
|
help: character(s) that separates multiple values in one cell (row mode only)
|
||||||
|
arg: separator
|
||||||
|
conflicts: [--mode]
|
||||||
- &facets
|
- &facets
|
||||||
long: --facets
|
long: --facets
|
||||||
help: filter result set by providing an OpenRefine facets config in json
|
help: filter result set by providing an OpenRefine facets config in json
|
||||||
|
@ -336,6 +348,25 @@ commands:
|
||||||
arg: encoding
|
arg: encoding
|
||||||
default: "UTF-8"
|
default: "UTF-8"
|
||||||
- *quiet
|
- *quiet
|
||||||
|
examples:
|
||||||
|
- orcli export jsonl "duplicates"
|
||||||
|
- orcli export jsonl "duplicates" --output "duplicates.jsonl"
|
||||||
|
- orcli export jsonl "duplicates" --separator ' '
|
||||||
|
- orcli export jsonl "duplicates" --mode records
|
||||||
|
- |-
|
||||||
|
orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
|
||||||
|
- |-
|
||||||
|
orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
|
||||||
|
- name: tsv
|
||||||
|
help: export tab-separated values (TSV)
|
||||||
|
args:
|
||||||
|
- *project
|
||||||
|
flags:
|
||||||
|
- *mode
|
||||||
|
- *facets
|
||||||
|
- *output
|
||||||
|
- *encoding_export
|
||||||
|
- *quiet
|
||||||
examples:
|
examples:
|
||||||
- orcli export tsv "duplicates"
|
- orcli export tsv "duplicates"
|
||||||
- orcli export tsv "duplicates" --output "duplicates.tsv"
|
- orcli export tsv "duplicates" --output "duplicates.tsv"
|
||||||
|
@ -362,11 +393,7 @@ commands:
|
||||||
- long: --suffix
|
- long: --suffix
|
||||||
help: insert character(s) at the end of the file
|
help: insert character(s) at the end of the file
|
||||||
arg: suffix
|
arg: suffix
|
||||||
- long: --mode
|
- *mode
|
||||||
help: specify if template shall be applied to each row or record
|
|
||||||
arg: mode
|
|
||||||
allowed: [rows, records]
|
|
||||||
default: "rows"
|
|
||||||
- *facets
|
- *facets
|
||||||
- *output
|
- *output
|
||||||
- *encoding_export
|
- *encoding_export
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
# shellcheck shell=bash disable=SC2154 disable=SC2155
#
# Export a project as JSON Lines: builds a GREL export template that emits
# one JSON object per row/record and hands it to post_export.
#
# Reads:    args (project, --mode, --separator), OPENREFINE_URL
# Appends:  data (form fields passed on to post_export)
# Calls:    get_id, post_export (provided by the enclosing script), curl, jq
projectid="$(get_id "${args[project]}")"

# Multi-valued columns need special treatment either when the project is
# exported in records mode or when cells are split by a separator.
# Records mode takes precedence (the two options conflict on the CLI).
multivalue_mode=
if [[ ${args[--mode]} == "records" ]]; then
  multivalue_mode="records"
elif [[ ${args[--separator]} ]]; then
  multivalue_mode="separator"
fi

# Escape the separator for use inside a double-quoted GREL string literal.
separator_grel=${args[--separator]}
separator_grel=${separator_grel//\\/\\\\}
separator_grel=${separator_grel//\"/\\\"}

# get columns that contain multiple values
if [[ -n $multivalue_mode ]]; then
  if [[ $multivalue_mode == "records" ]]; then
    expression='grel:filter(row.columnNames,cn,row.record.cells[cn].value.length()>1)'
  else
    expression="grel:filter(row.columnNames,cn,cells[cn].value.contains(\"${separator_grel}\"))"
  fi
  # Let jq build the engine config so the expression is JSON-escaped
  # correctly whatever characters the separator contains.
  engine="$(jq -c -n --arg expression "$expression" \
    '{facets: [{type: "list", columnName: "", expression: $expression, selection: []}], mode: "row-based"}')"
  # --data-urlencode keeps characters such as & + % in the engine intact.
  readarray -t columns_mv < <(curl -fs --data project="$projectid" --data-urlencode "engine=${engine}" "${OPENREFINE_URL}/command/core/compute-facets" | jq -r '.facets[].choices[].v.v')
  readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')

  # Build a GREL array literal of all column names; names of multi-valued
  # columns get the special character ⊌ appended, which the template below
  # uses to tell them apart.
  columns_mix=()
  for i in "${columns[@]}"; do
    name_grel=${i//\\/\\\\}
    name_grel=${name_grel//\"/\\\"}
    marker=
    for j in "${columns_mv[@]}"; do
      if [[ "$i" == "$j" ]]; then
        marker='⊌'
        break
      fi
    done
    columns_mix+=("\"${name_grel}${marker}\"")
  done
  multivalued="$(IFS=, ; printf '[%s]' "${columns_mix[*]}")"
fi

# set template
template='{ {{forEach('
if [[ -n $multivalue_mode ]]; then
  template+="$multivalued"
else
  template+='row.columnNames'
fi
template+=', cn, forNonBlank('
if [[ -n $multivalue_mode ]]; then
  # Keys are produced with jsonize() so quotes/backslashes in column names
  # are escaped; the marker is stripped before every cell lookup.
  template+='cells[cn.chomp("⊌")].value, v, if(cn.endsWith("⊌"), cn.chomp("⊌").jsonize() + ": " + '
  if [[ $multivalue_mode == "records" ]]; then
    template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
  else
    template+="v.split(\"${separator_grel}\").jsonize()"
  fi
  template+=', cn.jsonize() + ": " + v.jsonize())'
else
  template+='cells[cn].value, v, cn.jsonize() + ": " + v.jsonize()'
fi
template+=', null)'
template+=').join(", ")'
template+='}} }'
template+='{{ "\n" }}'

# assemble specific post data
data+=("project=${projectid}")
data+=("format=template")
data+=("template=${template}")

# call post_export function to post data and validate results
post_export "${data[@]}"
|
|
@ -34,6 +34,14 @@ send_completions() {
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
|
echo $' \'export jsonl\'*\'--mode\')'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
|
echo $' \'export tsv\'*\'--mode\')'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
echo $' \'export template\'*)'
|
echo $' \'export template\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
|
@ -42,6 +50,10 @@ send_completions() {
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
|
echo $' \'export jsonl\'*)'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
echo $' \'completions\'*)'
|
echo $' \'completions\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
|
@ -59,7 +71,7 @@ send_completions() {
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'export tsv\'*)'
|
echo $' \'export tsv\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --output --quiet -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'transform\'*)'
|
echo $' \'transform\'*)'
|
||||||
|
@ -79,7 +91,7 @@ send_completions() {
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'export\'*)'
|
echo $' \'export\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h template tsv")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'list\'*)'
|
echo $' \'list\'*)'
|
||||||
|
|
Loading…
Reference in New Issue