Merge pull request #117 from opencultureconsulting/115-export-jsonl
export jsonl
This commit is contained in:
commit
f8e5b4abbd
|
@ -17,7 +17,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
|
||||||
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
|
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
|
||||||
* orcli calls specific endpoints for each operation to provide improved error handling and logging
|
* orcli calls specific endpoints for each operation to provide improved error handling and logging
|
||||||
* supports stdin, multiple files and URLs
|
* supports stdin, multiple files and URLs
|
||||||
* export to TSV, ~~CSV, HTML, XLS, XLSX, ODS~~
|
* export to TSV, JSONL, ~~CSV, HTML, XLS, XLSX, ODS~~
|
||||||
* [templating export](https://docs.openrefine.org/manual/exporting#templating-exporter) to additional formats like JSON or XML
|
* [templating export](https://docs.openrefine.org/manual/exporting#templating-exporter) to additional formats like JSON or XML
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
- [completions](completions.md)
|
- [completions](completions.md)
|
||||||
- [delete](delete.md)
|
- [delete](delete.md)
|
||||||
|
- [export jsonl](export_jsonl.md)
|
||||||
- [export template](export_template.md)
|
- [export template](export_template.md)
|
||||||
- [export tsv](export_tsv.md)
|
- [export tsv](export_tsv.md)
|
||||||
- [import csv](import_csv.md)
|
- [import csv](import_csv.md)
|
||||||
|
|
|
@ -0,0 +1,54 @@
|
||||||
|
# orcli export jsonl
|
||||||
|
|
||||||
|
```
|
||||||
|
orcli export jsonl - export JSON Lines / newline-delimited JSON
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
orcli export jsonl PROJECT [OPTIONS]
|
||||||
|
orcli export jsonl --help | -h
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--mode MODE
|
||||||
|
specify if project contains multi-row records
|
||||||
|
Allowed: rows, records
|
||||||
|
Default: rows
|
||||||
|
|
||||||
|
--separator SEPARATOR
|
||||||
|
character(s) that separates multiple values in one cell (row mode only)
|
||||||
|
|
||||||
|
--facets FACETS
|
||||||
|
filter result set by providing an OpenRefine facets config in json
|
||||||
|
Default: []
|
||||||
|
|
||||||
|
--output FILE
|
||||||
|
Write to file instead of stdout
|
||||||
|
|
||||||
|
--encoding ENCODING
|
||||||
|
set character encoding
|
||||||
|
Default: UTF-8
|
||||||
|
|
||||||
|
--quiet, -q
|
||||||
|
suppress log output, print errors only
|
||||||
|
|
||||||
|
--help, -h
|
||||||
|
Show this help
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
PROJECT
|
||||||
|
project name or id
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
orcli export jsonl "duplicates"
|
||||||
|
orcli export jsonl "duplicates" --output "duplicates.jsonl"
|
||||||
|
orcli export jsonl "duplicates" --separator ' '
|
||||||
|
orcli export jsonl "duplicates" --mode records
|
||||||
|
orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName":
|
||||||
|
"name", "mode": "regex", "caseSensitive": false, "invert": false, "query":
|
||||||
|
"^Ben" } ]'
|
||||||
|
orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression":
|
||||||
|
"grel:filter([\"gender\",\"purchase\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",
|
||||||
|
"columnName": "", "selection": [{"v": {"v": true}}] }]'
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
code: [src/export_jsonl_command.sh](../src/export_jsonl_command.sh)
|
|
@ -18,7 +18,7 @@ Options:
|
||||||
insert character(s) at the end of the file
|
insert character(s) at the end of the file
|
||||||
|
|
||||||
--mode MODE
|
--mode MODE
|
||||||
specify if template shall be applied to each row or record
|
specify if project contains multi-row records
|
||||||
Allowed: rows, records
|
Allowed: rows, records
|
||||||
Default: rows
|
Default: rows
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,11 @@ Usage:
|
||||||
orcli export tsv --help | -h
|
orcli export tsv --help | -h
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
|
--mode MODE
|
||||||
|
specify if project contains multi-row records
|
||||||
|
Allowed: rows, records
|
||||||
|
Default: rows
|
||||||
|
|
||||||
--facets FACETS
|
--facets FACETS
|
||||||
filter result set by providing an OpenRefine facets config in json
|
filter result set by providing an OpenRefine facets config in json
|
||||||
Default: []
|
Default: []
|
||||||
|
|
|
@ -51,6 +51,7 @@ Examples:
|
||||||
orcli import jsonl "file"
|
orcli import jsonl "file"
|
||||||
orcli import jsonl "file1" "file2"
|
orcli import jsonl "file1" "file2"
|
||||||
orcli import jsonl "https://example.com/file.json"
|
orcli import jsonl "https://example.com/file.json"
|
||||||
|
orcli import jsonl --rename <(orcli export jsonl "duplicates")
|
||||||
orcli import jsonl "file" \
|
orcli import jsonl "file" \
|
||||||
--rename \
|
--rename \
|
||||||
--storeEmptyStrings \
|
--storeEmptyStrings \
|
||||||
|
|
364
orcli
364
orcli
|
@ -690,6 +690,7 @@ orcli_import_jsonl_usage() {
|
||||||
printf " orcli import jsonl \"file\"\n"
|
printf " orcli import jsonl \"file\"\n"
|
||||||
printf " orcli import jsonl \"file1\" \"file2\"\n"
|
printf " orcli import jsonl \"file1\" \"file2\"\n"
|
||||||
printf " orcli import jsonl \"https://example.com/file.json\"\n"
|
printf " orcli import jsonl \"https://example.com/file.json\"\n"
|
||||||
|
printf " orcli import jsonl --rename <(orcli export jsonl \"duplicates\")\n"
|
||||||
printf " orcli import jsonl \"file\" \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
|
printf " orcli import jsonl \"file\" \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
|
||||||
echo
|
echo
|
||||||
|
|
||||||
|
@ -934,6 +935,7 @@ orcli_export_usage() {
|
||||||
echo
|
echo
|
||||||
# :command.usage_commands
|
# :command.usage_commands
|
||||||
printf "%s\n" "Commands:"
|
printf "%s\n" "Commands:"
|
||||||
|
printf " %s export JSON Lines / newline-delimited JSON\n" "jsonl "
|
||||||
printf " %s export tab-separated values (TSV)\n" "tsv "
|
printf " %s export tab-separated values (TSV)\n" "tsv "
|
||||||
printf " %s export to any text format by providing your own GREL template\n" "template"
|
printf " %s export to any text format by providing your own GREL template\n" "template"
|
||||||
echo
|
echo
|
||||||
|
@ -950,6 +952,88 @@ orcli_export_usage() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# :command.usage
|
||||||
|
orcli_export_jsonl_usage() {
  # Print the help text for `orcli export jsonl` on stdout.
  # Globals: long_usage (read) - when non-empty, the option, argument and
  #          example sections are printed after the short usage summary.

  # The title line is the same for short and long usage.
  printf '%s\n\n' "orcli export jsonl - export JSON Lines / newline-delimited JSON"

  printf '%s\n' \
    "Usage:" \
    " orcli export jsonl PROJECT [OPTIONS]" \
    " orcli export jsonl --help | -h" \
    ""

  # :command.long_usage
  # Short usage stops here.
  [[ -n $long_usage ]] || return 0

  # The quoted delimiter keeps quotes and backslashes in the examples literal.
  cat <<'EOF'
Options:
 --mode MODE
 specify if project contains multi-row records
 Allowed: rows, records
 Default: rows

 --separator SEPARATOR
 character(s) that separates multiple values in one cell (row mode only)

 --facets FACETS
 filter result set by providing an OpenRefine facets config in json
 Default: []

 --output FILE
 Write to file instead of stdout

 --encoding ENCODING
 set character encoding
 Default: UTF-8

 --quiet, -q
 suppress log output, print errors only

 --help, -h
 Show this help

Arguments:
 PROJECT
 project name or id

Examples:
 orcli export jsonl "duplicates"
 orcli export jsonl "duplicates" --output "duplicates.jsonl"
 orcli export jsonl "duplicates" --separator ' '
 orcli export jsonl "duplicates" --mode records
 orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName":
 "name", "mode": "regex", "caseSensitive": false, "invert": false, "query":
 "^Ben" } ]'
 orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression":
 "grel:filter([\"gender\",\"purchase\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",
 "columnName": "", "selection": [{"v": {"v": true}}] }]'

EOF
}
|
||||||
|
|
||||||
# :command.usage
|
# :command.usage
|
||||||
orcli_export_tsv_usage() {
|
orcli_export_tsv_usage() {
|
||||||
if [[ -n $long_usage ]]; then
|
if [[ -n $long_usage ]]; then
|
||||||
|
@ -972,6 +1056,13 @@ orcli_export_tsv_usage() {
|
||||||
printf "%s\n" "Options:"
|
printf "%s\n" "Options:"
|
||||||
|
|
||||||
# :command.usage_flags
|
# :command.usage_flags
|
||||||
|
# :flag.usage
|
||||||
|
printf " %s\n" "--mode MODE"
|
||||||
|
printf " specify if project contains multi-row records\n"
|
||||||
|
printf " Allowed: rows, records\n"
|
||||||
|
printf " Default: rows\n"
|
||||||
|
echo
|
||||||
|
|
||||||
# :flag.usage
|
# :flag.usage
|
||||||
printf " %s\n" "--facets FACETS"
|
printf " %s\n" "--facets FACETS"
|
||||||
printf " filter result set by providing an OpenRefine facets config in json\n"
|
printf " filter result set by providing an OpenRefine facets config in json\n"
|
||||||
|
@ -1058,7 +1149,7 @@ orcli_export_template_usage() {
|
||||||
|
|
||||||
# :flag.usage
|
# :flag.usage
|
||||||
printf " %s\n" "--mode MODE"
|
printf " %s\n" "--mode MODE"
|
||||||
printf " specify if template shall be applied to each row or record\n"
|
printf " specify if project contains multi-row records\n"
|
||||||
printf " Allowed: rows, records\n"
|
printf " Allowed: rows, records\n"
|
||||||
printf " Default: rows\n"
|
printf " Default: rows\n"
|
||||||
echo
|
echo
|
||||||
|
@ -1489,6 +1580,14 @@ send_completions() {
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
|
echo $' \'export jsonl\'*\'--mode\')'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
|
echo $' \'export tsv\'*\'--mode\')'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
echo $' \'export template\'*)'
|
echo $' \'export template\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
|
@ -1497,6 +1596,10 @@ send_completions() {
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
|
echo $' \'export jsonl\'*)'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
echo $' \'completions\'*)'
|
echo $' \'completions\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
|
@ -1514,7 +1617,7 @@ send_completions() {
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'export tsv\'*)'
|
echo $' \'export tsv\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --output --quiet -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'transform\'*)'
|
echo $' \'transform\'*)'
|
||||||
|
@ -1534,7 +1637,7 @@ send_completions() {
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'export\'*)'
|
echo $' \'export\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h template tsv")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'list\'*)'
|
echo $' \'list\'*)'
|
||||||
|
@ -2172,6 +2275,82 @@ orcli_transform_command() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# :command.function
|
||||||
|
orcli_export_jsonl_command() {
  # src/export_jsonl_command.sh
  # shellcheck shell=bash disable=SC2154 disable=SC2155
  #
  # Export a project as JSON Lines by building a GREL export template and
  # handing it to post_export.
  # Globals: args (read), OPENREFINE_URL (read), data (appended to);
  #          projectid, engine, columns, columns_mv, columns_mix,
  #          multivalued and template are written as working variables.
  # --mode records takes precedence when combined with --separator
  # (the separator is documented as "row mode only").
  projectid="$(get_id "${args[project]}")"

  # get columns that contain multiple values
  if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
    # The list facet's choices are the names of the multi-valued columns.
    if [[ ${args[--mode]} == "records" ]]; then
      # columns whose record holds more than one value
      engine='{"facets":[{"type":"list","columnName":"","expression":"grel:filter(row.columnNames,cn,row.record.cells[cn].value.length()>1)","selection":[]}],"mode":"row-based"}'
    else
      # columns with at least one cell that contains the separator
      engine='{"facets":[{"type":"list","columnName":"","expression":"grel:filter(row.columnNames,cn,cells[cn].value.contains(\"'
      engine+="${args[--separator]}"
      engine+='\"))","selection":[]}],"mode":"row-based"}'
    fi
    # --data-urlencode keeps separators such as "&", "+" or "%" intact in the
    # form-encoded request body.
    readarray -t columns_mv < <(curl -fs --data project="$projectid" --data-urlencode "engine=${engine}" "${OPENREFINE_URL}/command/core/compute-facets" | jq -r '.facets[].choices[].v.v')
    readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')
    # Emit every column name as a quoted string in project order; multi-valued
    # columns get a trailing marker character.
    readarray -t columns_mix < <(for i in "${columns[@]}"; do
      skip=
      for j in "${columns_mv[@]}"; do
        if [[ "$i" == "$j" ]]; then
          echo "\"$j⊌\"" # add special character that is used in template below
          skip=1
          break
        fi
      done
      if [[ -z $skip ]]; then
        echo "\"$i\""
      fi
    done)
    # join into a GREL array literal: ["col1","col2⊌",...]
    multivalued=$(
      IFS=,
      echo "[${columns_mix[*]}]"
    )
  fi

  # set template
  template='{{'
  if [[ ${args[--mode]} == "records" ]]; then
    # only the first row of each record produces output
    template+='if(row.index - row.record.fromRowIndex == 0,'
  fi
  # braces are produced via unescape("url") instead of being written literally
  template+='"%7B".unescape("url") + " " +'
  template+='forEach('
  if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
    template+="$multivalued"
  else
    template+='row.columnNames'
  fi
  template+=', cn, forNonBlank('
  if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
    template+='cells[cn.chomp("⊌")].value, v, if(cn.endsWith("⊌"), "\"" + cn.chomp("⊌") + "\": " +'
    # Exactly one array expression may follow; appending both yields invalid GREL.
    if [[ ${args[--mode]} == "records" ]]; then
      template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
    else
      template+="v.split(\"${args[--separator]}\").jsonize()"
    fi
    template+=', "\"" + cn + "\": " + v.jsonize())'
  else
    template+='cells[cn].value, v, "\"" + cn + "\": " + v.jsonize()'
  fi
  template+=', null)'
  template+=').join(", ")'
  template+='+ " " + "%7D".unescape("url") + "\n"'
  if [[ ${args[--mode]} == "records" ]]; then
    # close the if(...) opened above: non-first rows of a record emit nothing
    template+=', "")'
  fi
  template+='}}'

  # assemble specific post data
  data+=("project=${projectid}")
  data+=("format=template")
  data+=("template=${template}")

  # call post_export function to post data and validate results
  post_export "${data[@]}"

}
|
||||||
|
|
||||||
# :command.function
|
# :command.function
|
||||||
orcli_export_tsv_command() {
|
orcli_export_tsv_command() {
|
||||||
# src/export_tsv_command.sh
|
# src/export_tsv_command.sh
|
||||||
|
@ -3906,6 +4085,13 @@ orcli_export_parse_requirements() {
|
||||||
case $action in
|
case $action in
|
||||||
-*) ;;
|
-*) ;;
|
||||||
|
|
||||||
|
jsonl)
|
||||||
|
action="jsonl"
|
||||||
|
shift
|
||||||
|
orcli_export_jsonl_parse_requirements "$@"
|
||||||
|
shift $#
|
||||||
|
;;
|
||||||
|
|
||||||
tsv)
|
tsv)
|
||||||
action="tsv"
|
action="tsv"
|
||||||
shift
|
shift
|
||||||
|
@ -3956,6 +4142,155 @@ orcli_export_parse_requirements() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# :command.parse_requirements
|
||||||
|
orcli_export_jsonl_parse_requirements() {
  # Parse the command line of `orcli export jsonl` into the args array.
  # Globals: args (written), action (written), key (written),
  #          long_usage (written when help is requested)
  # Arguments: the words following `export jsonl`
  # Exits with status 1 and a message on stderr for invalid input.

  # :command.fixed_flags_filter
  # A help flag is only honoured as the first word.
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_export_jsonl_usage
      exit
      ;;
  esac

  # :command.command_filter
  action="export jsonl"

  # :command.parse_requirements_while
  local placeholder
  while (($# > 0)); do
    key="$1"
    case "$key" in
      # :flag.case
      # All flags that take a value share one arm; only the placeholder shown
      # in the error message differs.
      --mode | --separator | --facets | --output | --encoding)
        # :flag.case_arg
        if [[ -z ${2+x} ]]; then
          case "$key" in
            --mode) placeholder="MODE" ;;
            --separator) placeholder="SEPARATOR" ;;
            --facets) placeholder="FACETS" ;;
            --output) placeholder="FILE" ;;
            --encoding) placeholder="ENCODING" ;;
          esac
          printf "%s\n" "$key requires an argument: $key $placeholder" >&2
          exit 1
        fi
        args["$key"]="$2"
        shift 2
        ;;

      # :flag.case
      --quiet | -q)
        # :flag.case_no_arg
        args['--quiet']=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # :command.parse_requirements_case
        # :command.parse_requirements_case_simple
        # Only one positional argument (PROJECT) is accepted.
        if [[ -n ${args['project']+x} ]]; then
          printf "invalid argument: %s\n" "$key" >&2
          exit 1
        fi
        args['project']=$1
        shift
        ;;

    esac
  done

  # :command.required_args_filter
  if [[ -z ${args['project']+x} ]]; then
    printf "missing required argument: PROJECT\nusage: orcli export jsonl PROJECT [OPTIONS]\n" >&2
    exit 1
  fi

  # :command.default_assignments
  if [[ -z ${args['--mode']:-} ]]; then
    args['--mode']="rows"
  fi
  if [[ -z ${args['--facets']:-} ]]; then
    args['--facets']="[]"
  fi
  if [[ -z ${args['--encoding']:-} ]]; then
    args['--encoding']="UTF-8"
  fi

  # :command.whitelist_filter
  case "${args['--mode']}" in
    '' | rows | records) ;;
    *)
      printf "%s\n" "--mode must be one of: rows, records" >&2
      exit 1
      ;;
  esac

}
|
||||||
|
|
||||||
# :command.parse_requirements
|
# :command.parse_requirements
|
||||||
orcli_export_tsv_parse_requirements() {
|
orcli_export_tsv_parse_requirements() {
|
||||||
# :command.fixed_flags_filter
|
# :command.fixed_flags_filter
|
||||||
|
@ -3981,6 +4316,21 @@ orcli_export_tsv_parse_requirements() {
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
key="$1"
|
key="$1"
|
||||||
case "$key" in
|
case "$key" in
|
||||||
|
# :flag.case
|
||||||
|
--mode)
|
||||||
|
|
||||||
|
# :flag.case_arg
|
||||||
|
if [[ -n ${2+x} ]]; then
|
||||||
|
|
||||||
|
args['--mode']="$2"
|
||||||
|
shift
|
||||||
|
shift
|
||||||
|
else
|
||||||
|
printf "%s\n" "--mode requires an argument: --mode MODE" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
|
||||||
# :flag.case
|
# :flag.case
|
||||||
--facets)
|
--facets)
|
||||||
|
|
||||||
|
@ -4063,9 +4413,16 @@ orcli_export_tsv_parse_requirements() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# :command.default_assignments
|
# :command.default_assignments
|
||||||
|
[[ -n ${args['--mode']:-} ]] || args['--mode']="rows"
|
||||||
[[ -n ${args['--facets']:-} ]] || args['--facets']="[]"
|
[[ -n ${args['--facets']:-} ]] || args['--facets']="[]"
|
||||||
[[ -n ${args['--encoding']:-} ]] || args['--encoding']="UTF-8"
|
[[ -n ${args['--encoding']:-} ]] || args['--encoding']="UTF-8"
|
||||||
|
|
||||||
|
# :command.whitelist_filter
|
||||||
|
if [[ ${args['--mode']} ]] && [[ ! ${args['--mode']} =~ ^(rows|records)$ ]]; then
|
||||||
|
printf "%s\n" "--mode must be one of: rows, records" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# :command.parse_requirements
|
# :command.parse_requirements
|
||||||
|
@ -4388,6 +4745,7 @@ run() {
|
||||||
"test") orcli_test_command ;;
|
"test") orcli_test_command ;;
|
||||||
"transform") orcli_transform_command ;;
|
"transform") orcli_transform_command ;;
|
||||||
"export") orcli_export_command ;;
|
"export") orcli_export_command ;;
|
||||||
|
"export jsonl") orcli_export_jsonl_command ;;
|
||||||
"export tsv") orcli_export_tsv_command ;;
|
"export tsv") orcli_export_tsv_command ;;
|
||||||
"export template") orcli_export_template_command ;;
|
"export template") orcli_export_template_command ;;
|
||||||
"run") orcli_run_command ;;
|
"run") orcli_run_command ;;
|
||||||
|
|
|
@ -251,6 +251,7 @@ commands:
|
||||||
- orcli import jsonl "file"
|
- orcli import jsonl "file"
|
||||||
- orcli import jsonl "file1" "file2"
|
- orcli import jsonl "file1" "file2"
|
||||||
- orcli import jsonl "https://example.com/file.json"
|
- orcli import jsonl "https://example.com/file.json"
|
||||||
|
- orcli import jsonl --rename <(orcli export jsonl "duplicates")
|
||||||
- |-
|
- |-
|
||||||
orcli import jsonl "file" \\\\
|
orcli import jsonl "file" \\\\
|
||||||
--rename \\\\
|
--rename \\\\
|
||||||
|
@ -316,11 +317,20 @@ commands:
|
||||||
help: commands to export data from OpenRefine projects to files
|
help: commands to export data from OpenRefine projects to files
|
||||||
|
|
||||||
commands:
|
commands:
|
||||||
- name: tsv
|
- name: jsonl
|
||||||
help: export tab-separated values (TSV)
|
help: export JSON Lines / newline-delimited JSON
|
||||||
args:
|
args:
|
||||||
- *project
|
- *project
|
||||||
flags:
|
flags:
|
||||||
|
- &mode
|
||||||
|
long: --mode
|
||||||
|
help: specify if project contains multi-row records
|
||||||
|
arg: mode
|
||||||
|
allowed: [rows, records]
|
||||||
|
default: "rows"
|
||||||
|
- long: --separator
|
||||||
|
help: character(s) that separates multiple values in one cell (row mode only)
|
||||||
|
arg: separator
|
||||||
- &facets
|
- &facets
|
||||||
long: --facets
|
long: --facets
|
||||||
help: filter result set by providing an OpenRefine facets config in json
|
help: filter result set by providing an OpenRefine facets config in json
|
||||||
|
@ -336,6 +346,25 @@ commands:
|
||||||
arg: encoding
|
arg: encoding
|
||||||
default: "UTF-8"
|
default: "UTF-8"
|
||||||
- *quiet
|
- *quiet
|
||||||
|
examples:
|
||||||
|
- orcli export jsonl "duplicates"
|
||||||
|
- orcli export jsonl "duplicates" --output "duplicates.jsonl"
|
||||||
|
- orcli export jsonl "duplicates" --separator ' '
|
||||||
|
- orcli export jsonl "duplicates" --mode records
|
||||||
|
- |-
|
||||||
|
orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
|
||||||
|
- |-
|
||||||
|
orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
|
||||||
|
- name: tsv
|
||||||
|
help: export tab-separated values (TSV)
|
||||||
|
args:
|
||||||
|
- *project
|
||||||
|
flags:
|
||||||
|
- *mode
|
||||||
|
- *facets
|
||||||
|
- *output
|
||||||
|
- *encoding_export
|
||||||
|
- *quiet
|
||||||
examples:
|
examples:
|
||||||
- orcli export tsv "duplicates"
|
- orcli export tsv "duplicates"
|
||||||
- orcli export tsv "duplicates" --output "duplicates.tsv"
|
- orcli export tsv "duplicates" --output "duplicates.tsv"
|
||||||
|
@ -362,11 +391,7 @@ commands:
|
||||||
- long: --suffix
|
- long: --suffix
|
||||||
help: insert character(s) at the end of the file
|
help: insert character(s) at the end of the file
|
||||||
arg: suffix
|
arg: suffix
|
||||||
- long: --mode
|
- *mode
|
||||||
help: specify if template shall be applied to each row or record
|
|
||||||
arg: mode
|
|
||||||
allowed: [rows, records]
|
|
||||||
default: "rows"
|
|
||||||
- *facets
|
- *facets
|
||||||
- *output
|
- *output
|
||||||
- *encoding_export
|
- *encoding_export
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
# shellcheck shell=bash disable=SC2154 disable=SC2155
#
# Export a project as JSON Lines by building a GREL export template and
# handing it to post_export.
# Globals: args (read), OPENREFINE_URL (read), data (appended to);
#          projectid, engine, columns, columns_mv, columns_mix,
#          multivalued and template are written as working variables.
# --mode records takes precedence when combined with --separator
# (the separator is documented as "row mode only").
projectid="$(get_id "${args[project]}")"

# get columns that contain multiple values
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
  # The list facet's choices are the names of the multi-valued columns.
  if [[ ${args[--mode]} == "records" ]]; then
    # columns whose record holds more than one value
    engine='{"facets":[{"type":"list","columnName":"","expression":"grel:filter(row.columnNames,cn,row.record.cells[cn].value.length()>1)","selection":[]}],"mode":"row-based"}'
  else
    # columns with at least one cell that contains the separator
    engine='{"facets":[{"type":"list","columnName":"","expression":"grel:filter(row.columnNames,cn,cells[cn].value.contains(\"'
    engine+="${args[--separator]}"
    engine+='\"))","selection":[]}],"mode":"row-based"}'
  fi
  # --data-urlencode keeps separators such as "&", "+" or "%" intact in the
  # form-encoded request body.
  readarray -t columns_mv < <(curl -fs --data project="$projectid" --data-urlencode "engine=${engine}" "${OPENREFINE_URL}/command/core/compute-facets" | jq -r '.facets[].choices[].v.v')
  readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')
  # Emit every column name as a quoted string in project order; multi-valued
  # columns get a trailing marker character.
  readarray -t columns_mix < <(for i in "${columns[@]}"; do
    skip=
    for j in "${columns_mv[@]}"; do
      if [[ "$i" == "$j" ]]; then
        echo "\"$j⊌\"" # add special character that is used in template below
        skip=1
        break
      fi
    done
    if [[ -z $skip ]]; then
      echo "\"$i\""
    fi
  done)
  # join into a GREL array literal: ["col1","col2⊌",...]
  multivalued=$(
    IFS=,
    echo "[${columns_mix[*]}]"
  )
fi

# set template
template='{{'
if [[ ${args[--mode]} == "records" ]]; then
  # only the first row of each record produces output
  template+='if(row.index - row.record.fromRowIndex == 0,'
fi
# braces are produced via unescape("url") instead of being written literally
template+='"%7B".unescape("url") + " " +'
template+='forEach('
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
  template+="$multivalued"
else
  template+='row.columnNames'
fi
template+=', cn, forNonBlank('
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
  template+='cells[cn.chomp("⊌")].value, v, if(cn.endsWith("⊌"), "\"" + cn.chomp("⊌") + "\": " +'
  # Exactly one array expression may follow; appending both yields invalid GREL.
  if [[ ${args[--mode]} == "records" ]]; then
    template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
  else
    template+="v.split(\"${args[--separator]}\").jsonize()"
  fi
  template+=', "\"" + cn + "\": " + v.jsonize())'
else
  template+='cells[cn].value, v, "\"" + cn + "\": " + v.jsonize()'
fi
template+=', null)'
template+=').join(", ")'
template+='+ " " + "%7D".unescape("url") + "\n"'
if [[ ${args[--mode]} == "records" ]]; then
  # close the if(...) opened above: non-first rows of a record emit nothing
  template+=', "")'
fi
template+='}}'

# assemble specific post data
data+=("project=${projectid}")
data+=("format=template")
data+=("template=${template}")

# call post_export function to post data and validate results
post_export "${data[@]}"
|
|
@ -34,6 +34,14 @@ send_completions() {
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
|
echo $' \'export jsonl\'*\'--mode\')'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
|
echo $' \'export tsv\'*\'--mode\')'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
echo $' \'export template\'*)'
|
echo $' \'export template\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
|
@ -42,6 +50,10 @@ send_completions() {
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
|
echo $' \'export jsonl\'*)'
|
||||||
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
|
||||||
|
echo $' ;;'
|
||||||
|
echo $''
|
||||||
echo $' \'completions\'*)'
|
echo $' \'completions\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
|
@ -59,7 +71,7 @@ send_completions() {
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'export tsv\'*)'
|
echo $' \'export tsv\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --output --quiet -h -q")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet -h -q")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'transform\'*)'
|
echo $' \'transform\'*)'
|
||||||
|
@ -79,7 +91,7 @@ send_completions() {
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'export\'*)'
|
echo $' \'export\'*)'
|
||||||
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h template tsv")" -- "$cur" )'
|
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
|
||||||
echo $' ;;'
|
echo $' ;;'
|
||||||
echo $''
|
echo $''
|
||||||
echo $' \'list\'*)'
|
echo $' \'list\'*)'
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
#!/bin/bash
# shellcheck disable=SC1083

# Test case: `orcli export jsonl` combined with --facets.
# A case-insensitive regex text facet (^Ben) on the "name" column is passed
# to the export; the assertion lists the two rows expected in the output.
t="export-jsonl-facets"

# Scratch directory for input, expected and actual files; it is removed on
# exit and when SIGINT, SIGQUIT or SIGTERM is received.
tmpdir=$(mktemp -d)
cleanup() {
  rm -rf "${tmpdir}"
}
trap cleanup EXIT INT QUIT TERM

# Input: shared fixture, copied under the test's own name.
cp data/duplicates.csv "${tmpdir}/${t}.csv"

# Expected output, one JSON object per line.
cat > "${tmpdir}/${t}.assert" <<'DATA'
{ "email": "ben.tyler@example3.org", "name": "Ben Tyler", "state": "NV", "gender": "M", "purchase": "Flashlight" }
{ "email": "ben.morisson@example6.org", "name": "Ben Morisson", "state": "FL", "gender": "M", "purchase": "Amplifier" }
DATA

# Action: import the fixture as a project, then export it with the facet.
if ! cd "${tmpdir}"; then
  exit 1
fi
facets='[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "query": "^Ben" } ]'
export_args=("${t}" --output "${t}.output" --facets "${facets}")
orcli import csv "${t}.csv" --projectName "${t}"
orcli export jsonl "${export_args[@]}"

# Verdict: the script's exit status is that of diff (0 only if identical).
diff -u "${t}.assert" "${t}.output"
|
|
@ -0,0 +1,34 @@
|
||||||
|
#!/bin/bash
# shellcheck disable=SC1083

# Test case: `orcli export jsonl` with --mode records.
# The input has rows with an empty first column following a filled one; the
# assertion expects one JSON object per leading row, with the "name" and
# "purchase" values collected into arrays.
t="export-jsonl-records"

# Scratch directory for input, expected and actual files; it is removed on
# exit and when SIGINT, SIGQUIT or SIGTERM is received.
tmpdir=$(mktemp -d)
cleanup() {
  rm -rf "${tmpdir}"
}
trap cleanup EXIT INT QUIT TERM

# Input: inline CSV written into the scratch directory.
cat > "${tmpdir}/${t}.csv" <<'DATA'
email,name,state,gender,purchase
danny.baron@example1.com,Danny Baron,CA,M,TV
,D. Baron,,,Winter jacket
,Daniel Baron,,,Bike
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight
melanie.white@example2.edu,Melanie White,NC,F,iPad
,,,,iPhone
DATA

# Expected output, one JSON object per line.
cat > "${tmpdir}/${t}.assert" <<'DATA'
{ "email": "danny.baron@example1.com", "name": [ "Danny Baron", "D. Baron", "Daniel Baron" ], "state": "CA", "gender": "M", "purchase": [ "TV", "Winter jacket", "Bike" ] }
{ "email": "ben.tyler@example3.org", "name": [ "Ben Tyler" ], "state": "NV", "gender": "M", "purchase": [ "Flashlight" ] }
{ "email": "melanie.white@example2.edu", "name": [ "Melanie White" ], "state": "NC", "gender": "F", "purchase": [ "iPad", "iPhone" ] }
DATA

# Action: import the CSV as a project, then export it in records mode.
if ! cd "${tmpdir}"; then
  exit 1
fi
export_args=("${t}" --output "${t}.output" --mode records)
orcli import csv "${t}.csv" --projectName "${t}"
orcli export jsonl "${export_args[@]}"

# Verdict: the script's exit status is that of diff (0 only if identical).
diff -u "${t}.assert" "${t}.output"
|
|
@ -0,0 +1,33 @@
|
||||||
|
#!/bin/bash
# shellcheck disable=SC1083

# Test case: `orcli export jsonl` with --separator ' '.
# The assertion expects the "name" and "purchase" values split on the space
# character into JSON arrays, while the other columns stay plain strings.
t="export-jsonl-separator"

# Scratch directory for input, expected and actual files; it is removed on
# exit and when SIGINT, SIGQUIT or SIGTERM is received.
tmpdir=$(mktemp -d)
cleanup() {
  rm -rf "${tmpdir}"
}
trap cleanup EXIT INT QUIT TERM

# Input: shared fixture, copied under the test's own name.
cp data/duplicates.csv "${tmpdir}/${t}.csv"

# Expected output, one JSON object per line.
cat > "${tmpdir}/${t}.assert" <<'DATA'
{ "email": "danny.baron@example1.com", "name": [ "Danny", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "TV" ] }
{ "email": "melanie.white@example2.edu", "name": [ "Melanie", "White" ], "state": "NC", "gender": "F", "purchase": [ "iPhone" ] }
{ "email": "danny.baron@example1.com", "name": [ "D.", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "Winter", "jacket" ] }
{ "email": "ben.tyler@example3.org", "name": [ "Ben", "Tyler" ], "state": "NV", "gender": "M", "purchase": [ "Flashlight" ] }
{ "email": "arthur.duff@example4.com", "name": [ "Arthur", "Duff" ], "state": "OR", "gender": "M", "purchase": [ "Dining", "table" ] }
{ "email": "danny.baron@example1.com", "name": [ "Daniel", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "Bike" ] }
{ "email": "jean.griffith@example5.org", "name": [ "Jean", "Griffith" ], "state": "WA", "gender": "F", "purchase": [ "Power", "drill" ] }
{ "email": "melanie.white@example2.edu", "name": [ "Melanie", "White" ], "state": "NC", "gender": "F", "purchase": [ "iPad" ] }
{ "email": "ben.morisson@example6.org", "name": [ "Ben", "Morisson" ], "state": "FL", "gender": "M", "purchase": [ "Amplifier" ] }
{ "email": "arthur.duff@example4.com", "name": [ "Arthur", "Duff" ], "state": "OR", "gender": "M", "purchase": [ "Night", "table" ] }
DATA

# Action: import the fixture as a project, then export it with a separator.
if ! cd "${tmpdir}"; then
  exit 1
fi
export_args=("${t}" --output "${t}.output" --separator ' ')
orcli import csv "${t}.csv" --projectName "${t}"
orcli export jsonl "${export_args[@]}"

# Verdict: the script's exit status is that of diff (0 only if identical).
diff -u "${t}.assert" "${t}.output"
|
|
@ -0,0 +1,33 @@
|
||||||
|
#!/bin/bash
# shellcheck disable=SC1083

# Test case: `orcli export jsonl` without further options.
# The assertion expects one JSON object per input row, with every column
# rendered as a plain string value.
t="export-jsonl"

# Scratch directory for input, expected and actual files; it is removed on
# exit and when SIGINT, SIGQUIT or SIGTERM is received.
tmpdir=$(mktemp -d)
cleanup() {
  rm -rf "${tmpdir}"
}
trap cleanup EXIT INT QUIT TERM

# Input: shared fixture, copied under the test's own name.
cp data/duplicates.csv "${tmpdir}/${t}.csv"

# Expected output, one JSON object per line.
cat > "${tmpdir}/${t}.assert" <<'DATA'
{ "email": "danny.baron@example1.com", "name": "Danny Baron", "state": "CA", "gender": "M", "purchase": "TV" }
{ "email": "melanie.white@example2.edu", "name": "Melanie White", "state": "NC", "gender": "F", "purchase": "iPhone" }
{ "email": "danny.baron@example1.com", "name": "D. Baron", "state": "CA", "gender": "M", "purchase": "Winter jacket" }
{ "email": "ben.tyler@example3.org", "name": "Ben Tyler", "state": "NV", "gender": "M", "purchase": "Flashlight" }
{ "email": "arthur.duff@example4.com", "name": "Arthur Duff", "state": "OR", "gender": "M", "purchase": "Dining table" }
{ "email": "danny.baron@example1.com", "name": "Daniel Baron", "state": "CA", "gender": "M", "purchase": "Bike" }
{ "email": "jean.griffith@example5.org", "name": "Jean Griffith", "state": "WA", "gender": "F", "purchase": "Power drill" }
{ "email": "melanie.white@example2.edu", "name": "Melanie White", "state": "NC", "gender": "F", "purchase": "iPad" }
{ "email": "ben.morisson@example6.org", "name": "Ben Morisson", "state": "FL", "gender": "M", "purchase": "Amplifier" }
{ "email": "arthur.duff@example4.com", "name": "Arthur Duff", "state": "OR", "gender": "M", "purchase": "Night table" }
DATA

# Action: import the fixture as a project, then export it as JSON Lines.
if ! cd "${tmpdir}"; then
  exit 1
fi
export_args=("${t}" --output "${t}.output")
orcli import csv "${t}.csv" --projectName "${t}"
orcli export jsonl "${export_args[@]}"

# Verdict: the script's exit status is that of diff (0 only if identical).
diff -u "${t}.assert" "${t}.output"
|
Loading…
Reference in New Issue