export jsonl tests und korrekturen

This commit is contained in:
Felix Lohmeier 2023-11-08 11:31:11 +00:00
parent 907cc531ea
commit d939acc257
7 changed files with 149 additions and 30 deletions

35
orcli
View File

@ -2309,7 +2309,11 @@ orcli_export_jsonl_command() {
fi fi
# set template # set template
template='{ {{' template='{{'
if [[ ${args[--mode]} == "records" ]]; then
template+='if(row.index - row.record.fromRowIndex == 0,'
fi
template+='"%7B".unescape("url") + " " +'
template+='forEach(' template+='forEach('
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
template+="$multivalued" template+="$multivalued"
@ -2323,7 +2327,7 @@ orcli_export_jsonl_command() {
template+="v.split(\"${args[--separator]}\").jsonize()" template+="v.split(\"${args[--separator]}\").jsonize()"
fi fi
if [[ ${args[--mode]} == "records" ]]; then if [[ ${args[--mode]} == "records" ]]; then
template+='row.record.cells[cn].jsonize()' template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
fi fi
template+=', "\"" + cn + "\": " + v.jsonize())' template+=', "\"" + cn + "\": " + v.jsonize())'
else else
@ -2331,8 +2335,11 @@ orcli_export_jsonl_command() {
fi fi
template+=', null)' template+=', null)'
template+=').join(", ")' template+=').join(", ")'
template+='}} }' template+='+ " " + "%7D".unescape("url") + "\n"'
template+='{{ "\n" }}' if [[ ${args[--mode]} == "records" ]]; then
template+=', "")'
fi
template+='}}'
# assemble specific post data # assemble specific post data
data+=("project=${projectid}") data+=("project=${projectid}")
@ -4162,11 +4169,6 @@ orcli_export_jsonl_parse_requirements() {
case "$key" in case "$key" in
# :flag.case # :flag.case
--mode) --mode)
# :flag.conflicts
if [[ -n "${args['--separator']:-}" ]]; then
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
exit 1
fi
# :flag.case_arg # :flag.case_arg
if [[ -n ${2+x} ]]; then if [[ -n ${2+x} ]]; then
@ -4182,11 +4184,6 @@ orcli_export_jsonl_parse_requirements() {
# :flag.case # :flag.case
--separator) --separator)
# :flag.conflicts
if [[ -n "${args['--mode']:-}" ]]; then
printf "conflicting options: %s cannot be used with %s\n" "$key" "--mode" >&2
exit 1
fi
# :flag.case_arg # :flag.case_arg
if [[ -n ${2+x} ]]; then if [[ -n ${2+x} ]]; then
@ -4321,11 +4318,6 @@ orcli_export_tsv_parse_requirements() {
case "$key" in case "$key" in
# :flag.case # :flag.case
--mode) --mode)
# :flag.conflicts
if [[ -n "${args['--separator']:-}" ]]; then
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
exit 1
fi
# :flag.case_arg # :flag.case_arg
if [[ -n ${2+x} ]]; then if [[ -n ${2+x} ]]; then
@ -4505,11 +4497,6 @@ orcli_export_template_parse_requirements() {
# :flag.case # :flag.case
--mode) --mode)
# :flag.conflicts
if [[ -n "${args['--separator']:-}" ]]; then
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
exit 1
fi
# :flag.case_arg # :flag.case_arg
if [[ -n ${2+x} ]]; then if [[ -n ${2+x} ]]; then

View File

@ -328,11 +328,9 @@ commands:
arg: mode arg: mode
allowed: [rows, records] allowed: [rows, records]
default: "rows" default: "rows"
conflicts: [--separator]
- long: --separator - long: --separator
help: character(s) that separates multiple values in one cell (row mode only) help: character(s) that separates multiple values in one cell (row mode only)
arg: separator arg: separator
conflicts: [--mode]
- &facets - &facets
long: --facets long: --facets
help: filter result set by providing an OpenRefine facets config in json help: filter result set by providing an OpenRefine facets config in json

View File

@ -29,7 +29,11 @@ if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
fi fi
# set template # set template
template='{ {{' template='{{'
if [[ ${args[--mode]} == "records" ]]; then
template+='if(row.index - row.record.fromRowIndex == 0,'
fi
template+='"%7B".unescape("url") + " " +'
template+='forEach(' template+='forEach('
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
template+="$multivalued" template+="$multivalued"
@ -43,7 +47,7 @@ if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
template+="v.split(\"${args[--separator]}\").jsonize()" template+="v.split(\"${args[--separator]}\").jsonize()"
fi fi
if [[ ${args[--mode]} == "records" ]]; then if [[ ${args[--mode]} == "records" ]]; then
template+='row.record.cells[cn].jsonize()' template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
fi fi
template+=', "\"" + cn + "\": " + v.jsonize())' template+=', "\"" + cn + "\": " + v.jsonize())'
else else
@ -51,8 +55,11 @@ else
fi fi
template+=', null)' template+=', null)'
template+=').join(", ")' template+=').join(", ")'
template+='}} }' template+='+ " " + "%7D".unescape("url") + "\n"'
template+='{{ "\n" }}' if [[ ${args[--mode]} == "records" ]]; then
template+=', "")'
fi
template+='}}'
# assemble specific post data # assemble specific post data
data+=("project=${projectid}") data+=("project=${projectid}")

View File

@ -0,0 +1,27 @@
#!/bin/bash
# shellcheck disable=SC1083
#
# Test: `orcli export jsonl` combined with --facets.
# Imports data/duplicates.csv, exports it as JSON Lines through a
# case-insensitive regex text facet on the "name" column (^Ben) and compares
# the result with the expected lines below.

test_id="export-jsonl-facets"

# scratch directory; removed on exit as well as on INT, QUIT and TERM
scratch_dir="$(mktemp -d)"
trap 'rm -rf "${scratch_dir}"' EXIT INT QUIT TERM

# fixture
cp data/duplicates.csv "${scratch_dir}/${test_id}.csv"

# expected output
cat > "${scratch_dir}/${test_id}.assert" << 'EXPECTED'
{ "email": "ben.tyler@example3.org", "name": "Ben Tyler", "state": "NV", "gender": "M", "purchase": "Flashlight" }
{ "email": "ben.morisson@example6.org", "name": "Ben Morisson", "state": "FL", "gender": "M", "purchase": "Amplifier" }
EXPECTED

# run import and faceted export from inside the scratch directory
if ! cd "${scratch_dir}"; then
  exit 1
fi
orcli import csv "${test_id}.csv" --projectName "${test_id}"
orcli export jsonl "${test_id}" --output "${test_id}.output" --facets \
  '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "query": "^Ben" } ]'

# the diff exit status is the test result
diff -u "${test_id}.assert" "${test_id}.output"

View File

@ -0,0 +1,34 @@
#!/bin/bash
# shellcheck disable=SC1083
#
# Test: `orcli export jsonl --mode records`.
# Imports a small CSV in which follow-up rows leave the leading columns empty,
# exports it in records mode and compares the result with the expected lines
# below (one JSON object per record).

test_id="export-jsonl-records"

# scratch directory; removed on exit as well as on INT, QUIT and TERM
scratch_dir="$(mktemp -d)"
trap 'rm -rf "${scratch_dir}"' EXIT INT QUIT TERM

# fixture
cat > "${scratch_dir}/${test_id}.csv" << 'FIXTURE'
email,name,state,gender,purchase
danny.baron@example1.com,Danny Baron,CA,M,TV
,D. Baron,,,Winter jacket
,Daniel Baron,,,Bike
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight
melanie.white@example2.edu,Melanie White,NC,F,iPad
,,,,iPhone
FIXTURE

# expected output
cat > "${scratch_dir}/${test_id}.assert" << 'EXPECTED'
{ "email": "danny.baron@example1.com", "name": [ "Danny Baron", "D. Baron", "Daniel Baron" ], "state": "CA", "gender": "M", "purchase": [ "TV", "Winter jacket", "Bike" ] }
{ "email": "ben.tyler@example3.org", "name": [ "Ben Tyler" ], "state": "NV", "gender": "M", "purchase": [ "Flashlight" ] }
{ "email": "melanie.white@example2.edu", "name": [ "Melanie White" ], "state": "NC", "gender": "F", "purchase": [ "iPad", "iPhone" ] }
EXPECTED

# run import and records-mode export from inside the scratch directory
if ! cd "${scratch_dir}"; then
  exit 1
fi
orcli import csv "${test_id}.csv" --projectName "${test_id}"
orcli export jsonl "${test_id}" \
  --output "${test_id}.output" \
  --mode records

# the diff exit status is the test result
diff -u "${test_id}.assert" "${test_id}.output"

View File

@ -0,0 +1,33 @@
#!/bin/bash
# shellcheck disable=SC1083
#
# Test: `orcli export jsonl --separator`.
# Imports data/duplicates.csv, exports it as JSON Lines with a single space as
# the multi-value separator and compares the result with the expected lines
# below.

test_id="export-jsonl-separator"

# scratch directory; removed on exit as well as on INT, QUIT and TERM
scratch_dir="$(mktemp -d)"
trap 'rm -rf "${scratch_dir}"' EXIT INT QUIT TERM

# fixture
cp data/duplicates.csv "${scratch_dir}/${test_id}.csv"

# expected output
cat > "${scratch_dir}/${test_id}.assert" << 'EXPECTED'
{ "email": "danny.baron@example1.com", "name": [ "Danny", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "TV" ] }
{ "email": "melanie.white@example2.edu", "name": [ "Melanie", "White" ], "state": "NC", "gender": "F", "purchase": [ "iPhone" ] }
{ "email": "danny.baron@example1.com", "name": [ "D.", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "Winter", "jacket" ] }
{ "email": "ben.tyler@example3.org", "name": [ "Ben", "Tyler" ], "state": "NV", "gender": "M", "purchase": [ "Flashlight" ] }
{ "email": "arthur.duff@example4.com", "name": [ "Arthur", "Duff" ], "state": "OR", "gender": "M", "purchase": [ "Dining", "table" ] }
{ "email": "danny.baron@example1.com", "name": [ "Daniel", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "Bike" ] }
{ "email": "jean.griffith@example5.org", "name": [ "Jean", "Griffith" ], "state": "WA", "gender": "F", "purchase": [ "Power", "drill" ] }
{ "email": "melanie.white@example2.edu", "name": [ "Melanie", "White" ], "state": "NC", "gender": "F", "purchase": [ "iPad" ] }
{ "email": "ben.morisson@example6.org", "name": [ "Ben", "Morisson" ], "state": "FL", "gender": "M", "purchase": [ "Amplifier" ] }
{ "email": "arthur.duff@example4.com", "name": [ "Arthur", "Duff" ], "state": "OR", "gender": "M", "purchase": [ "Night", "table" ] }
EXPECTED

# run import and separator export from inside the scratch directory
if ! cd "${scratch_dir}"; then
  exit 1
fi
orcli import csv "${test_id}.csv" --projectName "${test_id}"
orcli export jsonl "${test_id}" \
  --output "${test_id}.output" \
  --separator ' '

# the diff exit status is the test result
diff -u "${test_id}.assert" "${test_id}.output"

33
tests/export-jsonl.sh Normal file
View File

@ -0,0 +1,33 @@
#!/bin/bash
# shellcheck disable=SC1083
#
# Test: plain `orcli export jsonl`.
# Imports data/duplicates.csv, exports it as JSON Lines without further
# options and compares the result with the expected lines below.

test_id="export-jsonl"

# scratch directory; removed on exit as well as on INT, QUIT and TERM
scratch_dir="$(mktemp -d)"
trap 'rm -rf "${scratch_dir}"' EXIT INT QUIT TERM

# fixture
cp data/duplicates.csv "${scratch_dir}/${test_id}.csv"

# expected output
cat > "${scratch_dir}/${test_id}.assert" << 'EXPECTED'
{ "email": "danny.baron@example1.com", "name": "Danny Baron", "state": "CA", "gender": "M", "purchase": "TV" }
{ "email": "melanie.white@example2.edu", "name": "Melanie White", "state": "NC", "gender": "F", "purchase": "iPhone" }
{ "email": "danny.baron@example1.com", "name": "D. Baron", "state": "CA", "gender": "M", "purchase": "Winter jacket" }
{ "email": "ben.tyler@example3.org", "name": "Ben Tyler", "state": "NV", "gender": "M", "purchase": "Flashlight" }
{ "email": "arthur.duff@example4.com", "name": "Arthur Duff", "state": "OR", "gender": "M", "purchase": "Dining table" }
{ "email": "danny.baron@example1.com", "name": "Daniel Baron", "state": "CA", "gender": "M", "purchase": "Bike" }
{ "email": "jean.griffith@example5.org", "name": "Jean Griffith", "state": "WA", "gender": "F", "purchase": "Power drill" }
{ "email": "melanie.white@example2.edu", "name": "Melanie White", "state": "NC", "gender": "F", "purchase": "iPad" }
{ "email": "ben.morisson@example6.org", "name": "Ben Morisson", "state": "FL", "gender": "M", "purchase": "Amplifier" }
{ "email": "arthur.duff@example4.com", "name": "Arthur Duff", "state": "OR", "gender": "M", "purchase": "Night table" }
EXPECTED

# run import and export from inside the scratch directory
if ! cd "${scratch_dir}"; then
  exit 1
fi
orcli import csv "${test_id}.csv" --projectName "${test_id}"
orcli export jsonl "${test_id}" \
  --output "${test_id}.output"

# the diff exit status is the test result
diff -u "${test_id}.assert" "${test_id}.output"