export jsonl tests und korrekturen
This commit is contained in:
parent
907cc531ea
commit
d939acc257
35
orcli
35
orcli
|
@ -2309,7 +2309,11 @@ orcli_export_jsonl_command() {
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# set template
|
# set template
|
||||||
template='{ {{'
|
template='{{'
|
||||||
|
if [[ ${args[--mode]} == "records" ]]; then
|
||||||
|
template+='if(row.index - row.record.fromRowIndex == 0,'
|
||||||
|
fi
|
||||||
|
template+='"%7B".unescape("url") + " " +'
|
||||||
template+='forEach('
|
template+='forEach('
|
||||||
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
|
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
|
||||||
template+="$multivalued"
|
template+="$multivalued"
|
||||||
|
@ -2323,7 +2327,7 @@ orcli_export_jsonl_command() {
|
||||||
template+="v.split(\"${args[--separator]}\").jsonize()"
|
template+="v.split(\"${args[--separator]}\").jsonize()"
|
||||||
fi
|
fi
|
||||||
if [[ ${args[--mode]} == "records" ]]; then
|
if [[ ${args[--mode]} == "records" ]]; then
|
||||||
template+='row.record.cells[cn].jsonize()'
|
template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
|
||||||
fi
|
fi
|
||||||
template+=', "\"" + cn + "\": " + v.jsonize())'
|
template+=', "\"" + cn + "\": " + v.jsonize())'
|
||||||
else
|
else
|
||||||
|
@ -2331,8 +2335,11 @@ orcli_export_jsonl_command() {
|
||||||
fi
|
fi
|
||||||
template+=', null)'
|
template+=', null)'
|
||||||
template+=').join(", ")'
|
template+=').join(", ")'
|
||||||
template+='}} }'
|
template+='+ " " + "%7D".unescape("url") + "\n"'
|
||||||
template+='{{ "\n" }}'
|
if [[ ${args[--mode]} == "records" ]]; then
|
||||||
|
template+=', "")'
|
||||||
|
fi
|
||||||
|
template+='}}'
|
||||||
|
|
||||||
# assemble specific post data
|
# assemble specific post data
|
||||||
data+=("project=${projectid}")
|
data+=("project=${projectid}")
|
||||||
|
@ -4162,11 +4169,6 @@ orcli_export_jsonl_parse_requirements() {
|
||||||
case "$key" in
|
case "$key" in
|
||||||
# :flag.case
|
# :flag.case
|
||||||
--mode)
|
--mode)
|
||||||
# :flag.conflicts
|
|
||||||
if [[ -n "${args['--separator']:-}" ]]; then
|
|
||||||
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# :flag.case_arg
|
# :flag.case_arg
|
||||||
if [[ -n ${2+x} ]]; then
|
if [[ -n ${2+x} ]]; then
|
||||||
|
@ -4182,11 +4184,6 @@ orcli_export_jsonl_parse_requirements() {
|
||||||
|
|
||||||
# :flag.case
|
# :flag.case
|
||||||
--separator)
|
--separator)
|
||||||
# :flag.conflicts
|
|
||||||
if [[ -n "${args['--mode']:-}" ]]; then
|
|
||||||
printf "conflicting options: %s cannot be used with %s\n" "$key" "--mode" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# :flag.case_arg
|
# :flag.case_arg
|
||||||
if [[ -n ${2+x} ]]; then
|
if [[ -n ${2+x} ]]; then
|
||||||
|
@ -4321,11 +4318,6 @@ orcli_export_tsv_parse_requirements() {
|
||||||
case "$key" in
|
case "$key" in
|
||||||
# :flag.case
|
# :flag.case
|
||||||
--mode)
|
--mode)
|
||||||
# :flag.conflicts
|
|
||||||
if [[ -n "${args['--separator']:-}" ]]; then
|
|
||||||
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# :flag.case_arg
|
# :flag.case_arg
|
||||||
if [[ -n ${2+x} ]]; then
|
if [[ -n ${2+x} ]]; then
|
||||||
|
@ -4505,11 +4497,6 @@ orcli_export_template_parse_requirements() {
|
||||||
|
|
||||||
# :flag.case
|
# :flag.case
|
||||||
--mode)
|
--mode)
|
||||||
# :flag.conflicts
|
|
||||||
if [[ -n "${args['--separator']:-}" ]]; then
|
|
||||||
printf "conflicting options: %s cannot be used with %s\n" "$key" "--separator" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
# :flag.case_arg
|
# :flag.case_arg
|
||||||
if [[ -n ${2+x} ]]; then
|
if [[ -n ${2+x} ]]; then
|
||||||
|
|
|
@ -328,11 +328,9 @@ commands:
|
||||||
arg: mode
|
arg: mode
|
||||||
allowed: [rows, records]
|
allowed: [rows, records]
|
||||||
default: "rows"
|
default: "rows"
|
||||||
conflicts: [--separator]
|
|
||||||
- long: --separator
|
- long: --separator
|
||||||
help: character(s) that separates multiple values in one cell (row mode only)
|
help: character(s) that separates multiple values in one cell (row mode only)
|
||||||
arg: separator
|
arg: separator
|
||||||
conflicts: [--mode]
|
|
||||||
- &facets
|
- &facets
|
||||||
long: --facets
|
long: --facets
|
||||||
help: filter result set by providing an OpenRefine facets config in json
|
help: filter result set by providing an OpenRefine facets config in json
|
||||||
|
|
|
@ -29,7 +29,11 @@ if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# set template
|
# set template
|
||||||
template='{ {{'
|
template='{{'
|
||||||
|
if [[ ${args[--mode]} == "records" ]]; then
|
||||||
|
template+='if(row.index - row.record.fromRowIndex == 0,'
|
||||||
|
fi
|
||||||
|
template+='"%7B".unescape("url") + " " +'
|
||||||
template+='forEach('
|
template+='forEach('
|
||||||
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
|
if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
|
||||||
template+="$multivalued"
|
template+="$multivalued"
|
||||||
|
@ -43,7 +47,7 @@ if [[ ${args[--separator]} || ${args[--mode]} == "records" ]]; then
|
||||||
template+="v.split(\"${args[--separator]}\").jsonize()"
|
template+="v.split(\"${args[--separator]}\").jsonize()"
|
||||||
fi
|
fi
|
||||||
if [[ ${args[--mode]} == "records" ]]; then
|
if [[ ${args[--mode]} == "records" ]]; then
|
||||||
template+='row.record.cells[cn].jsonize()'
|
template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
|
||||||
fi
|
fi
|
||||||
template+=', "\"" + cn + "\": " + v.jsonize())'
|
template+=', "\"" + cn + "\": " + v.jsonize())'
|
||||||
else
|
else
|
||||||
|
@ -51,8 +55,11 @@ else
|
||||||
fi
|
fi
|
||||||
template+=', null)'
|
template+=', null)'
|
||||||
template+=').join(", ")'
|
template+=').join(", ")'
|
||||||
template+='}} }'
|
template+='+ " " + "%7D".unescape("url") + "\n"'
|
||||||
template+='{{ "\n" }}'
|
if [[ ${args[--mode]} == "records" ]]; then
|
||||||
|
template+=', "")'
|
||||||
|
fi
|
||||||
|
template+='}}'
|
||||||
|
|
||||||
# assemble specific post data
|
# assemble specific post data
|
||||||
data+=("project=${projectid}")
|
data+=("project=${projectid}")
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# shellcheck disable=SC1083
|
||||||
|
|
||||||
|
t="export-jsonl-facets"
|
||||||
|
|
||||||
|
# create tmp directory
|
||||||
|
tmpdir="$(mktemp -d)"
|
||||||
|
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
|
||||||
|
|
||||||
|
# input
|
||||||
|
cp data/duplicates.csv "${tmpdir}/${t}.csv"
|
||||||
|
|
||||||
|
# assertion
|
||||||
|
cat << "DATA" > "${tmpdir}/${t}.assert"
|
||||||
|
{ "email": "ben.tyler@example3.org", "name": "Ben Tyler", "state": "NV", "gender": "M", "purchase": "Flashlight" }
|
||||||
|
{ "email": "ben.morisson@example6.org", "name": "Ben Morisson", "state": "FL", "gender": "M", "purchase": "Amplifier" }
|
||||||
|
DATA
|
||||||
|
|
||||||
|
# action
|
||||||
|
cd "${tmpdir}" || exit 1
|
||||||
|
orcli import csv "${t}.csv" --projectName "${t}"
|
||||||
|
orcli export jsonl "${t}" \
|
||||||
|
--output "${t}.output" \
|
||||||
|
--facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "query": "^Ben" } ]'
|
||||||
|
|
||||||
|
# test
|
||||||
|
diff -u "${t}.assert" "${t}.output"
|
|
@ -0,0 +1,34 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# shellcheck disable=SC1083
|
||||||
|
|
||||||
|
t="export-jsonl-records"
|
||||||
|
|
||||||
|
# create tmp directory
|
||||||
|
tmpdir="$(mktemp -d)"
|
||||||
|
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
|
||||||
|
|
||||||
|
# input
|
||||||
|
cat << "DATA" > "${tmpdir}/${t}.csv"
|
||||||
|
email,name,state,gender,purchase
|
||||||
|
danny.baron@example1.com,Danny Baron,CA,M,TV
|
||||||
|
,D. Baron,,,Winter jacket
|
||||||
|
,Daniel Baron,,,Bike
|
||||||
|
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight
|
||||||
|
melanie.white@example2.edu,Melanie White,NC,F,iPad
|
||||||
|
,,,,iPhone
|
||||||
|
DATA
|
||||||
|
|
||||||
|
# assertion
|
||||||
|
cat << "DATA" > "${tmpdir}/${t}.assert"
|
||||||
|
{ "email": "danny.baron@example1.com", "name": [ "Danny Baron", "D. Baron", "Daniel Baron" ], "state": "CA", "gender": "M", "purchase": [ "TV", "Winter jacket", "Bike" ] }
|
||||||
|
{ "email": "ben.tyler@example3.org", "name": [ "Ben Tyler" ], "state": "NV", "gender": "M", "purchase": [ "Flashlight" ] }
|
||||||
|
{ "email": "melanie.white@example2.edu", "name": [ "Melanie White" ], "state": "NC", "gender": "F", "purchase": [ "iPad", "iPhone" ] }
|
||||||
|
DATA
|
||||||
|
|
||||||
|
# action
|
||||||
|
cd "${tmpdir}" || exit 1
|
||||||
|
orcli import csv "${t}.csv" --projectName "${t}"
|
||||||
|
orcli export jsonl "${t}" --output "${t}.output" --mode records
|
||||||
|
|
||||||
|
# test
|
||||||
|
diff -u "${t}.assert" "${t}.output"
|
|
@ -0,0 +1,33 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# shellcheck disable=SC1083
|
||||||
|
|
||||||
|
t="export-jsonl-separator"
|
||||||
|
|
||||||
|
# create tmp directory
|
||||||
|
tmpdir="$(mktemp -d)"
|
||||||
|
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
|
||||||
|
|
||||||
|
# input
|
||||||
|
cp data/duplicates.csv "${tmpdir}/${t}.csv"
|
||||||
|
|
||||||
|
# assertion
|
||||||
|
cat << "DATA" > "${tmpdir}/${t}.assert"
|
||||||
|
{ "email": "danny.baron@example1.com", "name": [ "Danny", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "TV" ] }
|
||||||
|
{ "email": "melanie.white@example2.edu", "name": [ "Melanie", "White" ], "state": "NC", "gender": "F", "purchase": [ "iPhone" ] }
|
||||||
|
{ "email": "danny.baron@example1.com", "name": [ "D.", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "Winter", "jacket" ] }
|
||||||
|
{ "email": "ben.tyler@example3.org", "name": [ "Ben", "Tyler" ], "state": "NV", "gender": "M", "purchase": [ "Flashlight" ] }
|
||||||
|
{ "email": "arthur.duff@example4.com", "name": [ "Arthur", "Duff" ], "state": "OR", "gender": "M", "purchase": [ "Dining", "table" ] }
|
||||||
|
{ "email": "danny.baron@example1.com", "name": [ "Daniel", "Baron" ], "state": "CA", "gender": "M", "purchase": [ "Bike" ] }
|
||||||
|
{ "email": "jean.griffith@example5.org", "name": [ "Jean", "Griffith" ], "state": "WA", "gender": "F", "purchase": [ "Power", "drill" ] }
|
||||||
|
{ "email": "melanie.white@example2.edu", "name": [ "Melanie", "White" ], "state": "NC", "gender": "F", "purchase": [ "iPad" ] }
|
||||||
|
{ "email": "ben.morisson@example6.org", "name": [ "Ben", "Morisson" ], "state": "FL", "gender": "M", "purchase": [ "Amplifier" ] }
|
||||||
|
{ "email": "arthur.duff@example4.com", "name": [ "Arthur", "Duff" ], "state": "OR", "gender": "M", "purchase": [ "Night", "table" ] }
|
||||||
|
DATA
|
||||||
|
|
||||||
|
# action
|
||||||
|
cd "${tmpdir}" || exit 1
|
||||||
|
orcli import csv "${t}.csv" --projectName "${t}"
|
||||||
|
orcli export jsonl "${t}" --output "${t}.output" --separator ' '
|
||||||
|
|
||||||
|
# test
|
||||||
|
diff -u "${t}.assert" "${t}.output"
|
|
@ -0,0 +1,33 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# shellcheck disable=SC1083
|
||||||
|
|
||||||
|
t="export-jsonl"
|
||||||
|
|
||||||
|
# create tmp directory
|
||||||
|
tmpdir="$(mktemp -d)"
|
||||||
|
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
|
||||||
|
|
||||||
|
# input
|
||||||
|
cp data/duplicates.csv "${tmpdir}/${t}.csv"
|
||||||
|
|
||||||
|
# assertion
|
||||||
|
cat << "DATA" > "${tmpdir}/${t}.assert"
|
||||||
|
{ "email": "danny.baron@example1.com", "name": "Danny Baron", "state": "CA", "gender": "M", "purchase": "TV" }
|
||||||
|
{ "email": "melanie.white@example2.edu", "name": "Melanie White", "state": "NC", "gender": "F", "purchase": "iPhone" }
|
||||||
|
{ "email": "danny.baron@example1.com", "name": "D. Baron", "state": "CA", "gender": "M", "purchase": "Winter jacket" }
|
||||||
|
{ "email": "ben.tyler@example3.org", "name": "Ben Tyler", "state": "NV", "gender": "M", "purchase": "Flashlight" }
|
||||||
|
{ "email": "arthur.duff@example4.com", "name": "Arthur Duff", "state": "OR", "gender": "M", "purchase": "Dining table" }
|
||||||
|
{ "email": "danny.baron@example1.com", "name": "Daniel Baron", "state": "CA", "gender": "M", "purchase": "Bike" }
|
||||||
|
{ "email": "jean.griffith@example5.org", "name": "Jean Griffith", "state": "WA", "gender": "F", "purchase": "Power drill" }
|
||||||
|
{ "email": "melanie.white@example2.edu", "name": "Melanie White", "state": "NC", "gender": "F", "purchase": "iPad" }
|
||||||
|
{ "email": "ben.morisson@example6.org", "name": "Ben Morisson", "state": "FL", "gender": "M", "purchase": "Amplifier" }
|
||||||
|
{ "email": "arthur.duff@example4.com", "name": "Arthur Duff", "state": "OR", "gender": "M", "purchase": "Night table" }
|
||||||
|
DATA
|
||||||
|
|
||||||
|
# action
|
||||||
|
cd "${tmpdir}" || exit 1
|
||||||
|
orcli import csv "${t}.csv" --projectName "${t}"
|
||||||
|
orcli export jsonl "${t}" --output "${t}.output"
|
||||||
|
|
||||||
|
# test
|
||||||
|
diff -u "${t}.assert" "${t}.output"
|
Loading…
Reference in New Issue