some more import csv tests

This commit is contained in:
felixlohmeier 2023-01-13 22:30:12 +00:00
parent cd7d62e254
commit a7718283c9
12 changed files with 279 additions and 5 deletions

24
orcli
View File

@ -245,7 +245,7 @@ orcli_import_csv_usage() {
# :flag.usage # :flag.usage
printf " %s\n" "--columnNames COLUMNNAMES" printf " %s\n" "--columnNames COLUMNNAMES"
printf " set column names (comma separated)\n" printf " set column names (comma separated)\n hint: add --ignoreLines 1 to overwrite existing header row\n"
echo echo
# :flag.usage # :flag.usage
@ -383,7 +383,7 @@ orcli_import_tsv_usage() {
# :flag.usage # :flag.usage
printf " %s\n" "--columnNames COLUMNNAMES" printf " %s\n" "--columnNames COLUMNNAMES"
printf " set column names (comma separated)\n" printf " set column names (comma separated)\n hint: add --ignoreLines 1 to overwrite existing header row\n"
echo echo
# :flag.usage # :flag.usage
@ -2106,6 +2106,11 @@ orcli_import_csv_parse_requirements() {
# :flag.case # :flag.case
--columnNames) --columnNames)
# :flag.conflicts
if [[ -n "${args['--headerLines']:-}" ]]; then
printf "conflicting options: %s cannot be used with %s\n" "$key" "--headerLines" >&2
exit 1
fi
# :flag.case_arg # :flag.case_arg
if [[ -n ${2+x} ]]; then if [[ -n ${2+x} ]]; then
@ -2144,6 +2149,11 @@ orcli_import_csv_parse_requirements() {
# :flag.case # :flag.case
--headerLines) --headerLines)
# :flag.conflicts
if [[ -n "${args['--columnNames']:-}" ]]; then
printf "conflicting options: %s cannot be used with %s\n" "$key" "--columnNames" >&2
exit 1
fi
# :flag.case_arg # :flag.case_arg
if [[ -n ${2+x} ]]; then if [[ -n ${2+x} ]]; then
@ -2363,6 +2373,11 @@ orcli_import_tsv_parse_requirements() {
# :flag.case # :flag.case
--columnNames) --columnNames)
# :flag.conflicts
if [[ -n "${args['--headerLines']:-}" ]]; then
printf "conflicting options: %s cannot be used with %s\n" "$key" "--headerLines" >&2
exit 1
fi
# :flag.case_arg # :flag.case_arg
if [[ -n ${2+x} ]]; then if [[ -n ${2+x} ]]; then
@ -2401,6 +2416,11 @@ orcli_import_tsv_parse_requirements() {
# :flag.case # :flag.case
--headerLines) --headerLines)
# :flag.conflicts
if [[ -n "${args['--columnNames']:-}" ]]; then
printf "conflicting options: %s cannot be used with %s\n" "$key" "--columnNames" >&2
exit 1
fi
# :flag.case_arg # :flag.case_arg
if [[ -n ${2+x} ]]; then if [[ -n ${2+x} ]]; then

View File

@ -75,8 +75,11 @@ commands:
help: store blank cells as empty strings instead of nulls help: store blank cells as empty strings instead of nulls
- &columnNames - &columnNames
long: --columnNames long: --columnNames
help: set column names (comma separated) help: |-
set column names (comma separated)
hint: add --ignoreLines 1 to overwrite existing header row
arg: columnNames arg: columnNames
conflicts: [--headerLines]
- &encoding_import - &encoding_import
long: --encoding long: --encoding
help: set character encoding help: set character encoding
@ -89,6 +92,7 @@ commands:
help: parse x line(s) as column headers help: parse x line(s) as column headers
arg: headerLines arg: headerLines
default: "1" default: "1"
conflicts: [--columnNames]
- &ignoreLines - &ignoreLines
long: --ignoreLines long: --ignoreLines
help: ignore first x line(s) at beginning of file help: ignore first x line(s) at beginning of file

View File

@ -0,0 +1,4 @@
a,b,c
1,2,3
ה,י,‗
$,\,'
1 a b c
2 1 2 3
3 ה י
4 $ \ '

View File

@ -10,8 +10,7 @@ trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
cp data/example.csv "${tmpdir}/${t}.csv" cp data/example.csv "${tmpdir}/${t}.csv"
# assertion (empty file) # assertion (empty file)
cat << "DATA" > "${tmpdir}/${t}.assert" touch "${tmpdir}/${t}.assert"
DATA
# action # action
cd "${tmpdir}" || exit 1 cd "${tmpdir}" || exit 1

View File

@ -0,0 +1,49 @@
#!/bin/bash
t="import-csv-blankCellsAsStrings"
# create tmp directory (abort if that fails, otherwise the fixtures below
# would be written relative to an empty path)
tmpdir="$(mktemp -d)" || exit 1
# remove the tmp directory on every exit path
trap '{ rm -rf "${tmpdir}"; }' 0
# turn INT/QUIT/TERM into an exit so the script does not keep running
# after a signal; the exit trap above then performs the cleanup
trap 'exit 1' 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
0,,0
$,\,'
DATA
cat << "DATA" > "${tmpdir}/${t}.transform"
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "b",
"expression": "grel:isNull(value)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 false 3
0 false 0
$ false '
DATA
# action: work inside the tmp directory so the relative fixture names resolve
if ! cd "${tmpdir}"; then
  exit 1
fi
orcli import csv "${t}.csv" \
  --projectName "${t}" \
  --blankCellsAsStrings
orcli transform "${t}" "${tmpdir}/${t}.transform"
orcli export tsv "${t}" >"${t}.output"
# test: the diff result is the exit status of this script
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,27 @@
#!/bin/bash
t="import-csv-columnNames"
# create tmp directory (abort if that fails, otherwise the fixtures below
# would be written relative to an empty path)
tmpdir="$(mktemp -d)" || exit 1
# remove the tmp directory on every exit path
trap '{ rm -rf "${tmpdir}"; }' 0
# turn INT/QUIT/TERM into an exit so the script does not keep running
# after a signal; the exit trap above then performs the cleanup
trap 'exit 1' 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
foo bar baz
a b c
1 2 3
0 0 0
$ \ '
DATA
# action: work inside the tmp directory so the relative fixture names resolve
if ! cd "${tmpdir}"; then
  exit 1
fi
orcli import csv "${t}.csv" \
  --projectName "${t}" \
  --columnNames "foo,bar,baz"
orcli export tsv "${t}" >"${t}.output"
# test: the diff result is the exit status of this script
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,26 @@
#!/bin/bash
t="import-csv-encoding"
# create tmp directory (abort if that fails, otherwise the fixtures below
# would be written relative to an empty path)
tmpdir="$(mktemp -d)" || exit 1
# remove the tmp directory on every exit path
trap '{ rm -rf "${tmpdir}"; }' 0
# turn INT/QUIT/TERM into an exit so the script does not keep running
# after a signal; the exit trap above then performs the cleanup
trap 'exit 1' 2 3 15
# input
cp data/example-iso-8859-1.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
ä é ß
$ \ '
DATA
# action: work inside the tmp directory so the relative fixture names resolve
if ! cd "${tmpdir}"; then
  exit 1
fi
orcli import csv "${t}.csv" \
  --projectName "${t}" \
  --encoding "ISO-8859-1"
orcli export tsv "${t}" >"${t}.output"
# test: the diff result is the exit status of this script
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-guessCellValueTypes"
# create tmp directory (abort if that fails, otherwise the fixtures below
# would be written relative to an empty path)
tmpdir="$(mktemp -d)" || exit 1
# remove the tmp directory on every exit path
trap '{ rm -rf "${tmpdir}"; }' 0
# turn INT/QUIT/TERM into an exit so the script does not keep running
# after a signal; the exit trap above then performs the cleanup
trap 'exit 1' 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
01,02,03
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
1 2 3
$ \ '
DATA
# action: work inside the tmp directory so the relative fixture names resolve
if ! cd "${tmpdir}"; then
  exit 1
fi
orcli import csv "${t}.csv" \
  --projectName "${t}" \
  --guessCellValueTypes
orcli export tsv "${t}" >"${t}.output"
# test: the diff result is the exit status of this script
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,27 @@
#!/bin/bash
t="import-csv-headerLines"
# create tmp directory (abort if that fails, otherwise the fixtures below
# would be written relative to an empty path)
tmpdir="$(mktemp -d)" || exit 1
# remove the tmp directory on every exit path
trap '{ rm -rf "${tmpdir}"; }' 0
# turn INT/QUIT/TERM into an exit so the script does not keep running
# after a signal; the exit trap above then performs the cleanup
trap 'exit 1' 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
Column 1 Column 2 Column 3
a b c
1 2 3
0 0 0
$ \ '
DATA
# action: work inside the tmp directory so the relative fixture names resolve
if ! cd "${tmpdir}"; then
  exit 1
fi
orcli import csv "${t}.csv" \
  --projectName "${t}" \
  --headerLines 0
orcli export tsv "${t}" >"${t}.output"
# test: the diff result is the exit status of this script
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,25 @@
#!/bin/bash
t="import-csv-ignoreLines"
# create tmp directory (abort if that fails, otherwise the fixtures below
# would be written relative to an empty path)
tmpdir="$(mktemp -d)" || exit 1
# remove the tmp directory on every exit path
trap '{ rm -rf "${tmpdir}"; }' 0
# turn INT/QUIT/TERM into an exit so the script does not keep running
# after a signal; the exit trap above then performs the cleanup
trap 'exit 1' 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
1 2 3
0 0 0
$ \ '
DATA
# action: work inside the tmp directory so the relative fixture names resolve
if ! cd "${tmpdir}"; then
  exit 1
fi
orcli import csv "${t}.csv" \
  --projectName "${t}" \
  --ignoreLines 1
orcli export tsv "${t}" >"${t}.output"
# test: the diff result is the exit status of this script
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,32 @@
#!/bin/bash
t="import-csv-ignoreQuoteCharacter"
# create tmp directory (abort if that fails, otherwise the fixtures below
# would be written relative to an empty path)
tmpdir="$(mktemp -d)" || exit 1
# remove the tmp directory on every exit path
trap '{ rm -rf "${tmpdir}"; }' 0
# turn INT/QUIT/TERM into an exit so the script does not keep running
# after a signal; the exit trap above then performs the cleanup
trap 'exit 1' 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,"2,0",3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c Column 4
1 2 0 3
0 0 0
$ \ '
DATA
# action: work inside the tmp directory so the relative fixture names resolve
if ! cd "${tmpdir}"; then
  exit 1
fi
# --headerLines is passed explicitly because OpenRefine 4.x fails without it
orcli import csv "${t}.csv" \
  --projectName "${t}" \
  --ignoreQuoteCharacter \
  --headerLines 1
orcli export tsv "${t}" >"${t}.output"
# test: the diff result is the exit status of this script
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-includeFileSources"
# create tmp directory (abort if that fails, otherwise the fixtures below
# would be written relative to an empty path)
tmpdir="$(mktemp -d)" || exit 1
# remove the tmp directory on every exit path
trap '{ rm -rf "${tmpdir}"; }' 0
# turn INT/QUIT/TERM into an exit so the script does not keep running
# after a signal; the exit trap above then performs the cleanup
trap 'exit 1' 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}-1.csv"
cp data/example.csv "${tmpdir}/${t}-2.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
File a b c
import-csv-includeFileSources-1.csv 1 2 3
import-csv-includeFileSources-1.csv 0 0 0
import-csv-includeFileSources-1.csv $ \ '
import-csv-includeFileSources-2.csv 1 2 3
import-csv-includeFileSources-2.csv 0 0 0
import-csv-includeFileSources-2.csv $ \ '
DATA
# action: work inside the tmp directory so the relative fixture names resolve
if ! cd "${tmpdir}"; then
  exit 1
fi
orcli import csv "${t}-1.csv" "${t}-2.csv" \
  --projectName "${t}" \
  --includeFileSources
orcli export tsv "${t}" >"${t}.output"
# test: the diff result is the exit status of this script
diff -u "${t}.assert" "${t}.output"