Merge pull request #98 from opencultureconsulting/felixlohmeier/import-csv-tsv-options-83

Felixlohmeier/import-csv-tsv-options-83
This commit is contained in:
Felix Lohmeier 2023-01-15 00:44:01 +01:00 committed by GitHub
commit 40571d1e38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 1647 additions and 341 deletions

View File

@ -10,7 +10,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
* allows execution of arbitrary bash scripts
* interactive mode for playing around and debugging
* your existing OpenRefine data will not be touched
* import CSV, ~~TSV, line-based TXT, fixed-width TXT, JSON or XML~~ (and specify input options)
* import CSV, TSV, ~~line-based TXT, fixed-width TXT, JSON or XML~~ (and specify input options)
* supports stdin, multiple files and URLs
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
* orcli calls specific endpoints for each operation to provide improved error handling and logging

1025
orcli

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,7 @@ examples:
- orcli transform "duplicates" "https://git.io/fj5ju"
- orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli delete "duplicates"
- orcli run --interactive
- |-
orcli run << EOF
@ -36,19 +37,23 @@ commands:
- name: delete
help: delete OpenRefine project
args:
- name: project
- &project
name: project
help: project name or id
required: true
flags:
- long: --force
short: -f
help: delete all projects with the same name
- long: --quiet
- &quiet
long: --quiet
short: -q
help: suppress log output, print errors only
examples:
- orcli delete "duplicates"
- orcli delete "duplicates" --force
- orcli delete 1234567890123
- for p in $(orcli list); do orcli delete ${p:0:13}; done
- name: import
help: commands to create OpenRefine projects from files or URLs
@ -57,68 +62,137 @@ commands:
- name: csv
help: import character-separated values (CSV)
args:
- name: file
- &file
name: file
help: Path to one or more files or URLs. When FILE is -, read standard input.
default: "-"
repeatable: true
flags:
- long: --separator
- &separator
long: --separator
help: character(s) that separates columns
arg: separator
default: ","
- long: --encoding
- &blankCellsAsStrings
long: --blankCellsAsStrings
help: store blank cells as empty strings instead of nulls
- &columnNames
long: --columnNames
help: |-
set column names (comma separated)
hint: add --ignoreLines 1 to overwrite existing header row
arg: columnNames
conflicts: [--headerLines]
- &encoding_import
long: --encoding
help: set character encoding
arg: encoding
- long: --trimStrings
- &guessCellValueTypes
long: --guessCellValueTypes
help: attempt to parse cell text into numbers
- &headerLines
long: --headerLines
help: parse x line(s) as column headers
arg: headerLines
default: "1"
conflicts: [--columnNames]
- &ignoreLines
long: --ignoreLines
help: ignore first x line(s) at beginning of file
arg: ignoreLines
default: "-1"
- &ignoreQuoteCharacter
long: --ignoreQuoteCharacter
help: do not use any quote character to enclose cells containing column separators
- &includeFileSources
long: --includeFileSources
help: add column with file source
- &includeArchiveFileName
long: --includeArchiveFileName
help: add column with archive file name
- &limit
long: --limit
help: load at most x row(s) of data
arg: limit
default: "-1"
- &quoteCharacter
long: --quoteCharacter
help: quote character to enclose cells containing column separators
arg: quoteCharacter
default: '\\\"'
- &skipBlankRows
long: --skipBlankRows
help: do not store blank rows
- &skipDataLines
long: --skipDataLines
help: discard initial x row(s) of data
arg: skipDataLines
default: "0"
- &trimStrings
long: --trimStrings
help: trim leading & trailing whitespace from strings
- long: --projectName
- &projectName
long: --projectName
arg: projectName
help: set a name for the OpenRefine project
- long: --quiet
short: -q
help: suppress log output, print errors only
- &projectTags
long: --projectTags
help: set project tags (comma separated)
arg: projectTags
- *quiet
examples:
- orcli import csv "file"
- orcli import csv "file1" "file2"
- cat "file" | orcli import csv
- head -n 100 "file" | orcli import csv
- orcli import csv "https://git.io/fj5hF"
# multi-line example: every line except the last must end with the
# continuation marker, otherwise the following flag is cut off from the command
- |-
  orcli import csv "file" \\\\
  --separator ";" \\\\
  --columnNames "foo,bar,baz" \\\\
  --ignoreLines 1 \\\\
  --encoding "ISO-8859-1" \\\\
  --limit 100 \\\\
  --trimStrings \\\\
  --projectName "duplicates" \\\\
  --projectTags "test,urgent"
- name: tsv
help: import tab-separated values (TSV)
args:
- name: file
help: Path to one or more files or URLs. When FILE is -, read standard input.
default: "-"
repeatable: true
- *file
flags:
- long: --encoding
help: set character encoding
arg: encoding
- long: --trimStrings
help: trim leading & trailing whitespace from strings
- long: --projectName
arg: projectName
help: set a name for the OpenRefine project
- long: --quiet
short: -q
help: suppress log output, print errors only
- *blankCellsAsStrings
- *columnNames
- *encoding_import
- *guessCellValueTypes
- *headerLines
- *ignoreLines
- *ignoreQuoteCharacter
- *includeFileSources
- *includeArchiveFileName
- *limit
- *quoteCharacter
- *skipBlankRows
- *skipDataLines
- *trimStrings
- *projectName
- *projectTags
- *quiet
examples:
- orcli import tsv "file"
- orcli import tsv "file1" "file2"
- cat "file" | orcli import tsv
- head -n 100 "file" | orcli import tsv
- orcli import tsv "https://git.io/fj5hF"
# multi-line example: `import tsv` defines no --separator flag, so none is
# shown here; every line except the last must end with the continuation marker
- |-
  orcli import tsv "file" \\\\
  --columnNames "foo,bar,baz" \\\\
  --ignoreLines 1 \\\\
  --encoding "ISO-8859-1" \\\\
  --limit 100 \\\\
  --trimStrings \\\\
  --projectName "duplicates" \\\\
  --projectTags "test,urgent"
- name: list
help: list projects on OpenRefine server
@ -126,12 +200,11 @@ commands:
- name: info
help: show OpenRefine project's metadata
args:
- name: project
help: project name or id
required: true
- *project
examples:
- orcli info "duplicates"
- orcli info 1234567890123
- orcli info "duplicates" | jq -r .columns[]
- name: test
help: run functional tests on tmp OpenRefine workspace
@ -139,17 +212,10 @@ commands:
- name: transform
help: apply undo/redo JSON file(s) to an OpenRefine project
args:
- name: project
help: project name or id
required: true
- name: file
help: Path to one or more files or URLs containing OpenRefine's undo/redo operation history in JSON format. When FILE is -, read standard input.
default: "-"
repeatable: true
- *project
- *file
flags:
- long: --quiet
short: -q
help: suppress log output, print errors only
- *quiet
examples:
- orcli transform "duplicates" "history.json"
- cat "history.json" | orcli transform "duplicates"
@ -163,31 +229,27 @@ commands:
- name: tsv
help: export tab-separated values (TSV)
args:
- name: project
help: project name or id
required: true
- *project
flags:
- long: --output
- &output
long: --output
help: Write to file instead of stdout
arg: file
- long: --encoding
- &encoding_export
long: --encoding
help: set character encoding
arg: encoding
default: "UTF-8"
- long: --quiet
short: -q
help: suppress log output, print errors only
- *quiet
examples:
- orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli export tsv "duplicates" --encoding "ISO-8859-1"
- name: run
help: run tmp OpenRefine workspace and execute shell script(s)
args:
- name: file
help: Path to one or more files. When FILE is -, read standard input.
default: "-"
repeatable: true
- *file
flags:
- long: --memory
help: maximum RAM for OpenRefine java heap space
@ -199,9 +261,7 @@ commands:
default: "3333"
- long: --interactive
help: do not exit on error and keep bash shell open
- long: --quiet
short: -q
help: suppress log output, print errors only
- *quiet
examples:
- orcli run --interactive
- |-

View File

@ -1,14 +1,12 @@
# shellcheck shell=bash
# shellcheck shell=bash disable=SC2154
# call init_import function to eval args and to set basic post data
init_import
# check if stdin is present if selected
if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then
if ! read -u 0 -t 0; then
orcli_import_csv_usage
exit 1
fi
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
orcli_import_csv_usage
exit 1
fi
# assemble specific post data (some options require json format)
@ -19,9 +17,67 @@ if [[ ${args[--encoding]} ]]; then
options+=', '
options+="\"encoding\": \"${args[--encoding]}\""
fi
if [[ ${args[--blankCellsAsStrings]} ]]; then
options+=', '
options+='"storeBlankCellsAsNulls": false'
fi
if [[ ${args[--columnNames]} ]]; then
IFS=',' read -ra columnNames <<< "${args[--columnNames]}"
options+=', '
options+="\"columnNames\": [ $(printf ',"'%s'"' "${columnNames[@]}" | cut -c2-) ]"
fi
if [[ ${args[--guessCellValueTypes]} ]]; then
options+=', '
options+='"guessCellValueTypes": true'
fi
if [[ ${args[--headerLines]} ]]; then
options+=', '
options+="\"headerLines\": ${args[--headerLines]}"
fi
if [[ ${args[--ignoreLines]} ]]; then
options+=', '
options+="\"ignoreLines\": ${args[--ignoreLines]}"
fi
if [[ ${args[--ignoreQuoteCharacter]} ]]; then
options+=', '
options+='"processQuotes": false'
fi
# --includeFileSources: request the extra column that names the source file.
# The key is double-quoted like every other entry so ${options} stays valid JSON.
if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
options+=', '
options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
fi
if [[ ${args[--skipBlankRows]} ]]; then
options+=', '
options+='"storeBlankRows": false'
fi
if [[ ${args[--skipDataLines]} ]]; then
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+="\"trimStrings\": true"
options+='"trimStrings": true'
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
options+=' }'
data+=("options=${options}")

View File

@ -1,14 +1,12 @@
# shellcheck shell=bash
# shellcheck shell=bash disable=SC2154
# call init_import function to eval args and to set basic post data
init_import
# check if stdin is present if selected
if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then
if ! read -u 0 -t 0; then
orcli_import_tsv_usage
exit 1
fi
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
orcli_import_tsv_usage
exit 1
fi
# assemble specific post data (some options require json format)
@ -19,9 +17,67 @@ if [[ ${args[--encoding]} ]]; then
options+=', '
options+="\"encoding\": \"${args[--encoding]}\""
fi
if [[ ${args[--blankCellsAsStrings]} ]]; then
options+=', '
options+='"storeBlankCellsAsNulls": false'
fi
if [[ ${args[--columnNames]} ]]; then
IFS=',' read -ra columnNames <<< "${args[--columnNames]}"
options+=', '
options+="\"columnNames\": [ $(printf ',"'%s'"' "${columnNames[@]}" | cut -c2-) ]"
fi
if [[ ${args[--guessCellValueTypes]} ]]; then
options+=', '
options+='"guessCellValueTypes": true'
fi
if [[ ${args[--headerLines]} ]]; then
options+=', '
options+="\"headerLines\": ${args[--headerLines]}"
fi
if [[ ${args[--ignoreLines]} ]]; then
options+=', '
options+="\"ignoreLines\": ${args[--ignoreLines]}"
fi
if [[ ${args[--ignoreQuoteCharacter]} ]]; then
options+=', '
options+='"processQuotes": false'
fi
# --includeFileSources: request the extra column that names the source file.
# The key is double-quoted like every other entry so ${options} stays valid JSON.
if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
options+=', '
options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
fi
if [[ ${args[--skipBlankRows]} ]]; then
options+=', '
options+='"storeBlankRows": false'
fi
if [[ ${args[--skipDataLines]} ]]; then
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then
options+=', '
options+="\"trimStrings\": true"
options+='"trimStrings": true'
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
options+=' }'
data+=("options=${options}")

View File

@ -1,9 +1,8 @@
# common import tasks to support multiple files and URLs
# shellcheck shell=bash
# shellcheck shell=bash disable=SC2154
function init_import() {
local files file
# catch args, convert the space delimited string to an array
files=()
local files=()
eval "files=(${args[file]})"
# create tmp directory
tmpdir="$(mktemp -d)"
@ -14,7 +13,7 @@ function init_import() {
if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then
error "download of ${files[$i]} failed!"
fi
files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
files[i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
fi
done
# read pipes if name starts with /dev/fd
@ -23,7 +22,7 @@ function init_import() {
if ! cat "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then
error "reading of ${files[$i]} failed!"
fi
files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
files[i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
fi
done
# create a zip archive if there are multiple files
@ -35,23 +34,4 @@ function init_import() {
else
file="${files[0]}"
fi
# basic post data
if [[ ${file} == "-" ]]; then
data+=("project-file=@-")
else
if ! path=$(readlink -e "${file}"); then
error "cannot open ${file} (no such file)!"
fi
data+=("project-file=@${path}")
fi
if [[ ${args[--projectName]} ]]; then
data+=("project-name=${args[--projectName]}")
else
if [[ ${file} == "-" ]]; then
name="Untitled"
else
name="$(basename "${path}" | tr '.' ' ')"
fi
data+=("project-name=${name}")
fi
}

View File

@ -2,18 +2,18 @@
# shellcheck shell=bash disable=SC2154
function post_export() {
local curloptions
# post
mapfile -t curloptions < <(for d in "$@"; do
echo "--data"
echo "$d"
done)
# support file output
if [[ ${args[--output]} ]]; then
if ! mkdir -p "$(dirname "${args[--output]}")"; then
error "unable to create parent directory for ${args[--output]}"
fi
curloptions+=("--output")
curloptions+=("${args[--output]}")
curloptions+=("--output" "${args[--output]}")
fi
# post
if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then
error "exporting ${args[project]} failed!"
else

View File

@ -1,15 +1,31 @@
# post to create-project endpoint and validate
# shellcheck shell=bash disable=SC2154
function post_import() {
local curloptions
local projectid
local projectname
local rows
# post
local curloptions projectid projectname rows
mapfile -t curloptions < <(for d in "$@"; do
echo "--form"
echo "--form-string"
echo "$d"
done)
# basic post data
if [[ ${file} == "-" ]]; then
curloptions+=("--form" "project-file=@-")
else
if ! path=$(readlink -e "${file}"); then
error "cannot open ${file} (no such file)!"
fi
curloptions+=("--form" "project-file=@${path}")
fi
if [[ ${args[--projectName]} ]]; then
curloptions+=("--form-string" "project-name=${args[--projectName]}")
else
if [[ ${file} == "-" ]]; then
name="Untitled"
else
name="$(basename "${path}" | tr '.' ' ')"
fi
curloptions+=("--form-string" "project-name=${name}")
fi
# post
if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then
error "importing ${args[file]} failed!"
fi

View File

@ -35,11 +35,11 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --projectName --quiet --separator --trimStrings -h -q")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --projectName --quiet --trimStrings -h -q")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*)'

View File

@ -0,0 +1,4 @@
a,b,c
1,2,3
ä,é,ß
$,\,'
1 a b c
2 1 2 3
3 ä é ß
4 $ \ '

View File

@ -0,0 +1,4 @@
a; b; c
1; 2; 3
0; 0; 0
$; \; '
1 a b c
2 1 2 3
3 0 0 0
4 $ \ '

View File

@ -10,20 +10,19 @@ trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion (empty file)
cat << "DATA" > "${tmpdir}/${t}.assert"
DATA
touch "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv"
orcli list | grep "${t} csv"
orcli delete "${t} csv"
orcli list | grep "${t} csv" > "${t}.output" || exit 0
orcli import csv "${t}.csv"
orcli import csv "${t}.csv"
orcli list | grep "${t} csv"
orcli delete --force "${t} csv"
orcli list | grep "${t} csv" >> "${t}.output" || exit 0
orcli import csv "${t}.csv" --projectName "${t}"
orcli list | grep "${t}"
orcli delete "${t}"
orcli list | grep "${t}" > "${t}.output" || exit 0
orcli import csv "${t}.csv" --projectName "${t}"
orcli import csv "${t}.csv" --projectName "${t}"
orcli list | grep "${t}"
orcli delete --force "${t}"
orcli list | grep "${t}" >> "${t}.output" || exit 0
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -14,8 +14,8 @@ cp data/example.tsv "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv"
orcli export tsv "${t} csv" --output "${t}.output"
orcli import csv "${t}.csv" --projectName "${t}"
orcli export tsv "${t}" --output "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,49 @@
#!/bin/bash
t="import-csv-blankCellsAsStrings"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
0,,0
$,\,'
DATA
cat << "DATA" > "${tmpdir}/${t}.transform"
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "b",
"expression": "grel:isNull(value)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 false 3
0 false 0
$ false '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --blankCellsAsStrings
orcli transform "${t}" "${tmpdir}/${t}.transform"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,27 @@
#!/bin/bash
t="import-csv-columnNames"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
foo bar baz
a b c
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --columnNames "foo,bar,baz"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,26 @@
#!/bin/bash
t="import-csv-encoding"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example-iso-8859-1.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
ä é ß
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --encoding "ISO-8859-1"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-guessCellValueTypes"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
01,02,03
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
1 2 3
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --guessCellValueTypes
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,27 @@
#!/bin/bash
t="import-csv-headerLines"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
Column 1 Column 2 Column 3
a b c
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --headerLines 0
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,25 @@
#!/bin/bash
t="import-csv-ignoreLines"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --ignoreLines 1
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,32 @@
#!/bin/bash
t="import-csv-ignoreQuoteCharacter"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,"2,0",3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c Column 4
1 2 0 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
# OpenRefine 4.x fails without headerLines manually set
orcli import csv "${t}.csv" --projectName "${t}" --ignoreQuoteCharacter --headerLines 1
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-includeArchiveFileName"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}-1.csv"
cp data/example.csv "${tmpdir}/${t}-2.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
Archive a b c
Untitled.zip 1 2 3
Untitled.zip 0 0 0
Untitled.zip $ \ '
Untitled.zip 1 2 3
Untitled.zip 0 0 0
Untitled.zip $ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}-1.csv" "${t}-2.csv" --projectName "${t}" --includeArchiveFileName
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-includeFileSources"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}-1.csv"
cp data/example.csv "${tmpdir}/${t}-2.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
File a b c
import-csv-includeFileSources-1.csv 1 2 3
import-csv-includeFileSources-1.csv 0 0 0
import-csv-includeFileSources-1.csv $ \ '
import-csv-includeFileSources-2.csv 1 2 3
import-csv-includeFileSources-2.csv 0 0 0
import-csv-includeFileSources-2.csv $ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}-1.csv" "${t}-2.csv" --projectName "${t}" --includeFileSources
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

25
tests/import-csv-limit.sh Normal file
View File

@ -0,0 +1,25 @@
#!/bin/bash
t="import-csv-limit"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
0 0 0
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --limit 2
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,25 @@
#!/bin/bash
t="import-csv-projectTags"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
foo
bar
baz
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --projectTags "foo,bar,baz"
orcli info "${t}" | jq -r .tags[] > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

20
tests/import-csv-quiet.sh Normal file
View File

@ -0,0 +1,20 @@
#!/bin/bash
# Functional test: with --quiet a successful csv import writes nothing to
# stdout or stderr.
t="import-csv-quiet"

# scratch workspace; removed on exit and on INT/QUIT/TERM
tmpdir="$(mktemp -d)"
trap 'rm -rf "${tmpdir}"' EXIT INT QUIT TERM

# fixture: the file to import
cp data/example.csv "${tmpdir}/${t}.csv"

# expected result: an empty file
touch "${tmpdir}/${t}.assert"

# run the import inside the workspace, capturing both output streams
if ! cd "${tmpdir}"; then
    exit 1
fi
orcli import csv "${t}.csv" --projectName "${t}" --quiet >"${t}.output" 2>&1

# the exit status of diff is the result of this test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-quoteCharacter"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,%2,0%,3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2,0 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --quoteCharacter "%"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,21 @@
#!/bin/bash
# Functional test: import a file with a custom --separator and compare the
# TSV export against data/example.tsv.
t="import-csv-separator"

# scratch workspace; removed on exit and on INT/QUIT/TERM
tmpdir="$(mktemp -d)"
trap 'rm -rf "${tmpdir}"' EXIT INT QUIT TERM

# fixtures: file to import and expected export
cp data/example-separator.csv "${tmpdir}/${t}.csv"
cp data/example.tsv "${tmpdir}/${t}.assert"

# run import and export inside the workspace
if ! cd "${tmpdir}"; then
    exit 1
fi
orcli import csv "${t}.csv" --projectName "${t}" --separator "; "
orcli export tsv "${t}" >"${t}.output"

# the exit status of diff is the result of this test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-skipBlankRows"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
,,
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --skipBlankRows
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-skipDataLines"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --skipDataLines 1
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-trimStrings"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1 , 2 , 3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --trimStrings
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-unicode biểu tượng cảm xúc ⛲"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
⌨,code,meaning
⛲,1F347,FOUNTAIN
⛳,1F349,FLAG IN HOLE
⛵,1F352,SAILBOAT
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
⌨ code meaning
⛲ 1F347 FOUNTAIN
⛳ 1F349 FLAG IN HOLE
⛵ 1F352 SAILBOAT
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -14,8 +14,8 @@ cp data/example.tsv "${tmpdir}/${t}.assert"
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv"
orcli export tsv "${t} csv" > "${t}.output"
orcli import csv "${t}.csv" --projectName "${t}"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

21
tests/import-tsv.sh Normal file
View File

@ -0,0 +1,21 @@
#!/bin/bash
# Functional test: a TSV file imported with `orcli import tsv` and exported
# again as TSV must equal the original data/example.tsv.
t="import-tsv"

# scratch workspace; removed on exit and on INT/QUIT/TERM
tmpdir="$(mktemp -d)"
trap 'rm -rf "${tmpdir}"' EXIT INT QUIT TERM

# fixtures: the same file serves as input and as expected export
cp data/example.tsv "${tmpdir}/${t}.tsv"
cp data/example.tsv "${tmpdir}/${t}.assert"

# run import and export inside the workspace
if ! cd "${tmpdir}"; then
    exit 1
fi
orcli import tsv "${t}.tsv" --projectName "${t}"
orcli export tsv "${t}" >"${t}.output"

# the exit status of diff is the result of this test
diff -u "${t}.assert" "${t}.output"

View File

@ -18,8 +18,8 @@ DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv"
orcli info "${t} csv" | jq -r .columns[] > "${t}.output"
orcli import csv "${t}.csv" --projectName "${t}"
orcli info "${t}" | jq -r .columns[] > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -11,7 +11,7 @@ cp data/example.csv "${tmpdir}/${t}.csv"
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv"
orcli import csv "${t}.csv" --projectName "${t}"
orcli list > "${t}.output"
# test