Merge pull request #98 from opencultureconsulting/felixlohmeier/import-csv-tsv-options-83

Felixlohmeier/import-csv-tsv-options-83
This commit is contained in:
Felix Lohmeier 2023-01-15 00:44:01 +01:00 committed by GitHub
commit 40571d1e38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
35 changed files with 1647 additions and 341 deletions

View File

@ -10,7 +10,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
* allows execution of arbitrary bash scripts * allows execution of arbitrary bash scripts
* interactive mode for playing around and debugging * interactive mode for playing around and debugging
* your existing OpenRefine data will not be touched * your existing OpenRefine data will not be touched
* import CSV, ~~TSV, line-based TXT, fixed-width TXT, JSON or XML~~ (and specify input options) * import CSV, TSV, ~~line-based TXT, fixed-width TXT, JSON or XML~~ (and specify input options)
* supports stdin, multiple files and URLs * supports stdin, multiple files and URLs
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file * transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
* orcli calls specific endpoints for each operation to provide improved error handling and logging * orcli calls specific endpoints for each operation to provide improved error handling and logging

1017
orcli

File diff suppressed because it is too large Load Diff

View File

@ -19,6 +19,7 @@ examples:
- orcli transform "duplicates" "https://git.io/fj5ju" - orcli transform "duplicates" "https://git.io/fj5ju"
- orcli export tsv "duplicates" - orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv" - orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli delete "duplicates"
- orcli run --interactive - orcli run --interactive
- |- - |-
orcli run << EOF orcli run << EOF
@ -36,19 +37,23 @@ commands:
- name: delete - name: delete
help: delete OpenRefine project help: delete OpenRefine project
args: args:
- name: project - &project
name: project
help: project name or id help: project name or id
required: true required: true
flags: flags:
- long: --force - long: --force
short: -f short: -f
help: delete all projects with the same name help: delete all projects with the same name
- long: --quiet - &quiet
long: --quiet
short: -q short: -q
help: suppress log output, print errors only help: suppress log output, print errors only
examples: examples:
- orcli delete "duplicates" - orcli delete "duplicates"
- orcli delete "duplicates" --force
- orcli delete 1234567890123 - orcli delete 1234567890123
- for p in $(orcli list | cut -c 1-13); do orcli delete "${p}"; done
- name: import - name: import
help: commands to create OpenRefine projects from files or URLs help: commands to create OpenRefine projects from files or URLs
@ -57,68 +62,137 @@ commands:
- name: csv - name: csv
help: import character-separated values (CSV) help: import character-separated values (CSV)
args: args:
- name: file - &file
name: file
help: Path to one or more files or URLs. When FILE is -, read standard input. help: Path to one or more files or URLs. When FILE is -, read standard input.
default: "-" default: "-"
repeatable: true repeatable: true
flags: flags:
- long: --separator - &separator
long: --separator
help: character(s) that separates columns help: character(s) that separates columns
arg: separator arg: separator
default: "," default: ","
- long: --encoding - &blankCellsAsStrings
long: --blankCellsAsStrings
help: store blank cells as empty strings instead of nulls
- &columnNames
long: --columnNames
help: |-
set column names (comma separated)
hint: add --ignoreLines 1 to overwrite existing header row
arg: columnNames
conflicts: [--headerLines]
- &encoding_import
long: --encoding
help: set character encoding help: set character encoding
arg: encoding arg: encoding
- long: --trimStrings - &guessCellValueTypes
long: --guessCellValueTypes
help: attempt to parse cell text into numbers
- &headerLines
long: --headerLines
help: parse x line(s) as column headers
arg: headerLines
default: "1"
conflicts: [--columnNames]
- &ignoreLines
long: --ignoreLines
help: ignore first x line(s) at beginning of file
arg: ignoreLines
default: "-1"
- &ignoreQuoteCharacter
long: --ignoreQuoteCharacter
help: do not use any quote character to enclose cells containing column separators
- &includeFileSources
long: --includeFileSources
help: add column with file source
- &includeArchiveFileName
long: --includeArchiveFileName
help: add column with archive file name
- &limit
long: --limit
help: load at most x row(s) of data
arg: limit
default: "-1"
- &quoteCharacter
long: --quoteCharacter
help: quote character to enclose cells containing column separators
arg: quoteCharacter
default: '\\\"'
- &skipBlankRows
long: --skipBlankRows
help: do not store blank rows
- &skipDataLines
long: --skipDataLines
help: discard initial x row(s) of data
arg: skipDataLines
default: "0"
- &trimStrings
long: --trimStrings
help: trim leading & trailing whitespace from strings help: trim leading & trailing whitespace from strings
- long: --projectName - &projectName
long: --projectName
arg: projectName arg: projectName
help: set a name for the OpenRefine project help: set a name for the OpenRefine project
- long: --quiet - &projectTags
short: -q long: --projectTags
help: suppress log output, print errors only help: set project tags (comma separated)
arg: projectTags
- *quiet
examples: examples:
- orcli import csv "file" - orcli import csv "file"
- orcli import csv "file1" "file2" - orcli import csv "file1" "file2"
- cat "file" | orcli import csv - head -n 100 "file" | orcli import csv
- orcli import csv "https://git.io/fj5hF" - orcli import csv "https://git.io/fj5hF"
- |- - |-
orcli import csv "file" \\\\ orcli import csv "file" \\\\
--separator ";" \\\\ --separator ";" \\\\
--columnNames "foo,bar,baz" \\\\
--ignoreLines 1 \\\\
--encoding "ISO-8859-1" \\\\ --encoding "ISO-8859-1" \\\\
--limit 100 \\\\
--trimStrings \\\\ --trimStrings \\\\
--projectName "duplicates" --projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: tsv - name: tsv
help: import tab-separated values (TSV) help: import tab-separated values (TSV)
args: args:
- name: file - *file
help: Path to one or more files or URLs. When FILE is -, read standard input.
default: "-"
repeatable: true
flags: flags:
- long: --encoding - *blankCellsAsStrings
help: set character encoding - *columnNames
arg: encoding - *encoding_import
- long: --trimStrings - *guessCellValueTypes
help: trim leading & trailing whitespace from strings - *headerLines
- long: --projectName - *ignoreLines
arg: projectName - *ignoreQuoteCharacter
help: set a name for the OpenRefine project - *includeFileSources
- long: --quiet - *includeArchiveFileName
short: -q - *limit
help: suppress log output, print errors only - *quoteCharacter
- *skipBlankRows
- *skipDataLines
- *trimStrings
- *projectName
- *projectTags
- *quiet
examples: examples:
- orcli import tsv "file" - orcli import tsv "file"
- orcli import tsv "file1" "file2" - orcli import tsv "file1" "file2"
- cat "file" | orcli import tsv - head -n 100 "file" | orcli import tsv
- orcli import tsv "https://git.io/fj5hF" - orcli import tsv "https://git.io/fj5hF"
- |- - |-
orcli import tsv "file" \\\\ orcli import tsv "file" \\\\
--separator ";" \\\\ --separator ";" \\\\
--columnNames "foo,bar,baz" \\\\
--ignoreLines 1 \\\\
--encoding "ISO-8859-1" \\\\ --encoding "ISO-8859-1" \\\\
--limit 100 \\\\
--trimStrings \\\\ --trimStrings \\\\
--projectName "duplicates" --projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: list - name: list
help: list projects on OpenRefine server help: list projects on OpenRefine server
@ -126,12 +200,11 @@ commands:
- name: info - name: info
help: show OpenRefine project's metadata help: show OpenRefine project's metadata
args: args:
- name: project - *project
help: project name or id
required: true
examples: examples:
- orcli info "duplicates" - orcli info "duplicates"
- orcli info 1234567890123 - orcli info 1234567890123
- orcli info "duplicates" | jq -r .columns[]
- name: test - name: test
help: run functional tests on tmp OpenRefine workspace help: run functional tests on tmp OpenRefine workspace
@ -139,17 +212,10 @@ commands:
- name: transform - name: transform
help: apply undo/redo JSON file(s) to an OpenRefine project help: apply undo/redo JSON file(s) to an OpenRefine project
args: args:
- name: project - *project
help: project name or id - *file
required: true
- name: file
help: Path to one or more files or URLs containing OpenRefine's undo/redo operation history in JSON format. When FILE is -, read standard input.
default: "-"
repeatable: true
flags: flags:
- long: --quiet - *quiet
short: -q
help: suppress log output, print errors only
examples: examples:
- orcli transform "duplicates" "history.json" - orcli transform "duplicates" "history.json"
- cat "history.json" | orcli transform "duplicates" - cat "history.json" | orcli transform "duplicates"
@ -163,31 +229,27 @@ commands:
- name: tsv - name: tsv
help: export tab-separated values (TSV) help: export tab-separated values (TSV)
args: args:
- name: project - *project
help: project name or id
required: true
flags: flags:
- long: --output - &output
long: --output
help: Write to file instead of stdout help: Write to file instead of stdout
arg: file arg: file
- long: --encoding - &encoding_export
long: --encoding
help: set character encoding help: set character encoding
arg: encoding arg: encoding
default: "UTF-8" default: "UTF-8"
- long: --quiet - *quiet
short: -q
help: suppress log output, print errors only
examples: examples:
- orcli export tsv "duplicates" - orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv" - orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli export tsv "duplicates" --encoding "ISO-8859-1"
- name: run - name: run
help: run tmp OpenRefine workspace and execute shell script(s) help: run tmp OpenRefine workspace and execute shell script(s)
args: args:
- name: file - *file
help: Path to one or more files. When FILE is -, read standard input.
default: "-"
repeatable: true
flags: flags:
- long: --memory - long: --memory
help: maximum RAM for OpenRefine java heap space help: maximum RAM for OpenRefine java heap space
@ -199,9 +261,7 @@ commands:
default: "3333" default: "3333"
- long: --interactive - long: --interactive
help: do not exit on error and keep bash shell open help: do not exit on error and keep bash shell open
- long: --quiet - *quiet
short: -q
help: suppress log output, print errors only
examples: examples:
- orcli run --interactive - orcli run --interactive
- |- - |-

View File

@ -1,15 +1,13 @@
# shellcheck shell=bash # shellcheck shell=bash disable=SC2154
# call init_import function to eval args and to set basic post data # call init_import function to eval args and to set basic post data
init_import init_import
# check if stdin is present if selected # check if stdin is present if selected
if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
if ! read -u 0 -t 0; then
orcli_import_csv_usage orcli_import_csv_usage
exit 1 exit 1
fi fi
fi
# assemble specific post data (some options require json format) # assemble specific post data (some options require json format)
data+=("format=text/line-based/*sv") data+=("format=text/line-based/*sv")
@ -19,9 +17,67 @@ if [[ ${args[--encoding]} ]]; then
options+=', ' options+=', '
options+="\"encoding\": \"${args[--encoding]}\"" options+="\"encoding\": \"${args[--encoding]}\""
fi fi
if [[ ${args[--blankCellsAsStrings]} ]]; then
options+=', '
options+='"storeBlankCellsAsNulls": false'
fi
if [[ ${args[--columnNames]} ]]; then
IFS=',' read -ra columnNames <<< "${args[--columnNames]}"
options+=', '
options+="\"columnNames\": [ $(printf ',"'%s'"' "${columnNames[@]}" | cut -c2-) ]"
fi
if [[ ${args[--guessCellValueTypes]} ]]; then
options+=', '
options+='"guessCellValueTypes": true'
fi
if [[ ${args[--headerLines]} ]]; then
options+=', '
options+="\"headerLines\": ${args[--headerLines]}"
fi
if [[ ${args[--ignoreLines]} ]]; then
options+=', '
options+="\"ignoreLines\": ${args[--ignoreLines]}"
fi
if [[ ${args[--ignoreQuoteCharacter]} ]]; then
options+=', '
options+='"processQuotes": false'
fi
# --includeFileSources: add a column with the file source of each row.
# The key has to be double-quoted like every other entry appended to
# ${options}; an unquoted member name makes the options string invalid JSON.
if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
options+=', '
options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
fi
if [[ ${args[--skipBlankRows]} ]]; then
options+=', '
options+='"storeBlankRows": false'
fi
if [[ ${args[--skipDataLines]} ]]; then
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then if [[ ${args[--trimStrings]} ]]; then
options+=', ' options+=', '
options+="\"trimStrings\": true" options+='"trimStrings": true'
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi fi
options+=' }' options+=' }'
data+=("options=${options}") data+=("options=${options}")

View File

@ -1,15 +1,13 @@
# shellcheck shell=bash # shellcheck shell=bash disable=SC2154
# call init_import function to eval args and to set basic post data # call init_import function to eval args and to set basic post data
init_import init_import
# check if stdin is present if selected # check if stdin is present if selected
if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
if ! read -u 0 -t 0; then
orcli_import_tsv_usage orcli_import_tsv_usage
exit 1 exit 1
fi fi
fi
# assemble specific post data (some options require json format) # assemble specific post data (some options require json format)
data+=("format=text/line-based/*sv") data+=("format=text/line-based/*sv")
@ -19,9 +17,67 @@ if [[ ${args[--encoding]} ]]; then
options+=', ' options+=', '
options+="\"encoding\": \"${args[--encoding]}\"" options+="\"encoding\": \"${args[--encoding]}\""
fi fi
if [[ ${args[--blankCellsAsStrings]} ]]; then
options+=', '
options+='"storeBlankCellsAsNulls": false'
fi
if [[ ${args[--columnNames]} ]]; then
IFS=',' read -ra columnNames <<< "${args[--columnNames]}"
options+=', '
options+="\"columnNames\": [ $(printf ',"'%s'"' "${columnNames[@]}" | cut -c2-) ]"
fi
if [[ ${args[--guessCellValueTypes]} ]]; then
options+=', '
options+='"guessCellValueTypes": true'
fi
if [[ ${args[--headerLines]} ]]; then
options+=', '
options+="\"headerLines\": ${args[--headerLines]}"
fi
if [[ ${args[--ignoreLines]} ]]; then
options+=', '
options+="\"ignoreLines\": ${args[--ignoreLines]}"
fi
if [[ ${args[--ignoreQuoteCharacter]} ]]; then
options+=', '
options+='"processQuotes": false'
fi
# --includeFileSources: add a column with the file source of each row.
# The key has to be double-quoted like every other entry appended to
# ${options}; an unquoted member name makes the options string invalid JSON.
if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
options+=', '
options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
options+=', '
options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
options+=', '
options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--quoteCharacter]} ]]; then
options+=', '
options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
fi
if [[ ${args[--skipBlankRows]} ]]; then
options+=', '
options+='"storeBlankRows": false'
fi
if [[ ${args[--skipDataLines]} ]]; then
options+=', '
options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--trimStrings]} ]]; then if [[ ${args[--trimStrings]} ]]; then
options+=', ' options+=', '
options+="\"trimStrings\": true" options+='"trimStrings": true'
fi
if [[ ${args[--projectTags]} ]]; then
IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
options+=', '
options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi fi
options+=' }' options+=' }'
data+=("options=${options}") data+=("options=${options}")

View File

@ -1,9 +1,8 @@
# common import tasks to support multiple files and URLs # common import tasks to support multiple files and URLs
# shellcheck shell=bash # shellcheck shell=bash disable=SC2154
function init_import() { function init_import() {
local files file
# catch args, convert the space delimited string to an array # catch args, convert the space delimited string to an array
files=() local files=()
eval "files=(${args[file]})" eval "files=(${args[file]})"
# create tmp directory # create tmp directory
tmpdir="$(mktemp -d)" tmpdir="$(mktemp -d)"
@ -14,7 +13,7 @@ function init_import() {
if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then
error "download of ${files[$i]} failed!" error "download of ${files[$i]} failed!"
fi fi
files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" files[i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
fi fi
done done
# read pipes if name starts with /dev/fd # read pipes if name starts with /dev/fd
@ -23,7 +22,7 @@ function init_import() {
if ! cat "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then if ! cat "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then
error "reading of ${files[$i]} failed!" error "reading of ${files[$i]} failed!"
fi fi
files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" files[i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
fi fi
done done
# create a zip archive if there are multiple files # create a zip archive if there are multiple files
@ -35,23 +34,4 @@ function init_import() {
else else
file="${files[0]}" file="${files[0]}"
fi fi
# basic post data
if [[ ${file} == "-" ]]; then
data+=("project-file=@-")
else
if ! path=$(readlink -e "${file}"); then
error "cannot open ${file} (no such file)!"
fi
data+=("project-file=@${path}")
fi
if [[ ${args[--projectName]} ]]; then
data+=("project-name=${args[--projectName]}")
else
if [[ ${file} == "-" ]]; then
name="Untitled"
else
name="$(basename "${path}" | tr '.' ' ')"
fi
data+=("project-name=${name}")
fi
} }

View File

@ -2,18 +2,18 @@
# shellcheck shell=bash disable=SC2154 # shellcheck shell=bash disable=SC2154
function post_export() { function post_export() {
local curloptions local curloptions
# post
mapfile -t curloptions < <(for d in "$@"; do mapfile -t curloptions < <(for d in "$@"; do
echo "--data" echo "--data"
echo "$d" echo "$d"
done) done)
# support file output
if [[ ${args[--output]} ]]; then if [[ ${args[--output]} ]]; then
if ! mkdir -p "$(dirname "${args[--output]}")"; then if ! mkdir -p "$(dirname "${args[--output]}")"; then
error "unable to create parent directory for ${args[--output]}" error "unable to create parent directory for ${args[--output]}"
fi fi
curloptions+=("--output") curloptions+=("--output" "${args[--output]}")
curloptions+=("${args[--output]}")
fi fi
# post
if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then
error "exporting ${args[project]} failed!" error "exporting ${args[project]} failed!"
else else

View File

@ -1,15 +1,31 @@
# post to create-project endpoint and validate # post to create-project endpoint and validate
# shellcheck shell=bash disable=SC2154 # shellcheck shell=bash disable=SC2154
function post_import() { function post_import() {
local curloptions local curloptions projectid projectname rows
local projectid
local projectname
local rows
# post
mapfile -t curloptions < <(for d in "$@"; do mapfile -t curloptions < <(for d in "$@"; do
echo "--form" echo "--form-string"
echo "$d" echo "$d"
done) done)
# basic post data
if [[ ${file} == "-" ]]; then
curloptions+=("--form" "project-file=@-")
else
if ! path=$(readlink -e "${file}"); then
error "cannot open ${file} (no such file)!"
fi
curloptions+=("--form" "project-file=@${path}")
fi
if [[ ${args[--projectName]} ]]; then
curloptions+=("--form-string" "project-name=${args[--projectName]}")
else
if [[ ${file} == "-" ]]; then
name="Untitled"
else
name="$(basename "${path}" | tr '.' ' ')"
fi
curloptions+=("--form-string" "project-name=${name}")
fi
# post
if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then
error "importing ${args[file]} failed!" error "importing ${args[file]} failed!"
fi fi

View File

@ -35,11 +35,11 @@ send_completions() {
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' \'import csv\'*)' echo $' \'import csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --projectName --quiet --separator --trimStrings -h -q")" -- "$cur" )' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' \'import tsv\'*)' echo $' \'import tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --projectName --quiet --trimStrings -h -q")" -- "$cur" )' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;' echo $' ;;'
echo $'' echo $''
echo $' \'export tsv\'*)' echo $' \'export tsv\'*)'

View File

@ -0,0 +1,4 @@
a,b,c
1,2,3
ה,י,‗
$,\,'
1 a b c
2 1 2 3
3 ה י
4 $ \ '

View File

@ -0,0 +1,4 @@
a; b; c
1; 2; 3
0; 0; 0
$; \; '
1 a b c
2 1 2 3
3 0 0 0
4 $ \ '

View File

@ -10,20 +10,19 @@ trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
cp data/example.csv "${tmpdir}/${t}.csv" cp data/example.csv "${tmpdir}/${t}.csv"
# assertion (empty file) # assertion (empty file)
cat << "DATA" > "${tmpdir}/${t}.assert" touch "${tmpdir}/${t}.assert"
DATA
# action # action
cd "${tmpdir}" || exit 1 cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" orcli import csv "${t}.csv" --projectName "${t}"
orcli list | grep "${t} csv" orcli list | grep "${t}"
orcli delete "${t} csv" orcli delete "${t}"
# An empty grep result (exit status 1) is the expected outcome after a delete. Using "|| true" instead of "|| exit 0" keeps the script running, so the --force scenario and the final diff are actually executed.
orcli list | grep "${t} csv" > "${t}.output" || exit 0 orcli list | grep "${t}" > "${t}.output" || true
orcli import csv "${t}.csv" orcli import csv "${t}.csv" --projectName "${t}"
orcli import csv "${t}.csv" orcli import csv "${t}.csv" --projectName "${t}"
orcli list | grep "${t} csv" orcli list | grep "${t}"
orcli delete --force "${t} csv" orcli delete --force "${t}"
orcli list | grep "${t} csv" >> "${t}.output" || exit 0 orcli list | grep "${t}" >> "${t}.output" || true
# test # test
diff -u "${t}.assert" "${t}.output" diff -u "${t}.assert" "${t}.output"

View File

@ -14,8 +14,8 @@ cp data/example.tsv "${tmpdir}/${t}.assert"
# action # action
cd "${tmpdir}" || exit 1 cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" orcli import csv "${t}.csv" --projectName "${t}"
orcli export tsv "${t} csv" --output "${t}.output" orcli export tsv "${t}" --output "${t}.output"
# test # test
diff -u "${t}.assert" "${t}.output" diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,49 @@
#!/bin/bash
t="import-csv-blankCellsAsStrings"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
0,,0
$,\,'
DATA
cat << "DATA" > "${tmpdir}/${t}.transform"
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "b",
"expression": "grel:isNull(value)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 false 3
0 false 0
$ false '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --blankCellsAsStrings
orcli transform "${t}" "${tmpdir}/${t}.transform"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,27 @@
#!/bin/bash
t="import-csv-columnNames"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
foo bar baz
a b c
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --columnNames "foo,bar,baz"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,26 @@
#!/bin/bash
t="import-csv-encoding"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example-iso-8859-1.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
ä é ß
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --encoding "ISO-8859-1"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-guessCellValueTypes"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
01,02,03
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
1 2 3
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --guessCellValueTypes
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,27 @@
#!/bin/bash
t="import-csv-headerLines"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
Column 1 Column 2 Column 3
a b c
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --headerLines 0
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,25 @@
#!/bin/bash
t="import-csv-ignoreLines"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --ignoreLines 1
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,32 @@
#!/bin/bash
t="import-csv-ignoreQuoteCharacter"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,"2,0",3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c Column 4
1 2 0 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
# OpenRefine 4.x fails without headerLines manually set
orcli import csv "${t}.csv" --projectName "${t}" --ignoreQuoteCharacter --headerLines 1
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-includeArchiveFileName"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}-1.csv"
cp data/example.csv "${tmpdir}/${t}-2.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
Archive a b c
Untitled.zip 1 2 3
Untitled.zip 0 0 0
Untitled.zip $ \ '
Untitled.zip 1 2 3
Untitled.zip 0 0 0
Untitled.zip $ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}-1.csv" "${t}-2.csv" --projectName "${t}" --includeArchiveFileName
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-includeFileSources"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}-1.csv"
cp data/example.csv "${tmpdir}/${t}-2.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
File a b c
import-csv-includeFileSources-1.csv 1 2 3
import-csv-includeFileSources-1.csv 0 0 0
import-csv-includeFileSources-1.csv $ \ '
import-csv-includeFileSources-2.csv 1 2 3
import-csv-includeFileSources-2.csv 0 0 0
import-csv-includeFileSources-2.csv $ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}-1.csv" "${t}-2.csv" --projectName "${t}" --includeFileSources
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

25
tests/import-csv-limit.sh Normal file
View File

@ -0,0 +1,25 @@
#!/bin/bash
t="import-csv-limit"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
0 0 0
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --limit 2
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,25 @@
#!/bin/bash
t="import-csv-projectTags"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.csv "${tmpdir}/${t}.csv"
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
foo
bar
baz
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --projectTags "foo,bar,baz"
orcli info "${t}" | jq -r .tags[] > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

20
tests/import-csv-quiet.sh Normal file
View File

@ -0,0 +1,20 @@
#!/bin/bash
# Functional test: importing with --quiet must leave stdout and stderr empty.
t="import-csv-quiet"

# scratch directory, removed again on exit or on INT/QUIT/TERM
tmpdir="$(mktemp -d)"
trap 'rm -rf "${tmpdir}"' EXIT INT QUIT TERM

# expected result: an empty file
touch "${tmpdir}/${t}.assert"

# fixture: the sample CSV shipped with the tests
cp data/example.csv "${tmpdir}/${t}.csv"

# run the import inside the scratch directory, capturing both output streams
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --quiet > "${t}.output" 2>&1

# compare expected and captured output
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-quoteCharacter"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,%2,0%,3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2,0 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --quoteCharacter "%"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,21 @@
#!/bin/bash
# Functional test: a CSV delimited by "; " must import to the same table
# as the reference TSV when --separator is given.
t="import-csv-separator"

# scratch directory, removed again on exit or on INT/QUIT/TERM
tmpdir="$(mktemp -d)"
trap 'rm -rf "${tmpdir}"' EXIT INT QUIT TERM

# expected result: the reference TSV
cp data/example.tsv "${tmpdir}/${t}.assert"

# fixture: the sample file using "; " between columns
cp data/example-separator.csv "${tmpdir}/${t}.csv"

# import with the custom separator, then export the project as TSV
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --separator "; "
orcli export tsv "${t}" > "${t}.output"

# compare expected and exported data
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-skipBlankRows"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
,,
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --skipBlankRows
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,30 @@
#!/bin/bash
t="import-csv-skipDataLines"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1,2,3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --skipDataLines 1
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-trimStrings"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
a,b,c
1 , 2 , 3
0,0,0
$,\,'
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
a b c
1 2 3
0 0 0
$ \ '
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}" --trimStrings
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -0,0 +1,31 @@
#!/bin/bash
t="import-csv-unicode biểu tượng cảm xúc ⛲"
# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cat << "DATA" > "${tmpdir}/${t}.csv"
⌨,code,meaning
⛲,1F347,FOUNTAIN
⛳,1F349,FLAG IN HOLE
⛵,1F352,SAILBOAT
DATA
# assertion
cat << "DATA" > "${tmpdir}/${t}.assert"
⌨ code meaning
⛲ 1F347 FOUNTAIN
⛳ 1F349 FLAG IN HOLE
⛵ 1F352 SAILBOAT
DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}"
orcli export tsv "${t}" > "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -14,8 +14,8 @@ cp data/example.tsv "${tmpdir}/${t}.assert"
# action # action
cd "${tmpdir}" || exit 1 cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" orcli import csv "${t}.csv" --projectName "${t}"
orcli export tsv "${t} csv" > "${t}.output" orcli export tsv "${t}" > "${t}.output"
# test # test
diff -u "${t}.assert" "${t}.output" diff -u "${t}.assert" "${t}.output"

21
tests/import-tsv.sh Normal file
View File

@ -0,0 +1,21 @@
#!/bin/bash
# Functional test: a TSV file imported with `orcli import tsv` must come back
# unchanged from `orcli export tsv`.
t="import-tsv"

# scratch directory, removed again on exit or on INT/QUIT/TERM
tmpdir="$(mktemp -d)"
trap 'rm -rf "${tmpdir}"' EXIT INT QUIT TERM

# expected result: identical to the input file
cp data/example.tsv "${tmpdir}/${t}.assert"

# fixture: the sample TSV shipped with the tests
cp data/example.tsv "${tmpdir}/${t}.tsv"

# import the file, then export the project as TSV again
cd "${tmpdir}" || exit 1
orcli import tsv "${t}.tsv" --projectName "${t}"
orcli export tsv "${t}" > "${t}.output"

# compare expected and exported data
diff -u "${t}.assert" "${t}.output"

View File

@ -18,8 +18,8 @@ DATA
# action # action
cd "${tmpdir}" || exit 1 cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" orcli import csv "${t}.csv" --projectName "${t}"
orcli info "${t} csv" | jq -r .columns[] > "${t}.output" orcli info "${t}" | jq -r .columns[] > "${t}.output"
# test # test
diff -u "${t}.assert" "${t}.output" diff -u "${t}.assert" "${t}.output"

View File

@ -11,7 +11,7 @@ cp data/example.csv "${tmpdir}/${t}.csv"
# action # action
cd "${tmpdir}" || exit 1 cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" orcli import csv "${t}.csv" --projectName "${t}"
orcli list > "${t}.output" orcli list > "${t}.output"
# test # test