From fbf9b041887e1802f2446a0af82c557395ff5051 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Tue, 13 Dec 2022 21:20:36 +0000 Subject: [PATCH 01/10] reuse args and flags --- README.md | 2 +- orcli | 8 ++--- src/bashly.yml | 89 +++++++++++++++++++------------------------------- 3 files changed, 39 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 34af565..007ef72 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org * allows execution of arbitrary bash scripts * interactive mode for playing around and debugging * your existing OpenRefine data will not be touched -* import CSV, ~~TSV, line-based TXT, fixed-width TXT, JSON or XML~~ (and specify input options) +* import CSV, TSV, ~~line-based TXT, fixed-width TXT, JSON or XML~~ (and specify input options) * supports stdin, multiple files and URLs * transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file * orcli calls specific endpoints for each operation to provide improved error handling and logging diff --git a/orcli b/orcli index 24c97a7..ec53bb8 100755 --- a/orcli +++ b/orcli @@ -271,7 +271,7 @@ orcli_import_csv_usage() { printf "Examples:\n" printf " orcli import csv \"file\"\n" printf " orcli import csv \"file1\" \"file2\"\n" - printf " cat \"file\" | orcli import csv\n" + printf " head -n 100 \"file\" | orcli import csv\n" printf " orcli import csv \"https://git.io/fj5hF\"\n" printf " orcli import csv \"file\" \\\\\n --separator \";\" \\\\\n --encoding \"ISO-8859-1\" \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n" echo @@ -339,7 +339,7 @@ orcli_import_tsv_usage() { printf "Examples:\n" printf " orcli import tsv \"file\"\n" printf " orcli import tsv \"file1\" \"file2\"\n" - printf " cat \"file\" | orcli import tsv\n" + printf " head -n 100 \"file\" | orcli import tsv\n" printf " orcli import tsv \"https://git.io/fj5hF\"\n" printf " orcli 
import tsv \"file\" \\\\\n --separator \";\" \\\\\n --encoding \"ISO-8859-1\" \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n" echo @@ -490,7 +490,7 @@ orcli_transform_usage() { # :argument.usage echo " FILE..." - printf " Path to one or more files or URLs containing OpenRefine's undo/redo\n operation history in JSON format. When FILE is -, read standard input.\n" + printf " Path to one or more files or URLs. When FILE is -, read standard input.\n" printf " Default: -\n" echo @@ -652,7 +652,7 @@ orcli_run_usage() { # :argument.usage echo " FILE..." - printf " Path to one or more files. When FILE is -, read standard input.\n" + printf " Path to one or more files or URLs. When FILE is -, read standard input.\n" printf " Default: -\n" echo diff --git a/src/bashly.yml b/src/bashly.yml index 0cadfb8..6d6b753 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -36,14 +36,16 @@ commands: - name: delete help: delete OpenRefine project args: - - name: project + - &project + name: project help: project name or id required: true flags: - long: --force short: -f help: delete all projects with the same name - - long: --quiet + - &quiet + long: --quiet short: -q help: suppress log output, print errors only examples: @@ -57,30 +59,33 @@ commands: - name: csv help: import character-separated values (CSV) args: - - name: file + - &file + name: file help: Path to one or more files or URLs. When FILE is -, read standard input. 
default: "-" repeatable: true flags: - - long: --separator + - &separator + long: --separator help: character(s) that separates columns arg: separator default: "," - - long: --encoding + - &encoding_import + long: --encoding help: set character encoding arg: encoding - - long: --trimStrings + - &trimStrings + long: --trimStrings help: trim leading & trailing whitespace from strings - - long: --projectName + - &projectName + long: --projectName arg: projectName help: set a name for the OpenRefine project - - long: --quiet - short: -q - help: suppress log output, print errors only + - *quiet examples: - orcli import csv "file" - orcli import csv "file1" "file2" - - cat "file" | orcli import csv + - head -n 100 "file" | orcli import csv - orcli import csv "https://git.io/fj5hF" - |- orcli import csv "file" \\\\ @@ -92,26 +97,16 @@ commands: - name: tsv help: import tab-separated values (TSV) args: - - name: file - help: Path to one or more files or URLs. When FILE is -, read standard input. 
- default: "-" - repeatable: true + - *file flags: - - long: --encoding - help: set character encoding - arg: encoding - - long: --trimStrings - help: trim leading & trailing whitespace from strings - - long: --projectName - arg: projectName - help: set a name for the OpenRefine project - - long: --quiet - short: -q - help: suppress log output, print errors only + - *encoding_import + - *trimStrings + - *projectName + - *quiet examples: - orcli import tsv "file" - orcli import tsv "file1" "file2" - - cat "file" | orcli import tsv + - head -n 100 "file" | orcli import tsv - orcli import tsv "https://git.io/fj5hF" - |- orcli import tsv "file" \\\\ @@ -126,9 +121,7 @@ commands: - name: info help: show OpenRefine project's metadata args: - - name: project - help: project name or id - required: true + - *project examples: - orcli info "duplicates" - orcli info 1234567890123 @@ -139,17 +132,10 @@ commands: - name: transform help: apply undo/redo JSON file(s) to an OpenRefine project args: - - name: project - help: project name or id - required: true - - name: file - help: Path to one or more files or URLs containing OpenRefine's undo/redo operation history in JSON format. When FILE is -, read standard input. 
- default: "-" - repeatable: true + - *project + - *file flags: - - long: --quiet - short: -q - help: suppress log output, print errors only + - *quiet examples: - orcli transform "duplicates" "history.json" - cat "history.json" | orcli transform "duplicates" @@ -163,20 +149,18 @@ commands: - name: tsv help: export tab-separated values (TSV) args: - - name: project - help: project name or id - required: true + - *project flags: - - long: --output + - &output + long: --output help: Write to file instead of stdout arg: file - - long: --encoding + - &encoding_export + long: --encoding help: set character encoding arg: encoding default: "UTF-8" - - long: --quiet - short: -q - help: suppress log output, print errors only + - *quiet examples: - orcli export tsv "duplicates" - orcli export tsv "duplicates" --output "duplicates.tsv" @@ -184,10 +168,7 @@ commands: - name: run help: run tmp OpenRefine workspace and execute shell script(s) args: - - name: file - help: Path to one or more files. When FILE is -, read standard input. 
- default: "-" - repeatable: true + - *file flags: - long: --memory help: maximum RAM for OpenRefine java heap space @@ -199,9 +180,7 @@ commands: default: "3333" - long: --interactive help: do not exit on error and keep bash shell open - - long: --quiet - short: -q - help: suppress log output, print errors only + - *quiet examples: - orcli run --interactive - |- From 27ee2ef6500c56039d6937263be861f35a56e153 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Tue, 13 Dec 2022 22:40:10 +0000 Subject: [PATCH 02/10] add import csv options --- orcli | 240 +++++++++++++++++++++++++++++++++++- src/bashly.yml | 43 +++++++ src/import_csv_command.sh | 50 +++++++- src/lib/send_completions.sh | 2 +- 4 files changed, 331 insertions(+), 4 deletions(-) diff --git a/orcli b/orcli index ec53bb8..875f219 100755 --- a/orcli +++ b/orcli @@ -238,11 +238,71 @@ orcli_import_csv_usage() { printf " Default: ,\n" echo + # :flag.usage + echo " --blankCellsAsStrings" + printf " store blank cells as empty strings instead of nulls\n" + echo + # :flag.usage echo " --encoding ENCODING" printf " set character encoding\n" echo + # :flag.usage + echo " --guessCellValueTypes" + printf " attempt to parse cell text into numbers\n" + echo + + # :flag.usage + echo " --headerLines HEADERLINES" + printf " parse x line(s) as column headers\n" + printf " Default: 1\n" + echo + + # :flag.usage + echo " --ignoreLines IGNORELINES" + printf " ignore first x line(s) at beginning of file\n" + printf " Default: -1\n" + echo + + # :flag.usage + echo " --ignoreQuoteCharacter" + printf " do not use any quote character to enclose cells containing column separators\n" + echo + + # :flag.usage + echo " --includeFileSources" + printf " add column with file source\n" + echo + + # :flag.usage + echo " --includeArchiveFileName" + printf " add column with archive file name\n" + echo + + # :flag.usage + echo " --limit LIMIT" + printf " load at most x row(s) of data\n" + printf " Default: -1\n" + echo + + # :flag.usage + echo " 
--quoteCharacter QUOTECHARACTER" + printf " quote character to enclose cells containing column separators\n" + printf " Default: \\\"\n" + echo + + # :flag.usage + echo " --skipBlankRows" + printf " do not store blank rows\n" + echo + + # :flag.usage + echo " --skipDataLines SKIPDATALINES" + printf " discard initial x row(s) of data\n" + printf " Default: 0\n" + echo + # :flag.usage echo " --trimStrings" printf " trim leading & trailing whitespace from strings\n" @@ -943,7 +1003,7 @@ send_completions() { echo $' ;;' echo $'' echo $' \'import csv\'*)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --projectName --quiet --separator --trimStrings -h -q")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' echo $' ;;' echo $'' echo $' \'import tsv\'*)' @@ -1061,9 +1121,57 @@ orcli_import_csv_command() { options+=', ' options+="\"encoding\": \"${args[--encoding]}\"" fi + if [[ ${args[--blankCellsAsStrings]} ]]; then + options+=', ' + options+='"storeBlankCellsAsNulls": false' + fi + if [[ ${args[--guessCellValueTypes]} ]]; then + options+=', ' + options+='"guessCellValueTypes": true' + fi + if [[ ${args[--headerLines]} ]]; then + options+=', ' + options+="\"headerLines\": ${args[--headerLines]}" + fi + if [[ ${args[--ignoreLines]} ]]; then + options+=', ' + options+="\"ignoreLines\": ${args[--ignoreLines]}" + fi + if [[ ${args[--ignoreQuoteCharacter]} ]]; then + options+=', ' + options+='"processQuotes": false' + fi + if [[ ${args[--includeFileSources]} ]]; then + options+=', ' + options+='includeFileSources: true' + fi + if [[ 
${args[--includeArchiveFileName]} ]]; then + options+=', ' + options+='"includeArchiveFileName": true' + fi + if [[ ${args[--limit]} ]]; then + options+=', ' + options+="\"limit\": ${args[--limit]}" + fi + if [[ ${args[--projectName]} ]]; then + options+=', ' + options+="\"projectName\": \"${args[--projectName]}\"" + fi + if [[ ${args[--quoteCharacter]} ]]; then + options+=', ' + options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\"" + fi + if [[ ${args[--skipBlankRows]} ]]; then + options+=', ' + options+='"storeBlankRows": false' + fi + if [[ ${args[--skipDataLines]} ]]; then + options+=', ' + options+="\"skipDataLines\": ${args[--skipDataLines]}" + fi if [[ ${args[--trimStrings]} ]]; then options+=', ' - options+="\"trimStrings\": true" + options+='"trimStrings": true' fi options+=' }' data+=("options=${options}") @@ -1848,6 +1956,14 @@ orcli_import_csv_parse_requirements() { fi ;; + # :flag.case + --blankCellsAsStrings) + + # :flag.case_no_arg + args[--blankCellsAsStrings]=1 + shift + ;; + # :flag.case --encoding) @@ -1863,6 +1979,121 @@ orcli_import_csv_parse_requirements() { fi ;; + # :flag.case + --guessCellValueTypes) + + # :flag.case_no_arg + args[--guessCellValueTypes]=1 + shift + ;; + + # :flag.case + --headerLines) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args[--headerLines]="$2" + shift + shift + else + printf "%s\n" "--headerLines requires an argument: --headerLines HEADERLINES" >&2 + exit 1 + fi + ;; + + # :flag.case + --ignoreLines) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args[--ignoreLines]="$2" + shift + shift + else + printf "%s\n" "--ignoreLines requires an argument: --ignoreLines IGNORELINES" >&2 + exit 1 + fi + ;; + + # :flag.case + --ignoreQuoteCharacter) + + # :flag.case_no_arg + args[--ignoreQuoteCharacter]=1 + shift + ;; + + # :flag.case + --includeFileSources) + + # :flag.case_no_arg + args[--includeFileSources]=1 + shift + ;; + + # :flag.case + --includeArchiveFileName) + + # :flag.case_no_arg + 
args[--includeArchiveFileName]=1 + shift + ;; + + # :flag.case + --limit) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args[--limit]="$2" + shift + shift + else + printf "%s\n" "--limit requires an argument: --limit LIMIT" >&2 + exit 1 + fi + ;; + + # :flag.case + --quoteCharacter) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args[--quoteCharacter]="$2" + shift + shift + else + printf "%s\n" "--quoteCharacter requires an argument: --quoteCharacter QUOTECHARACTER" >&2 + exit 1 + fi + ;; + + # :flag.case + --skipBlankRows) + + # :flag.case_no_arg + args[--skipBlankRows]=1 + shift + ;; + + # :flag.case + --skipDataLines) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args[--skipDataLines]="$2" + shift + shift + else + printf "%s\n" "--skipDataLines requires an argument: --skipDataLines SKIPDATALINES" >&2 + exit 1 + fi + ;; + # :flag.case --trimStrings) @@ -1919,6 +2150,11 @@ orcli_import_csv_parse_requirements() { # :command.default_assignments [[ -n ${args[file]:-} ]] || args[file]="-" [[ -n ${args[--separator]:-} ]] || args[--separator]="," + [[ -n ${args[--headerLines]:-} ]] || args[--headerLines]="1" + [[ -n ${args[--ignoreLines]:-} ]] || args[--ignoreLines]="-1" + [[ -n ${args[--limit]:-} ]] || args[--limit]="-1" + [[ -n ${args[--quoteCharacter]:-} ]] || args[--quoteCharacter]="\\\"" + [[ -n ${args[--skipDataLines]:-} ]] || args[--skipDataLines]="0" } diff --git a/src/bashly.yml b/src/bashly.yml index 6d6b753..2fbf9b3 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -70,10 +70,53 @@ commands: help: character(s) that separates columns arg: separator default: "," + - &blankCellsAsStrings + long: --blankCellsAsStrings + help: store blank cells as empty strings instead of nulls - &encoding_import long: --encoding help: set character encoding arg: encoding + - &guessCellValueTypes + long: --guessCellValueTypes + help: attempt to parse cell text into numbers + - &headerLines + long: --headerLines + help: parse x line(s) as column headers + arg: 
headerLines + default: "1" + - &ignoreLines + long: --ignoreLines + help: ignore first x line(s) at beginning of file + arg: ignoreLines + default: "-1" + - &ignoreQuoteCharacter + long: --ignoreQuoteCharacter + help: do not use any quote character to enclose cells containing column separators + - &includeFileSources + long: --includeFileSources + help: add column with file source + - &includeArchiveFileName + long: --includeArchiveFileName + help: add column with archive file name + - &limit + long: --limit + help: load at most x row(s) of data + arg: limit + default: "-1" + - &quoteCharacter + long: --quoteCharacter + help: quote character to enclose cells containing column separators + arg: quoteCharacter + default: '\\\"' + - &skipBlankRows + long: --skipBlankRows + help: do not store blank rows + - &skipDataLines + long: --skipDataLines + help: discard initial x row(s) of data + arg: skipDataLines + default: "0" - &trimStrings long: --trimStrings help: trim leading & trailing whitespace from strings diff --git a/src/import_csv_command.sh b/src/import_csv_command.sh index a2bf97f..e341644 100644 --- a/src/import_csv_command.sh +++ b/src/import_csv_command.sh @@ -19,9 +19,57 @@ if [[ ${args[--encoding]} ]]; then options+=', ' options+="\"encoding\": \"${args[--encoding]}\"" fi +if [[ ${args[--blankCellsAsStrings]} ]]; then + options+=', ' + options+='"storeBlankCellsAsNulls": false' +fi +if [[ ${args[--guessCellValueTypes]} ]]; then + options+=', ' + options+='"guessCellValueTypes": true' +fi +if [[ ${args[--headerLines]} ]]; then + options+=', ' + options+="\"headerLines\": ${args[--headerLines]}" +fi +if [[ ${args[--ignoreLines]} ]]; then + options+=', ' + options+="\"ignoreLines\": ${args[--ignoreLines]}" +fi +if [[ ${args[--ignoreQuoteCharacter]} ]]; then + options+=', ' + options+='"processQuotes": false' +fi +if [[ ${args[--includeFileSources]} ]]; then + options+=', ' + options+='includeFileSources: true' +fi +if [[ ${args[--includeArchiveFileName]} ]]; then 
+ options+=', ' + options+='"includeArchiveFileName": true' +fi +if [[ ${args[--limit]} ]]; then + options+=', ' + options+="\"limit\": ${args[--limit]}" +fi +if [[ ${args[--projectName]} ]]; then + options+=', ' + options+="\"projectName\": \"${args[--projectName]}\"" +fi +if [[ ${args[--quoteCharacter]} ]]; then + options+=', ' + options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\"" +fi +if [[ ${args[--skipBlankRows]} ]]; then + options+=', ' + options+='"storeBlankRows": false' +fi +if [[ ${args[--skipDataLines]} ]]; then + options+=', ' + options+="\"skipDataLines\": ${args[--skipDataLines]}" +fi if [[ ${args[--trimStrings]} ]]; then options+=', ' - options+="\"trimStrings\": true" + options+='"trimStrings": true' fi options+=' }' data+=("options=${options}") diff --git a/src/lib/send_completions.sh b/src/lib/send_completions.sh index 23b26fd..473d54b 100644 --- a/src/lib/send_completions.sh +++ b/src/lib/send_completions.sh @@ -35,7 +35,7 @@ send_completions() { echo $' ;;' echo $'' echo $' \'import csv\'*)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --projectName --quiet --separator --trimStrings -h -q")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' echo $' ;;' echo $'' echo $' \'import tsv\'*)' From 5cf03927b645400ad65f11550d5c630dbf23584d Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Mon, 9 Jan 2023 10:38:53 +0000 Subject: [PATCH 03/10] csv/tsv option columnNames and projectTags --- orcli | 720 ++++++++++++++++++++++++++---------- src/bashly.yml | 21 ++ src/import_csv_command.sh | 10 + src/import_tsv_command.sh | 58 
++- src/lib/send_completions.sh | 4 +- 5 files changed, 617 insertions(+), 196 deletions(-) diff --git a/orcli b/orcli index 875f219..7caa1ef 100755 --- a/orcli +++ b/orcli @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# This script was generated by bashly 0.9.1 (https://bashly.dannyb.co) +# This script was generated by bashly 0.9.4 (https://bashly.dannyb.co) # Modifying it manually is not recommended # :wrapper.bash3_bouncer @@ -27,47 +27,47 @@ orcli_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli COMMAND\n" printf " orcli [COMMAND] --help | -h\n" printf " orcli --version | -v\n" echo # :command.usage_commands - printf "Commands:\n" - echo " completions Generate bash completions" - echo " delete delete OpenRefine project" - echo " import commands to create OpenRefine projects from files or URLs" - echo " list list projects on OpenRefine server" - echo " info show OpenRefine project's metadata" - echo " test run functional tests on tmp OpenRefine workspace" - echo " transform apply undo/redo JSON file(s) to an OpenRefine project" - echo " export commands to export data from OpenRefine projects to files" - echo " run run tmp OpenRefine workspace and execute shell script(s)" + printf "%s\n" "Commands:" + printf " %s Generate bash completions\n" "completions" + printf " %s delete OpenRefine project\n" "delete " + printf " %s commands to create OpenRefine projects from files or URLs\n" "import " + printf " %s list projects on OpenRefine server\n" "list " + printf " %s show OpenRefine project's metadata\n" "info " + printf " %s run functional tests on tmp OpenRefine workspace\n" "test " + printf " %s apply undo/redo JSON file(s) to an OpenRefine project\n" "transform " + printf " %s commands to export data from OpenRefine projects to files\n" "export " + printf " %s run tmp OpenRefine workspace and execute shell script(s)\n" "run " echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # 
:command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo - echo " --version, -v" + printf " %s\n" "--version, -v" printf " Show version number\n" echo # :command.usage_environment_variables - printf "Environment Variables:\n" + printf "%s\n" "Environment Variables:" # :environment_variable.usage - echo " OPENREFINE_URL" + printf " %s\n" "OPENREFINE_URL" printf " URL to OpenRefine server\n" printf " Default: http://localhost:3333\n" echo # :command.usage_examples - printf "Examples:\n" + printf "%s\n" "Examples:" printf " orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n" printf " orcli list\n" printf " orcli info \"duplicates\"\n" @@ -100,17 +100,17 @@ orcli_completions_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli completions\n" printf " orcli completions --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo @@ -129,41 +129,41 @@ orcli_delete_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli delete PROJECT [OPTIONS]\n" printf " orcli delete --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo # :command.usage_flags # :flag.usage - echo " --force, -f" + printf " %s\n" "--force, -f" printf " delete all projects with the same name\n" echo # :flag.usage - echo " --quiet, -q" + printf " %s\n" "--quiet, -q" printf " suppress log output, print errors only\n" echo # :command.usage_args - printf "Arguments:\n" + printf "%s\n" "Arguments:" # :argument.usage - echo " PROJECT" + printf " %s\n" "PROJECT" printf " project name or id\n" echo # :command.usage_examples - printf "Examples:\n" + printf 
"%s\n" "Examples:" printf " orcli delete \"duplicates\"\n" printf " orcli delete 1234567890123\n" echo @@ -183,22 +183,22 @@ orcli_import_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli import COMMAND\n" printf " orcli import [COMMAND] --help | -h\n" echo # :command.usage_commands - printf "Commands:\n" - echo " csv import character-separated values (CSV)" - echo " tsv import tab-separated values (TSV)" + printf "%s\n" "Commands:" + printf " %s import character-separated values (CSV)\n" "csv" + printf " %s import tab-separated values (TSV)\n" "tsv" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo @@ -217,118 +217,128 @@ orcli_import_csv_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli import csv [FILE...] [OPTIONS]\n" printf " orcli import csv --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo # :command.usage_flags # :flag.usage - echo " --separator SEPARATOR" + printf " %s\n" "--separator SEPARATOR" printf " character(s) that separates columns\n" printf " Default: ,\n" echo # :flag.usage - echo " --blankCellsAsStrings" + printf " %s\n" "--blankCellsAsStrings" printf " store blank cells as empty strings instead of nulls\n" echo # :flag.usage - echo " --encoding ENCODING" + printf " %s\n" "--columnNames COLUMNNAMES" + printf " set column names (comma separated)\n" + echo + + # :flag.usage + printf " %s\n" "--encoding ENCODING" printf " set character encoding\n" echo # :flag.usage - echo " --guessCellValueTypes" + printf " %s\n" "--guessCellValueTypes" printf " attempt to parse cell text into numbers\n" echo # :flag.usage - echo " --headerLines HEADERLINES" + printf " %s\n" 
"--headerLines HEADERLINES" printf " parse x line(s) as column headers\n" printf " Default: 1\n" echo # :flag.usage - echo " --ignoreLines IGNORELINES" + printf " %s\n" "--ignoreLines IGNORELINES" printf " ignore first x line(s) at beginning of file\n" printf " Default: -1\n" echo # :flag.usage - echo " --ignoreQuoteCharacter" + printf " %s\n" "--ignoreQuoteCharacter" printf " do not use any quote character to enclose cells containing column separators\n" echo # :flag.usage - echo " --includeFileSources" + printf " %s\n" "--includeFileSources" printf " add column with file source\n" echo # :flag.usage - echo " --includeArchiveFileName" + printf " %s\n" "--includeArchiveFileName" printf " add column with archive file name\n" echo # :flag.usage - echo " --limit LIMIT" + printf " %s\n" "--limit LIMIT" printf " load at most x row(s) of data\n" printf " Default: -1\n" echo # :flag.usage - echo " --quoteCharacter QUOTECHARACTER" + printf " %s\n" "--quoteCharacter QUOTECHARACTER" printf " quote character to enclose cells containing column separators\n" printf " Default: \\\"\n" echo # :flag.usage - echo " --skipBlankRows" + printf " %s\n" "--skipBlankRows" printf " do not store blank rows\n" echo # :flag.usage - echo " --skipDataLines SKIPDATALINES" + printf " %s\n" "--skipDataLines SKIPDATALINES" printf " discard initial x row(s) of data\n" printf " Default: 0\n" echo # :flag.usage - echo " --trimStrings" + printf " %s\n" "--trimStrings" printf " trim leading & trailing whitespace from strings\n" echo # :flag.usage - echo " --projectName PROJECTNAME" + printf " %s\n" "--projectName PROJECTNAME" printf " set a name for the OpenRefine project\n" echo # :flag.usage - echo " --quiet, -q" + printf " %s\n" "--projectTags PROJECTTAGS" + printf " set project tags (comma separated)\n" + echo + + # :flag.usage + printf " %s\n" "--quiet, -q" printf " suppress log output, print errors only\n" echo # :command.usage_args - printf "Arguments:\n" + printf "%s\n" "Arguments:" # 
:argument.usage - echo " FILE..." + printf " %s\n" "FILE..." printf " Path to one or more files or URLs. When FILE is -, read standard input.\n" printf " Default: -\n" echo # :command.usage_examples - printf "Examples:\n" + printf "%s\n" "Examples:" printf " orcli import csv \"file\"\n" printf " orcli import csv \"file1\" \"file2\"\n" printf " head -n 100 \"file\" | orcli import csv\n" @@ -351,52 +361,122 @@ orcli_import_tsv_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli import tsv [FILE...] [OPTIONS]\n" printf " orcli import tsv --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo # :command.usage_flags # :flag.usage - echo " --encoding ENCODING" + printf " %s\n" "--blankCellsAsStrings" + printf " store blank cells as empty strings instead of nulls\n" + echo + + # :flag.usage + printf " %s\n" "--columnNames COLUMNNAMES" + printf " set column names (comma separated)\n" + echo + + # :flag.usage + printf " %s\n" "--encoding ENCODING" printf " set character encoding\n" echo # :flag.usage - echo " --trimStrings" + printf " %s\n" "--guessCellValueTypes" + printf " attempt to parse cell text into numbers\n" + echo + + # :flag.usage + printf " %s\n" "--headerLines HEADERLINES" + printf " parse x line(s) as column headers\n" + printf " Default: 1\n" + echo + + # :flag.usage + printf " %s\n" "--ignoreLines IGNORELINES" + printf " ignore first x line(s) at beginning of file\n" + printf " Default: -1\n" + echo + + # :flag.usage + printf " %s\n" "--ignoreQuoteCharacter" + printf " do not use any quote character to enclose cells containing column separators\n" + echo + + # :flag.usage + printf " %s\n" "--includeFileSources" + printf " add column with file source\n" + echo + + # :flag.usage + printf " %s\n" "--includeArchiveFileName" + printf " add column with archive file 
name\n" + echo + + # :flag.usage + printf " %s\n" "--limit LIMIT" + printf " load at most x row(s) of data\n" + printf " Default: -1\n" + echo + + # :flag.usage + printf " %s\n" "--quoteCharacter QUOTECHARACTER" + printf " quote character to enclose cells containing column separators\n" + printf " Default: \\\"\n" + echo + + # :flag.usage + printf " %s\n" "--skipBlankRows" + printf " do not store blank rows\n" + echo + + # :flag.usage + printf " %s\n" "--skipDataLines SKIPDATALINES" + printf " discard initial x row(s) of data\n" + printf " Default: 0\n" + echo + + # :flag.usage + printf " %s\n" "--trimStrings" printf " trim leading & trailing whitespace from strings\n" echo # :flag.usage - echo " --projectName PROJECTNAME" + printf " %s\n" "--projectName PROJECTNAME" printf " set a name for the OpenRefine project\n" echo # :flag.usage - echo " --quiet, -q" + printf " %s\n" "--projectTags PROJECTTAGS" + printf " set project tags (comma separated)\n" + echo + + # :flag.usage + printf " %s\n" "--quiet, -q" printf " suppress log output, print errors only\n" echo # :command.usage_args - printf "Arguments:\n" + printf "%s\n" "Arguments:" # :argument.usage - echo " FILE..." + printf " %s\n" "FILE..." printf " Path to one or more files or URLs. 
When FILE is -, read standard input.\n" printf " Default: -\n" echo # :command.usage_examples - printf "Examples:\n" + printf "%s\n" "Examples:" printf " orcli import tsv \"file\"\n" printf " orcli import tsv \"file1\" \"file2\"\n" printf " head -n 100 \"file\" | orcli import tsv\n" @@ -419,17 +499,17 @@ orcli_list_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli list\n" printf " orcli list --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo @@ -448,30 +528,30 @@ orcli_info_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli info PROJECT\n" printf " orcli info --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo # :command.usage_args - printf "Arguments:\n" + printf "%s\n" "Arguments:" # :argument.usage - echo " PROJECT" + printf " %s\n" "PROJECT" printf " project name or id\n" echo # :command.usage_examples - printf "Examples:\n" + printf "%s\n" "Examples:" printf " orcli info \"duplicates\"\n" printf " orcli info 1234567890123\n" echo @@ -491,17 +571,17 @@ orcli_test_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli test\n" printf " orcli test --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo @@ -520,42 +600,42 @@ orcli_transform_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli transform PROJECT [FILE...] 
[OPTIONS]\n" printf " orcli transform --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo # :command.usage_flags # :flag.usage - echo " --quiet, -q" + printf " %s\n" "--quiet, -q" printf " suppress log output, print errors only\n" echo # :command.usage_args - printf "Arguments:\n" + printf "%s\n" "Arguments:" # :argument.usage - echo " PROJECT" + printf " %s\n" "PROJECT" printf " project name or id\n" echo # :argument.usage - echo " FILE..." + printf " %s\n" "FILE..." printf " Path to one or more files or URLs. When FILE is -, read standard input.\n" printf " Default: -\n" echo # :command.usage_examples - printf "Examples:\n" + printf "%s\n" "Examples:" printf " orcli transform \"duplicates\" \"history.json\"\n" printf " cat \"history.json\" | orcli transform \"duplicates\"\n" printf " orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n" @@ -577,21 +657,21 @@ orcli_export_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli export COMMAND\n" printf " orcli export [COMMAND] --help | -h\n" echo # :command.usage_commands - printf "Commands:\n" - echo " tsv export tab-separated values (TSV)" + printf "%s\n" "Commands:" + printf " %s export tab-separated values (TSV)\n" "tsv" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo @@ -610,47 +690,47 @@ orcli_export_tsv_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli export tsv PROJECT [OPTIONS]\n" printf " orcli export tsv --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show 
this help\n" echo # :command.usage_flags # :flag.usage - echo " --output FILE" + printf " %s\n" "--output FILE" printf " Write to file instead of stdout\n" echo # :flag.usage - echo " --encoding ENCODING" + printf " %s\n" "--encoding ENCODING" printf " set character encoding\n" printf " Default: UTF-8\n" echo # :flag.usage - echo " --quiet, -q" + printf " %s\n" "--quiet, -q" printf " suppress log output, print errors only\n" echo # :command.usage_args - printf "Arguments:\n" + printf "%s\n" "Arguments:" # :argument.usage - echo " PROJECT" + printf " %s\n" "PROJECT" printf " project name or id\n" echo # :command.usage_examples - printf "Examples:\n" + printf "%s\n" "Examples:" printf " orcli export tsv \"duplicates\"\n" printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n" echo @@ -670,54 +750,54 @@ orcli_run_usage() { fi - printf "Usage:\n" + printf "%s\n" "Usage:" printf " orcli run [FILE...] [OPTIONS]\n" printf " orcli run --help | -h\n" echo # :command.long_usage if [[ -n $long_usage ]]; then - printf "Options:\n" + printf "%s\n" "Options:" # :command.usage_fixed_flags - echo " --help, -h" + printf " %s\n" "--help, -h" printf " Show this help\n" echo # :command.usage_flags # :flag.usage - echo " --memory RAM" + printf " %s\n" "--memory RAM" printf " maximum RAM for OpenRefine java heap space\n" printf " Default: 2048M\n" echo # :flag.usage - echo " --port PORT" + printf " %s\n" "--port PORT" printf " PORT on which OpenRefine should listen\n" printf " Default: 3333\n" echo # :flag.usage - echo " --interactive" + printf " %s\n" "--interactive" printf " do not exit on error and keep bash shell open\n" echo # :flag.usage - echo " --quiet, -q" + printf " %s\n" "--quiet, -q" printf " suppress log output, print errors only\n" echo # :command.usage_args - printf "Arguments:\n" + printf "%s\n" "Arguments:" # :argument.usage - echo " FILE..." + printf " %s\n" "FILE..." printf " Path to one or more files or URLs. 
When FILE is -, read standard input.\n" printf " Default: -\n" echo # :command.usage_examples - printf "Examples:\n" + printf "%s\n" "Examples:" printf " orcli run --interactive\n" printf " orcli run << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n orcli export tsv \"duplicates\"\n EOF\n" printf " orcli run --memory \"2000M\" --port \"3334\" << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" &\n orcli import csv \"https://git.io/fj5hF\" --projectName \"copy\" &\n wait\n echo \"finished import\"\n orcli export csv \"duplicates\" --output duplicates.csv &\n orcli export tsv \"duplicates\" --output duplicates.tsv &\n wait\n wc duplicates*\n EOF\n" @@ -1003,11 +1083,11 @@ send_completions() { echo $' ;;' echo $'' echo $' \'import csv\'*)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' echo $' ;;' echo $'' echo $' \'import tsv\'*)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --projectName --quiet --trimStrings -h -q")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W 
"$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' echo $' ;;' echo $'' echo $' \'export tsv\'*)' @@ -1125,6 +1205,11 @@ orcli_import_csv_command() { options+=', ' options+='"storeBlankCellsAsNulls": false' fi + if [[ ${args[--columnNames]} ]]; then + IFS=',' read -ra columnNames <<< "${args[--columnNames]}" + options+=', ' + options+="\"columnNames\": [ $(printf ',"'%s'"' "${columnNames[@]}" | cut -c2-) ]" + fi if [[ ${args[--guessCellValueTypes]} ]]; then options+=', ' options+='"guessCellValueTypes": true' @@ -1173,6 +1258,11 @@ orcli_import_csv_command() { options+=', ' options+='"trimStrings": true' fi + if [[ ${args[--projectTags]} ]]; then + IFS=',' read -ra projectTags <<< "${args[--projectTags]}" + options+=', ' + options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]" + fi options+=' }' data+=("options=${options}") @@ -1205,9 +1295,65 @@ orcli_import_tsv_command() { options+=', ' options+="\"encoding\": \"${args[--encoding]}\"" fi + if [[ ${args[--blankCellsAsStrings]} ]]; then + options+=', ' + options+='"storeBlankCellsAsNulls": false' + fi + if [[ ${args[--columnNames]} ]]; then + options+=', ' + options+="\"columnNames\": \"[${args[--columnNames]}\"]" + fi + if [[ ${args[--guessCellValueTypes]} ]]; then + options+=', ' + options+='"guessCellValueTypes": true' + fi + if [[ ${args[--headerLines]} ]]; then + options+=', ' + options+="\"headerLines\": ${args[--headerLines]}" + fi + if [[ ${args[--ignoreLines]} ]]; then + options+=', ' + options+="\"ignoreLines\": ${args[--ignoreLines]}" + fi + if [[ ${args[--ignoreQuoteCharacter]} ]]; then + options+=', ' + options+='"processQuotes": false' + fi + if [[ ${args[--includeFileSources]} ]]; then + 
options+=', ' + options+='includeFileSources: true' + fi + if [[ ${args[--includeArchiveFileName]} ]]; then + options+=', ' + options+='"includeArchiveFileName": true' + fi + if [[ ${args[--limit]} ]]; then + options+=', ' + options+="\"limit\": ${args[--limit]}" + fi + if [[ ${args[--projectName]} ]]; then + options+=', ' + options+="\"projectName\": \"${args[--projectName]}\"" + fi + if [[ ${args[--quoteCharacter]} ]]; then + options+=', ' + options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\"" + fi + if [[ ${args[--skipBlankRows]} ]]; then + options+=', ' + options+='"storeBlankRows": false' + fi + if [[ ${args[--skipDataLines]} ]]; then + options+=', ' + options+="\"skipDataLines\": ${args[--skipDataLines]}" + fi if [[ ${args[--trimStrings]} ]]; then options+=', ' - options+="\"trimStrings\": true" + options+='"trimStrings": true' + fi + if [[ ${args[--projectTags]} ]]; then + options+=', ' + options+="\"projectTags\": \"[${args[--projectTags]}\"]" fi options+=' }' data+=("options=${options}") @@ -1800,7 +1946,7 @@ orcli_delete_parse_requirements() { --force | -f) # :flag.case_no_arg - args[--force]=1 + args['--force']=1 shift ;; @@ -1808,7 +1954,7 @@ orcli_delete_parse_requirements() { --quiet | -q) # :flag.case_no_arg - args[--quiet]=1 + args['--quiet']=1 shift ;; @@ -1820,9 +1966,9 @@ orcli_delete_parse_requirements() { *) # :command.parse_requirements_case # :command.parse_requirements_case_simple - if [[ -z ${args[project]+x} ]]; then + if [[ -z ${args['project']+x} ]]; then - args[project]=$1 + args['project']=$1 shift else printf "invalid argument: %s\n" "$key" >&2 @@ -1835,7 +1981,7 @@ orcli_delete_parse_requirements() { done # :command.required_args_filter - if [[ -z ${args[project]+x} ]]; then + if [[ -z ${args['project']+x} ]]; then printf "missing required argument: PROJECT\nusage: orcli delete PROJECT [OPTIONS]\n" >&2 exit 1 fi @@ -1947,7 +2093,7 @@ orcli_import_csv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - 
args[--separator]="$2" + args['--separator']="$2" shift shift else @@ -1960,17 +2106,32 @@ orcli_import_csv_parse_requirements() { --blankCellsAsStrings) # :flag.case_no_arg - args[--blankCellsAsStrings]=1 + args['--blankCellsAsStrings']=1 shift ;; + # :flag.case + --columnNames) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--columnNames']="$2" + shift + shift + else + printf "%s\n" "--columnNames requires an argument: --columnNames COLUMNNAMES" >&2 + exit 1 + fi + ;; + # :flag.case --encoding) # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--encoding]="$2" + args['--encoding']="$2" shift shift else @@ -1983,7 +2144,7 @@ orcli_import_csv_parse_requirements() { --guessCellValueTypes) # :flag.case_no_arg - args[--guessCellValueTypes]=1 + args['--guessCellValueTypes']=1 shift ;; @@ -1993,7 +2154,7 @@ orcli_import_csv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--headerLines]="$2" + args['--headerLines']="$2" shift shift else @@ -2008,7 +2169,7 @@ orcli_import_csv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--ignoreLines]="$2" + args['--ignoreLines']="$2" shift shift else @@ -2021,7 +2182,7 @@ orcli_import_csv_parse_requirements() { --ignoreQuoteCharacter) # :flag.case_no_arg - args[--ignoreQuoteCharacter]=1 + args['--ignoreQuoteCharacter']=1 shift ;; @@ -2029,7 +2190,7 @@ orcli_import_csv_parse_requirements() { --includeFileSources) # :flag.case_no_arg - args[--includeFileSources]=1 + args['--includeFileSources']=1 shift ;; @@ -2037,7 +2198,7 @@ orcli_import_csv_parse_requirements() { --includeArchiveFileName) # :flag.case_no_arg - args[--includeArchiveFileName]=1 + args['--includeArchiveFileName']=1 shift ;; @@ -2047,7 +2208,7 @@ orcli_import_csv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--limit]="$2" + args['--limit']="$2" shift shift else @@ -2062,7 +2223,7 @@ orcli_import_csv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - 
args[--quoteCharacter]="$2" + args['--quoteCharacter']="$2" shift shift else @@ -2075,7 +2236,7 @@ orcli_import_csv_parse_requirements() { --skipBlankRows) # :flag.case_no_arg - args[--skipBlankRows]=1 + args['--skipBlankRows']=1 shift ;; @@ -2085,7 +2246,7 @@ orcli_import_csv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--skipDataLines]="$2" + args['--skipDataLines']="$2" shift shift else @@ -2098,7 +2259,7 @@ orcli_import_csv_parse_requirements() { --trimStrings) # :flag.case_no_arg - args[--trimStrings]=1 + args['--trimStrings']=1 shift ;; @@ -2108,7 +2269,7 @@ orcli_import_csv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--projectName]="$2" + args['--projectName']="$2" shift shift else @@ -2117,11 +2278,26 @@ orcli_import_csv_parse_requirements() { fi ;; + # :flag.case + --projectTags) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--projectTags']="$2" + shift + shift + else + printf "%s\n" "--projectTags requires an argument: --projectTags PROJECTTAGS" >&2 + exit 1 + fi + ;; + # :flag.case --quiet | -q) # :flag.case_no_arg - args[--quiet]=1 + args['--quiet']=1 shift ;; @@ -2133,12 +2309,12 @@ orcli_import_csv_parse_requirements() { *) # :command.parse_requirements_case # :command.parse_requirements_case_repeatable - if [[ -z ${args[file]+x} ]]; then - args[file]="\"$1\"" + if [[ -z ${args['file']+x} ]]; then + args['file']="\"$1\"" shift else - args[file]="${args[file]} \"$1\"" + args['file']="${args[file]} \"$1\"" shift fi @@ -2148,13 +2324,13 @@ orcli_import_csv_parse_requirements() { done # :command.default_assignments - [[ -n ${args[file]:-} ]] || args[file]="-" - [[ -n ${args[--separator]:-} ]] || args[--separator]="," - [[ -n ${args[--headerLines]:-} ]] || args[--headerLines]="1" - [[ -n ${args[--ignoreLines]:-} ]] || args[--ignoreLines]="-1" - [[ -n ${args[--limit]:-} ]] || args[--limit]="-1" - [[ -n ${args[--quoteCharacter]:-} ]] || args[--quoteCharacter]="\\\"" - [[ -n 
${args[--skipDataLines]:-} ]] || args[--skipDataLines]="0" + [[ -n ${args['file']:-} ]] || args['file']="-" + [[ -n ${args['--separator']:-} ]] || args['--separator']="," + [[ -n ${args['--headerLines']:-} ]] || args['--headerLines']="1" + [[ -n ${args['--ignoreLines']:-} ]] || args['--ignoreLines']="-1" + [[ -n ${args['--limit']:-} ]] || args['--limit']="-1" + [[ -n ${args['--quoteCharacter']:-} ]] || args['--quoteCharacter']="\\\"" + [[ -n ${args['--skipDataLines']:-} ]] || args['--skipDataLines']="0" } @@ -2183,13 +2359,36 @@ orcli_import_tsv_parse_requirements() { while [[ $# -gt 0 ]]; do key="$1" case "$key" in + # :flag.case + --blankCellsAsStrings) + + # :flag.case_no_arg + args['--blankCellsAsStrings']=1 + shift + ;; + + # :flag.case + --columnNames) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--columnNames']="$2" + shift + shift + else + printf "%s\n" "--columnNames requires an argument: --columnNames COLUMNNAMES" >&2 + exit 1 + fi + ;; + # :flag.case --encoding) # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--encoding]="$2" + args['--encoding']="$2" shift shift else @@ -2198,11 +2397,126 @@ orcli_import_tsv_parse_requirements() { fi ;; + # :flag.case + --guessCellValueTypes) + + # :flag.case_no_arg + args['--guessCellValueTypes']=1 + shift + ;; + + # :flag.case + --headerLines) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--headerLines']="$2" + shift + shift + else + printf "%s\n" "--headerLines requires an argument: --headerLines HEADERLINES" >&2 + exit 1 + fi + ;; + + # :flag.case + --ignoreLines) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--ignoreLines']="$2" + shift + shift + else + printf "%s\n" "--ignoreLines requires an argument: --ignoreLines IGNORELINES" >&2 + exit 1 + fi + ;; + + # :flag.case + --ignoreQuoteCharacter) + + # :flag.case_no_arg + args['--ignoreQuoteCharacter']=1 + shift + ;; + + # :flag.case + --includeFileSources) + + # :flag.case_no_arg + args['--includeFileSources']=1 + shift + ;; + + 
# :flag.case + --includeArchiveFileName) + + # :flag.case_no_arg + args['--includeArchiveFileName']=1 + shift + ;; + + # :flag.case + --limit) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--limit']="$2" + shift + shift + else + printf "%s\n" "--limit requires an argument: --limit LIMIT" >&2 + exit 1 + fi + ;; + + # :flag.case + --quoteCharacter) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--quoteCharacter']="$2" + shift + shift + else + printf "%s\n" "--quoteCharacter requires an argument: --quoteCharacter QUOTECHARACTER" >&2 + exit 1 + fi + ;; + + # :flag.case + --skipBlankRows) + + # :flag.case_no_arg + args['--skipBlankRows']=1 + shift + ;; + + # :flag.case + --skipDataLines) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--skipDataLines']="$2" + shift + shift + else + printf "%s\n" "--skipDataLines requires an argument: --skipDataLines SKIPDATALINES" >&2 + exit 1 + fi + ;; + # :flag.case --trimStrings) # :flag.case_no_arg - args[--trimStrings]=1 + args['--trimStrings']=1 shift ;; @@ -2212,7 +2526,7 @@ orcli_import_tsv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--projectName]="$2" + args['--projectName']="$2" shift shift else @@ -2221,11 +2535,26 @@ orcli_import_tsv_parse_requirements() { fi ;; + # :flag.case + --projectTags) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--projectTags']="$2" + shift + shift + else + printf "%s\n" "--projectTags requires an argument: --projectTags PROJECTTAGS" >&2 + exit 1 + fi + ;; + # :flag.case --quiet | -q) # :flag.case_no_arg - args[--quiet]=1 + args['--quiet']=1 shift ;; @@ -2237,12 +2566,12 @@ orcli_import_tsv_parse_requirements() { *) # :command.parse_requirements_case # :command.parse_requirements_case_repeatable - if [[ -z ${args[file]+x} ]]; then - args[file]="\"$1\"" + if [[ -z ${args['file']+x} ]]; then + args['file']="\"$1\"" shift else - args[file]="${args[file]} \"$1\"" + args['file']="${args[file]} \"$1\"" shift fi @@ -2252,7 +2581,12 
@@ orcli_import_tsv_parse_requirements() { done # :command.default_assignments - [[ -n ${args[file]:-} ]] || args[file]="-" + [[ -n ${args['file']:-} ]] || args['file']="-" + [[ -n ${args['--headerLines']:-} ]] || args['--headerLines']="1" + [[ -n ${args['--ignoreLines']:-} ]] || args['--ignoreLines']="-1" + [[ -n ${args['--limit']:-} ]] || args['--limit']="-1" + [[ -n ${args['--quoteCharacter']:-} ]] || args['--quoteCharacter']="\\\"" + [[ -n ${args['--skipDataLines']:-} ]] || args['--skipDataLines']="0" } @@ -2334,9 +2668,9 @@ orcli_info_parse_requirements() { *) # :command.parse_requirements_case # :command.parse_requirements_case_simple - if [[ -z ${args[project]+x} ]]; then + if [[ -z ${args['project']+x} ]]; then - args[project]=$1 + args['project']=$1 shift else printf "invalid argument: %s\n" "$key" >&2 @@ -2349,7 +2683,7 @@ orcli_info_parse_requirements() { done # :command.required_args_filter - if [[ -z ${args[project]+x} ]]; then + if [[ -z ${args['project']+x} ]]; then printf "missing required argument: PROJECT\nusage: orcli info PROJECT\n" >&2 exit 1 fi @@ -2429,7 +2763,7 @@ orcli_transform_parse_requirements() { --quiet | -q) # :flag.case_no_arg - args[--quiet]=1 + args['--quiet']=1 shift ;; @@ -2441,16 +2775,16 @@ orcli_transform_parse_requirements() { *) # :command.parse_requirements_case # :command.parse_requirements_case_repeatable - if [[ -z ${args[project]+x} ]]; then - args[project]=$1 + if [[ -z ${args['project']+x} ]]; then + args['project']=$1 shift - elif [[ -z ${args[file]+x} ]]; then - args[file]="\"$1\"" + elif [[ -z ${args['file']+x} ]]; then + args['file']="\"$1\"" shift else - args[file]="${args[file]} \"$1\"" + args['file']="${args[file]} \"$1\"" shift fi @@ -2460,13 +2794,13 @@ orcli_transform_parse_requirements() { done # :command.required_args_filter - if [[ -z ${args[project]+x} ]]; then + if [[ -z ${args['project']+x} ]]; then printf "missing required argument: PROJECT\nusage: orcli transform PROJECT [FILE...] 
[OPTIONS]\n" >&2 exit 1 fi # :command.default_assignments - [[ -n ${args[file]:-} ]] || args[file]="-" + [[ -n ${args['file']:-} ]] || args['file']="-" } @@ -2568,7 +2902,7 @@ orcli_export_tsv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--output]="$2" + args['--output']="$2" shift shift else @@ -2583,7 +2917,7 @@ orcli_export_tsv_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--encoding]="$2" + args['--encoding']="$2" shift shift else @@ -2596,7 +2930,7 @@ orcli_export_tsv_parse_requirements() { --quiet | -q) # :flag.case_no_arg - args[--quiet]=1 + args['--quiet']=1 shift ;; @@ -2608,9 +2942,9 @@ orcli_export_tsv_parse_requirements() { *) # :command.parse_requirements_case # :command.parse_requirements_case_simple - if [[ -z ${args[project]+x} ]]; then + if [[ -z ${args['project']+x} ]]; then - args[project]=$1 + args['project']=$1 shift else printf "invalid argument: %s\n" "$key" >&2 @@ -2623,13 +2957,13 @@ orcli_export_tsv_parse_requirements() { done # :command.required_args_filter - if [[ -z ${args[project]+x} ]]; then + if [[ -z ${args['project']+x} ]]; then printf "missing required argument: PROJECT\nusage: orcli export tsv PROJECT [OPTIONS]\n" >&2 exit 1 fi # :command.default_assignments - [[ -n ${args[--encoding]:-} ]] || args[--encoding]="UTF-8" + [[ -n ${args['--encoding']:-} ]] || args['--encoding']="UTF-8" } @@ -2664,7 +2998,7 @@ orcli_run_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--memory]="$2" + args['--memory']="$2" shift shift else @@ -2679,7 +3013,7 @@ orcli_run_parse_requirements() { # :flag.case_arg if [[ -n ${2+x} ]]; then - args[--port]="$2" + args['--port']="$2" shift shift else @@ -2692,7 +3026,7 @@ orcli_run_parse_requirements() { --interactive) # :flag.case_no_arg - args[--interactive]=1 + args['--interactive']=1 shift ;; @@ -2700,7 +3034,7 @@ orcli_run_parse_requirements() { --quiet | -q) # :flag.case_no_arg - args[--quiet]=1 + args['--quiet']=1 shift ;; @@ 
-2712,12 +3046,12 @@ orcli_run_parse_requirements() { *) # :command.parse_requirements_case # :command.parse_requirements_case_repeatable - if [[ -z ${args[file]+x} ]]; then - args[file]="\"$1\"" + if [[ -z ${args['file']+x} ]]; then + args['file']="\"$1\"" shift else - args[file]="${args[file]} \"$1\"" + args['file']="${args[file]} \"$1\"" shift fi @@ -2727,9 +3061,9 @@ orcli_run_parse_requirements() { done # :command.default_assignments - [[ -n ${args[file]:-} ]] || args[file]="-" - [[ -n ${args[--memory]:-} ]] || args[--memory]="2048M" - [[ -n ${args[--port]:-} ]] || args[--port]="3333" + [[ -n ${args['file']:-} ]] || args['file']="-" + [[ -n ${args['--memory']:-} ]] || args['--memory']="2048M" + [[ -n ${args['--port']:-} ]] || args['--port']="3333" } @@ -2756,7 +3090,7 @@ run() { case "$action" in "completions") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_completions_usage else @@ -2765,7 +3099,7 @@ run() { ;; "delete") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_delete_usage else @@ -2774,7 +3108,7 @@ run() { ;; "import") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_import_usage else @@ -2783,7 +3117,7 @@ run() { ;; "import csv") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_import_csv_usage else @@ -2792,7 +3126,7 @@ run() { ;; "import tsv") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_import_tsv_usage else @@ -2801,7 +3135,7 @@ run() { ;; "list") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_list_usage else @@ -2810,7 +3144,7 @@ run() { ;; "info") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_info_usage else @@ -2819,7 +3153,7 @@ run() { ;; "test") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then 
long_usage=yes orcli_test_usage else @@ -2828,7 +3162,7 @@ run() { ;; "transform") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_transform_usage else @@ -2837,7 +3171,7 @@ run() { ;; "export") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_export_usage else @@ -2846,7 +3180,7 @@ run() { ;; "export tsv") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_export_tsv_usage else @@ -2855,7 +3189,7 @@ run() { ;; "run") - if [[ ${args[--help]:-} ]]; then + if [[ ${args['--help']:-} ]]; then long_usage=yes orcli_run_usage else diff --git a/src/bashly.yml b/src/bashly.yml index 2fbf9b3..206af03 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -73,6 +73,10 @@ commands: - &blankCellsAsStrings long: --blankCellsAsStrings help: store blank cells as empty strings instead of nulls + - &columnNames + long: --columnNames + help: set column names (comma separated) + arg: columnNames - &encoding_import long: --encoding help: set character encoding @@ -124,6 +128,10 @@ commands: long: --projectName arg: projectName help: set a name for the OpenRefine project + - &projectTags + long: --projectTags + help: set project tags (comma separated) + arg: projectTags - *quiet examples: - orcli import csv "file" @@ -142,9 +150,22 @@ commands: args: - *file flags: + - *blankCellsAsStrings + - *columnNames - *encoding_import + - *guessCellValueTypes + - *headerLines + - *ignoreLines + - *ignoreQuoteCharacter + - *includeFileSources + - *includeArchiveFileName + - *limit + - *quoteCharacter + - *skipBlankRows + - *skipDataLines - *trimStrings - *projectName + - *projectTags - *quiet examples: - orcli import tsv "file" diff --git a/src/import_csv_command.sh b/src/import_csv_command.sh index e341644..8e8f37f 100644 --- a/src/import_csv_command.sh +++ b/src/import_csv_command.sh @@ -23,6 +23,11 @@ if [[ ${args[--blankCellsAsStrings]} ]]; then options+=', ' 
options+='"storeBlankCellsAsNulls": false' fi +if [[ ${args[--columnNames]} ]]; then + IFS=',' read -ra columnNames <<< "${args[--columnNames]}" + options+=', ' + options+="\"columnNames\": [ $(printf ',"'%s'"' "${columnNames[@]}" | cut -c2-) ]" +fi if [[ ${args[--guessCellValueTypes]} ]]; then options+=', ' options+='"guessCellValueTypes": true' @@ -71,6 +76,11 @@ if [[ ${args[--trimStrings]} ]]; then options+=', ' options+='"trimStrings": true' fi +if [[ ${args[--projectTags]} ]]; then + IFS=',' read -ra projectTags <<< "${args[--projectTags]}" + options+=', ' + options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]" +fi options+=' }' data+=("options=${options}") diff --git a/src/import_tsv_command.sh b/src/import_tsv_command.sh index 02644bd..439e508 100644 --- a/src/import_tsv_command.sh +++ b/src/import_tsv_command.sh @@ -19,9 +19,65 @@ if [[ ${args[--encoding]} ]]; then options+=', ' options+="\"encoding\": \"${args[--encoding]}\"" fi +if [[ ${args[--blankCellsAsStrings]} ]]; then + options+=', ' + options+='"storeBlankCellsAsNulls": false' +fi +if [[ ${args[--columnNames]} ]]; then + options+=', ' + options+="\"columnNames\": \"[${args[--columnNames]}\"]" +fi +if [[ ${args[--guessCellValueTypes]} ]]; then + options+=', ' + options+='"guessCellValueTypes": true' +fi +if [[ ${args[--headerLines]} ]]; then + options+=', ' + options+="\"headerLines\": ${args[--headerLines]}" +fi +if [[ ${args[--ignoreLines]} ]]; then + options+=', ' + options+="\"ignoreLines\": ${args[--ignoreLines]}" +fi +if [[ ${args[--ignoreQuoteCharacter]} ]]; then + options+=', ' + options+='"processQuotes": false' +fi +if [[ ${args[--includeFileSources]} ]]; then + options+=', ' + options+='includeFileSources: true' +fi +if [[ ${args[--includeArchiveFileName]} ]]; then + options+=', ' + options+='"includeArchiveFileName": true' +fi +if [[ ${args[--limit]} ]]; then + options+=', ' + options+="\"limit\": ${args[--limit]}" +fi +if [[ ${args[--projectName]} ]]; then 
+ options+=', ' + options+="\"projectName\": \"${args[--projectName]}\"" +fi +if [[ ${args[--quoteCharacter]} ]]; then + options+=', ' + options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\"" +fi +if [[ ${args[--skipBlankRows]} ]]; then + options+=', ' + options+='"storeBlankRows": false' +fi +if [[ ${args[--skipDataLines]} ]]; then + options+=', ' + options+="\"skipDataLines\": ${args[--skipDataLines]}" +fi if [[ ${args[--trimStrings]} ]]; then options+=', ' - options+="\"trimStrings\": true" + options+='"trimStrings": true' +fi +if [[ ${args[--projectTags]} ]]; then + options+=', ' + options+="\"projectTags\": \"[${args[--projectTags]}\"]" fi options+=' }' data+=("options=${options}") diff --git a/src/lib/send_completions.sh b/src/lib/send_completions.sh index 473d54b..4ee0d0d 100644 --- a/src/lib/send_completions.sh +++ b/src/lib/send_completions.sh @@ -35,11 +35,11 @@ send_completions() { echo $' ;;' echo $'' echo $' \'import csv\'*)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' echo $' ;;' echo $'' echo $' \'import tsv\'*)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --help --projectName --quiet --trimStrings -h -q")" -- "$cur" )' + 
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' echo $' ;;' echo $'' echo $' \'export tsv\'*)' From 770effe16a0d1a9719c8a811d864c20626f6b683 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Mon, 9 Jan 2023 10:42:51 +0000 Subject: [PATCH 04/10] tsv option columnNames projectTags --- orcli | 6 ++++-- src/import_tsv_command.sh | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/orcli b/orcli index 7caa1ef..733cb1b 100755 --- a/orcli +++ b/orcli @@ -1300,8 +1300,9 @@ orcli_import_tsv_command() { options+='"storeBlankCellsAsNulls": false' fi if [[ ${args[--columnNames]} ]]; then + IFS=',' read -ra columnNames <<< "${args[--columnNames]}" options+=', ' - options+="\"columnNames\": \"[${args[--columnNames]}\"]" + options+="\"columnNames\": [ $(printf ',"'%s'"' "${columnNames[@]}" | cut -c2-) ]" fi if [[ ${args[--guessCellValueTypes]} ]]; then options+=', ' @@ -1352,8 +1353,9 @@ orcli_import_tsv_command() { options+='"trimStrings": true' fi if [[ ${args[--projectTags]} ]]; then + IFS=',' read -ra projectTags <<< "${args[--projectTags]}" options+=', ' - options+="\"projectTags\": \"[${args[--projectTags]}\"]" + options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]" fi options+=' }' data+=("options=${options}") diff --git a/src/import_tsv_command.sh b/src/import_tsv_command.sh index 439e508..81faee4 100644 --- a/src/import_tsv_command.sh +++ b/src/import_tsv_command.sh @@ -24,8 +24,9 @@ if [[ ${args[--blankCellsAsStrings]} ]]; then options+='"storeBlankCellsAsNulls": false' fi if [[ ${args[--columnNames]} ]]; then + IFS=',' read -ra columnNames <<< "${args[--columnNames]}" 
options+=', ' - options+="\"columnNames\": \"[${args[--columnNames]}\"]" + options+="\"columnNames\": [ $(printf ',"'%s'"' "${columnNames[@]}" | cut -c2-) ]" fi if [[ ${args[--guessCellValueTypes]} ]]; then options+=', ' @@ -76,8 +77,9 @@ if [[ ${args[--trimStrings]} ]]; then options+='"trimStrings": true' fi if [[ ${args[--projectTags]} ]]; then + IFS=',' read -ra projectTags <<< "${args[--projectTags]}" options+=', ' - options+="\"projectTags\": \"[${args[--projectTags]}\"]" + options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]" fi options+=' }' data+=("options=${options}") From fba2160dc77f6da43efcd92a2fd3ca189befe3f8 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Mon, 9 Jan 2023 10:50:11 +0000 Subject: [PATCH 05/10] basic test import tsv --- tests/import-tsv.sh | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 tests/import-tsv.sh diff --git a/tests/import-tsv.sh b/tests/import-tsv.sh new file mode 100644 index 0000000..5c1de8d --- /dev/null +++ b/tests/import-tsv.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +t="import-tsv" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.tsv "${tmpdir}/${t}.tsv" + +# assertion +cp data/example.tsv "${tmpdir}/${t}.assert" + +# action +cd "${tmpdir}" || exit 1 +orcli import tsv "${t}.tsv" --projectName "${t}" +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" From e82b306839d20035baf605ed330cb4c381b6b1f6 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Mon, 9 Jan 2023 18:21:24 +0000 Subject: [PATCH 06/10] test import csv --separator --- tests/data/example-separator.csv | 4 ++++ tests/import-csv-separator.sh | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/data/example-separator.csv create mode 100644 tests/import-csv-separator.sh diff --git a/tests/data/example-separator.csv b/tests/data/example-separator.csv new file mode 100644 index 
0000000..1c0c00c --- /dev/null +++ b/tests/data/example-separator.csv @@ -0,0 +1,4 @@ +a; b; c +1; 2; 3 +0; 0; 0 +$; \; ' diff --git a/tests/import-csv-separator.sh b/tests/import-csv-separator.sh new file mode 100644 index 0000000..1bac0fa --- /dev/null +++ b/tests/import-csv-separator.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +t="import-csv-separator" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example-separator.csv "${tmpdir}/${t}.csv" + +# assertion +cp data/example.tsv "${tmpdir}/${t}.assert" + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --separator "; " +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" From cd7d62e25428ba2e5be18c08f1c7c2a401351a4d Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Thu, 12 Jan 2023 10:18:06 +0000 Subject: [PATCH 07/10] fix curl quoting with --form-string --- orcli | 82 ++++++++++++++++++--------------------- src/import_csv_command.sh | 10 ++--- src/import_tsv_command.sh | 10 ++--- src/lib/init_import.sh | 28 ++----------- src/lib/post_export.sh | 6 +-- src/lib/post_import.sh | 28 ++++++++++--- tests/delete.sh | 18 ++++----- tests/export-tsv.sh | 4 +- tests/import-csv.sh | 4 +- tests/info.sh | 4 +- tests/list.sh | 2 +- 11 files changed, 90 insertions(+), 106 deletions(-) diff --git a/orcli b/orcli index 733cb1b..9d098bd 100755 --- a/orcli +++ b/orcli @@ -900,11 +900,10 @@ function get_ids() { # src/lib/init_import.sh # common import tasks to support multiple files and URLs -# shellcheck shell=bash +# shellcheck shell=bash disable=SC2154 function init_import() { - local files file # catch args, convert the space delimited string to an array - files=() + local files=() eval "files=(${args[file]})" # create tmp directory tmpdir="$(mktemp -d)" @@ -915,7 +914,7 @@ function init_import() { if ! 
curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then error "download of ${files[$i]} failed!" fi - files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" + files[i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" fi done # read pipes if name starts with /dev/fd @@ -924,7 +923,7 @@ function init_import() { if ! cat "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then error "reading of ${files[$i]} failed!" fi - files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" + files[i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" fi done # create a zip archive if there are multiple files @@ -936,25 +935,6 @@ function init_import() { else file="${files[0]}" fi - # basic post data - if [[ ${file} == "-" ]]; then - data+=("project-file=@-") - else - if ! path=$(readlink -e "${file}"); then - error "cannot open ${file} (no such file)!" - fi - data+=("project-file=@${path}") - fi - if [[ ${args[--projectName]} ]]; then - data+=("project-name=${args[--projectName]}") - else - if [[ ${file} == "-" ]]; then - name="Untitled" - else - name="$(basename "${path}" | tr '.' ' ')" - fi - data+=("project-name=${name}") - fi } # src/lib/interactive.sh @@ -994,18 +974,18 @@ function log() { # shellcheck shell=bash disable=SC2154 function post_export() { local curloptions - # post mapfile -t curloptions < <(for d in "$@"; do echo "--data" echo "$d" done) + # support file output if [[ ${args[--output]} ]]; then if ! mkdir -p "$(dirname "${args[--output]}")"; then error "unable to create parent directory for ${args[--output]}" fi - curloptions+=("--output") - curloptions+=("${args[--output]}") + curloptions+=("--output" "${args[--output]}") fi + # post if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then error "exporting ${args[project]} failed!" 
else @@ -1019,15 +999,31 @@ function post_export() { # post to create-project endpoint and validate # shellcheck shell=bash disable=SC2154 function post_import() { - local curloptions - local projectid - local projectname - local rows - # post + local curloptions projectid projectname rows mapfile -t curloptions < <(for d in "$@"; do - echo "--form" + echo "--form-string" echo "$d" done) + # basic post data + if [[ ${file} == "-" ]]; then + curloptions+=("--form" "project-file=@-") + else + if ! path=$(readlink -e "${file}"); then + error "cannot open ${file} (no such file)!" + fi + curloptions+=("--form" "project-file=@${path}") + fi + if [[ ${args[--projectName]} ]]; then + curloptions+=("--form-string" "project-name=${args[--projectName]}") + else + if [[ ${file} == "-" ]]; then + name="Untitled" + else + name="$(basename "${path}" | tr '.' ' ')" + fi + curloptions+=("--form-string" "project-name=${name}") + fi + # post if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then error "importing ${args[file]} failed!" fi @@ -1180,17 +1176,15 @@ orcli_delete_command() { # :command.function orcli_import_csv_command() { # src/import_csv_command.sh - # shellcheck shell=bash + # shellcheck shell=bash disable=SC2154 # call init_import function to eval args and to set basic post data init_import # check if stdin is present if selected - if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then - if ! read -u 0 -t 0; then - orcli_import_csv_usage - exit 1 - fi + if [[ ${file} == '-' ]] && ! 
read -u 0 -t 0; then + orcli_import_csv_usage + exit 1 fi # assemble specific post data (some options require json format) @@ -1274,17 +1268,15 @@ orcli_import_csv_command() { # :command.function orcli_import_tsv_command() { # src/import_tsv_command.sh - # shellcheck shell=bash + # shellcheck shell=bash disable=SC2154 # call init_import function to eval args and to set basic post data init_import # check if stdin is present if selected - if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then - if ! read -u 0 -t 0; then - orcli_import_tsv_usage - exit 1 - fi + if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then + orcli_import_tsv_usage + exit 1 fi # assemble specific post data (some options require json format) diff --git a/src/import_csv_command.sh b/src/import_csv_command.sh index 8e8f37f..d3958be 100644 --- a/src/import_csv_command.sh +++ b/src/import_csv_command.sh @@ -1,14 +1,12 @@ -# shellcheck shell=bash +# shellcheck shell=bash disable=SC2154 # call init_import function to eval args and to set basic post data init_import # check if stdin is present if selected -if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then - if ! read -u 0 -t 0; then - orcli_import_csv_usage - exit 1 - fi +if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then + orcli_import_csv_usage + exit 1 fi # assemble specific post data (some options require json format) diff --git a/src/import_tsv_command.sh b/src/import_tsv_command.sh index 81faee4..fa8383a 100644 --- a/src/import_tsv_command.sh +++ b/src/import_tsv_command.sh @@ -1,14 +1,12 @@ -# shellcheck shell=bash +# shellcheck shell=bash disable=SC2154 # call init_import function to eval args and to set basic post data init_import # check if stdin is present if selected -if [[ ${args[file]} == '-' ]] || [[ ${args[file]} == '"-"' ]]; then - if ! read -u 0 -t 0; then - orcli_import_tsv_usage - exit 1 - fi +if [[ ${file} == '-' ]] && ! 
read -u 0 -t 0; then + orcli_import_tsv_usage + exit 1 fi # assemble specific post data (some options require json format) diff --git a/src/lib/init_import.sh b/src/lib/init_import.sh index 283acfc..fdfdc16 100644 --- a/src/lib/init_import.sh +++ b/src/lib/init_import.sh @@ -1,9 +1,8 @@ # common import tasks to support multiple files and URLs -# shellcheck shell=bash +# shellcheck shell=bash disable=SC2154 function init_import() { - local files file # catch args, convert the space delimited string to an array - files=() + local files=() eval "files=(${args[file]})" # create tmp directory tmpdir="$(mktemp -d)" @@ -14,7 +13,7 @@ function init_import() { if ! curl -fs --location "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then error "download of ${files[$i]} failed!" fi - files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" + files[i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" fi done # read pipes if name starts with /dev/fd @@ -23,7 +22,7 @@ function init_import() { if ! cat "${files[$i]}" >"${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"; then error "reading of ${files[$i]} failed!" fi - files[$i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" + files[i]="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}" fi done # create a zip archive if there are multiple files @@ -35,23 +34,4 @@ function init_import() { else file="${files[0]}" fi - # basic post data - if [[ ${file} == "-" ]]; then - data+=("project-file=@-") - else - if ! path=$(readlink -e "${file}"); then - error "cannot open ${file} (no such file)!" - fi - data+=("project-file=@${path}") - fi - if [[ ${args[--projectName]} ]]; then - data+=("project-name=${args[--projectName]}") - else - if [[ ${file} == "-" ]]; then - name="Untitled" - else - name="$(basename "${path}" | tr '.' 
' ')" - fi - data+=("project-name=${name}") - fi } diff --git a/src/lib/post_export.sh b/src/lib/post_export.sh index fd48304..77cb561 100644 --- a/src/lib/post_export.sh +++ b/src/lib/post_export.sh @@ -2,18 +2,18 @@ # shellcheck shell=bash disable=SC2154 function post_export() { local curloptions - # post mapfile -t curloptions < <(for d in "$@"; do echo "--data" echo "$d" done) + # support file output if [[ ${args[--output]} ]]; then if ! mkdir -p "$(dirname "${args[--output]}")"; then error "unable to create parent directory for ${args[--output]}" fi - curloptions+=("--output") - curloptions+=("${args[--output]}") + curloptions+=("--output" "${args[--output]}") fi + # post if ! curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then error "exporting ${args[project]} failed!" else diff --git a/src/lib/post_import.sh b/src/lib/post_import.sh index 245f8f5..a863ab2 100644 --- a/src/lib/post_import.sh +++ b/src/lib/post_import.sh @@ -1,15 +1,31 @@ # post to create-project endpoint and validate # shellcheck shell=bash disable=SC2154 function post_import() { - local curloptions - local projectid - local projectname - local rows - # post + local curloptions projectid projectname rows mapfile -t curloptions < <(for d in "$@"; do - echo "--form" + echo "--form-string" echo "$d" done) + # basic post data + if [[ ${file} == "-" ]]; then + curloptions+=("--form" "project-file=@-") + else + if ! path=$(readlink -e "${file}"); then + error "cannot open ${file} (no such file)!" + fi + curloptions+=("--form" "project-file=@${path}") + fi + if [[ ${args[--projectName]} ]]; then + curloptions+=("--form-string" "project-name=${args[--projectName]}") + else + if [[ ${file} == "-" ]]; then + name="Untitled" + else + name="$(basename "${path}" | tr '.' ' ')" + fi + curloptions+=("--form-string" "project-name=${name}") + fi + # post if ! 
redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then error "importing ${args[file]} failed!" fi diff --git a/tests/delete.sh b/tests/delete.sh index 226f0be..01dee63 100644 --- a/tests/delete.sh +++ b/tests/delete.sh @@ -15,15 +15,15 @@ DATA # action cd "${tmpdir}" || exit 1 -orcli import csv "${t}.csv" -orcli list | grep "${t} csv" -orcli delete "${t} csv" -orcli list | grep "${t} csv" > "${t}.output" || exit 0 -orcli import csv "${t}.csv" -orcli import csv "${t}.csv" -orcli list | grep "${t} csv" -orcli delete --force "${t} csv" -orcli list | grep "${t} csv" >> "${t}.output" || exit 0 +orcli import csv "${t}.csv" --projectName "${t}" +orcli list | grep "${t}" +orcli delete "${t}" +orcli list | grep "${t}" > "${t}.output" || exit 0 +orcli import csv "${t}.csv" --projectName "${t}" +orcli import csv "${t}.csv" --projectName "${t}" +orcli list | grep "${t}" +orcli delete --force "${t}" +orcli list | grep "${t}" >> "${t}.output" || exit 0 # test diff -u "${t}.assert" "${t}.output" diff --git a/tests/export-tsv.sh b/tests/export-tsv.sh index f831c4f..a690e62 100644 --- a/tests/export-tsv.sh +++ b/tests/export-tsv.sh @@ -14,8 +14,8 @@ cp data/example.tsv "${tmpdir}/${t}.assert" # action cd "${tmpdir}" || exit 1 -orcli import csv "${t}.csv" -orcli export tsv "${t} csv" --output "${t}.output" +orcli import csv "${t}.csv" --projectName "${t}" +orcli export tsv "${t}" --output "${t}.output" # test diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv.sh b/tests/import-csv.sh index 4918d59..f208767 100644 --- a/tests/import-csv.sh +++ b/tests/import-csv.sh @@ -14,8 +14,8 @@ cp data/example.tsv "${tmpdir}/${t}.assert" # action cd "${tmpdir}" || exit 1 -orcli import csv "${t}.csv" -orcli export tsv "${t} csv" > "${t}.output" +orcli import csv "${t}.csv" --projectName "${t}" +orcli export tsv "${t}" > "${t}.output" # test diff -u "${t}.assert" "${t}.output" diff 
--git a/tests/info.sh b/tests/info.sh index da5ff48..68f25e4 100644 --- a/tests/info.sh +++ b/tests/info.sh @@ -18,8 +18,8 @@ DATA # action cd "${tmpdir}" || exit 1 -orcli import csv "${t}.csv" -orcli info "${t} csv" | jq -r .columns[] > "${t}.output" +orcli import csv "${t}.csv" --projectName "${t}" +orcli info "${t}" | jq -r .columns[] > "${t}.output" # test diff -u "${t}.assert" "${t}.output" diff --git a/tests/list.sh b/tests/list.sh index cb28e0d..9281908 100644 --- a/tests/list.sh +++ b/tests/list.sh @@ -11,7 +11,7 @@ cp data/example.csv "${tmpdir}/${t}.csv" # action cd "${tmpdir}" || exit 1 -orcli import csv "${t}.csv" +orcli import csv "${t}.csv" --projectName "${t}" orcli list > "${t}.output" # test From a7718283c9f02ac8efe5187a1a1c3bb808b211aa Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Fri, 13 Jan 2023 22:30:12 +0000 Subject: [PATCH 08/10] some more import csv tests --- orcli | 24 +++++++++++- src/bashly.yml | 6 ++- tests/data/example-iso-8859-1.csv | 4 ++ tests/delete.sh | 3 +- tests/import-csv-blankCellsAsStrings.sh | 49 ++++++++++++++++++++++++ tests/import-csv-columnNames.sh | 27 +++++++++++++ tests/import-csv-encoding.sh | 26 +++++++++++++ tests/import-csv-guessCellValueTypes.sh | 31 +++++++++++++++ tests/import-csv-headerLines.sh | 27 +++++++++++++ tests/import-csv-ignoreLines.sh | 25 ++++++++++++ tests/import-csv-ignoreQuoteCharacter.sh | 32 ++++++++++++++++ tests/import-csv-includeFileSources.sh | 30 +++++++++++++++ 12 files changed, 279 insertions(+), 5 deletions(-) create mode 100644 tests/data/example-iso-8859-1.csv create mode 100644 tests/import-csv-blankCellsAsStrings.sh create mode 100644 tests/import-csv-columnNames.sh create mode 100644 tests/import-csv-encoding.sh create mode 100644 tests/import-csv-guessCellValueTypes.sh create mode 100644 tests/import-csv-headerLines.sh create mode 100644 tests/import-csv-ignoreLines.sh create mode 100644 tests/import-csv-ignoreQuoteCharacter.sh create mode 100644 
tests/import-csv-includeFileSources.sh diff --git a/orcli b/orcli index 9d098bd..5f94b22 100755 --- a/orcli +++ b/orcli @@ -245,7 +245,7 @@ orcli_import_csv_usage() { # :flag.usage printf " %s\n" "--columnNames COLUMNNAMES" - printf " set column names (comma separated)\n" + printf " set column names (comma separated)\n hint: add --ignoreLines 1 to overwrite existing header row\n" echo # :flag.usage @@ -383,7 +383,7 @@ orcli_import_tsv_usage() { # :flag.usage printf " %s\n" "--columnNames COLUMNNAMES" - printf " set column names (comma separated)\n" + printf " set column names (comma separated)\n hint: add --ignoreLines 1 to overwrite existing header row\n" echo # :flag.usage @@ -2106,6 +2106,11 @@ orcli_import_csv_parse_requirements() { # :flag.case --columnNames) + # :flag.conflicts + if [[ -n "${args['--headerLines']:-}" ]]; then + printf "conflicting options: %s cannot be used with %s\n" "$key" "--headerLines" >&2 + exit 1 + fi # :flag.case_arg if [[ -n ${2+x} ]]; then @@ -2144,6 +2149,11 @@ orcli_import_csv_parse_requirements() { # :flag.case --headerLines) + # :flag.conflicts + if [[ -n "${args['--columnNames']:-}" ]]; then + printf "conflicting options: %s cannot be used with %s\n" "$key" "--columnNames" >&2 + exit 1 + fi # :flag.case_arg if [[ -n ${2+x} ]]; then @@ -2363,6 +2373,11 @@ orcli_import_tsv_parse_requirements() { # :flag.case --columnNames) + # :flag.conflicts + if [[ -n "${args['--headerLines']:-}" ]]; then + printf "conflicting options: %s cannot be used with %s\n" "$key" "--headerLines" >&2 + exit 1 + fi # :flag.case_arg if [[ -n ${2+x} ]]; then @@ -2401,6 +2416,11 @@ orcli_import_tsv_parse_requirements() { # :flag.case --headerLines) + # :flag.conflicts + if [[ -n "${args['--columnNames']:-}" ]]; then + printf "conflicting options: %s cannot be used with %s\n" "$key" "--columnNames" >&2 + exit 1 + fi # :flag.case_arg if [[ -n ${2+x} ]]; then diff --git a/src/bashly.yml b/src/bashly.yml index 206af03..be7069a 100644 --- a/src/bashly.yml +++ 
b/src/bashly.yml @@ -75,8 +75,11 @@ commands: help: store blank cells as empty strings instead of nulls - &columnNames long: --columnNames - help: set column names (comma separated) + help: |- + set column names (comma separated) + hint: add --ignoreLines 1 to overwrite existing header row arg: columnNames + conflicts: [--headerLines] - &encoding_import long: --encoding help: set character encoding @@ -89,6 +92,7 @@ commands: help: parse x line(s) as column headers arg: headerLines default: "1" + conflicts: [--columnNames] - &ignoreLines long: --ignoreLines help: ignore first x line(s) at beginning of file diff --git a/tests/data/example-iso-8859-1.csv b/tests/data/example-iso-8859-1.csv new file mode 100644 index 0000000..c83fb4c --- /dev/null +++ b/tests/data/example-iso-8859-1.csv @@ -0,0 +1,4 @@ +a,b,c +1,2,3 +ä,é,ß +$,\,' diff --git a/tests/delete.sh b/tests/delete.sh index 01dee63..f80923f 100644 --- a/tests/delete.sh +++ b/tests/delete.sh @@ -10,8 +10,7 @@ trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 cp data/example.csv "${tmpdir}/${t}.csv" # assertion (empty file) -cat << "DATA" > "${tmpdir}/${t}.assert" -DATA +touch "${tmpdir}/${t}.assert" # action cd "${tmpdir}" || exit 1 diff --git a/tests/import-csv-blankCellsAsStrings.sh b/tests/import-csv-blankCellsAsStrings.sh new file mode 100644 index 0000000..eb2e185 --- /dev/null +++ b/tests/import-csv-blankCellsAsStrings.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +t="import-csv-blankCellsAsStrings" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cat << "DATA" > "${tmpdir}/${t}.csv" +a,b,c +1,2,3 +0,,0 +$,\,' +DATA + +cat << "DATA" > "${tmpdir}/${t}.transform" +[ + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "b", + "expression": "grel:isNull(value)", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + } +] +DATA + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c +1 false 3 +0 
false 0 +$ false ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --blankCellsAsStrings +orcli transform "${t}" "${tmpdir}/${t}.transform" +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-columnNames.sh b/tests/import-csv-columnNames.sh new file mode 100644 index 0000000..a218586 --- /dev/null +++ b/tests/import-csv-columnNames.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +t="import-csv-columnNames" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +foo bar baz +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --columnNames "foo,bar,baz" +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-encoding.sh b/tests/import-csv-encoding.sh new file mode 100644 index 0000000..1ab8fd3 --- /dev/null +++ b/tests/import-csv-encoding.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +t="import-csv-encoding" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example-iso-8859-1.csv "${tmpdir}/${t}.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c +1 2 3 +ä é ß +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --encoding "ISO-8859-1" +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-guessCellValueTypes.sh b/tests/import-csv-guessCellValueTypes.sh new file mode 100644 index 0000000..435126d --- /dev/null +++ b/tests/import-csv-guessCellValueTypes.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +t="import-csv-guessCellValueTypes" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cat 
<< "DATA" > "${tmpdir}/${t}.csv" +a,b,c +1,2,3 +01,02,03 +$,\,' +DATA + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c +1 2 3 +1 2 3 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --guessCellValueTypes +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-headerLines.sh b/tests/import-csv-headerLines.sh new file mode 100644 index 0000000..bbd5d98 --- /dev/null +++ b/tests/import-csv-headerLines.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +t="import-csv-headerLines" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +Column 1 Column 2 Column 3 +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --headerLines 0 +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-ignoreLines.sh b/tests/import-csv-ignoreLines.sh new file mode 100644 index 0000000..71f4c01 --- /dev/null +++ b/tests/import-csv-ignoreLines.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +t="import-csv-ignoreLines" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +1 2 3 +0 0 0 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --ignoreLines 1 +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-ignoreQuoteCharacter.sh b/tests/import-csv-ignoreQuoteCharacter.sh new file mode 100644 index 0000000..53c7336 --- /dev/null +++ b/tests/import-csv-ignoreQuoteCharacter.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +t="import-csv-ignoreQuoteCharacter" + +# create tmp directory 
+tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cat << "DATA" > "${tmpdir}/${t}.csv" +a,b,c +1,"2,0",3 +0,0,0 +$,\,' +DATA + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c Column 4 +1 2 0 3 +0 0 0 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +# OpenRefine 4.x fails without headerLines manually set +orcli import csv "${t}.csv" --projectName "${t}" --ignoreQuoteCharacter --headerLines 1 +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-includeFileSources.sh b/tests/import-csv-includeFileSources.sh new file mode 100644 index 0000000..e0c6500 --- /dev/null +++ b/tests/import-csv-includeFileSources.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +t="import-csv-includeFileSources" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}-1.csv" +cp data/example.csv "${tmpdir}/${t}-2.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +File a b c +import-csv-includeFileSources-1.csv 1 2 3 +import-csv-includeFileSources-1.csv 0 0 0 +import-csv-includeFileSources-1.csv $ \ ' +import-csv-includeFileSources-2.csv 1 2 3 +import-csv-includeFileSources-2.csv 0 0 0 +import-csv-includeFileSources-2.csv $ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}-1.csv" "${t}-2.csv" --projectName "${t}" --includeFileSources +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" From 99424d221d0fd4aa6cb9a908dec42db698ee0789 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Sat, 14 Jan 2023 22:04:56 +0000 Subject: [PATCH 09/10] further more import csv tests --- tests/import-csv-includeArchiveFileName.sh | 30 +++++++++++++++++++++ tests/import-csv-limit.sh | 25 +++++++++++++++++ tests/import-csv-quoteCharacter.sh | 31 ++++++++++++++++++++++ 3 files changed, 86 insertions(+) create mode 100644 tests/import-csv-includeArchiveFileName.sh create mode 
100644 tests/import-csv-limit.sh create mode 100644 tests/import-csv-quoteCharacter.sh diff --git a/tests/import-csv-includeArchiveFileName.sh b/tests/import-csv-includeArchiveFileName.sh new file mode 100644 index 0000000..157ecff --- /dev/null +++ b/tests/import-csv-includeArchiveFileName.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +t="import-csv-includeArchiveFileName" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}-1.csv" +cp data/example.csv "${tmpdir}/${t}-2.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +Archive a b c +Untitled.zip 1 2 3 +Untitled.zip 0 0 0 +Untitled.zip $ \ ' +Untitled.zip 1 2 3 +Untitled.zip 0 0 0 +Untitled.zip $ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}-1.csv" "${t}-2.csv" --projectName "${t}" --includeArchiveFileName +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-limit.sh b/tests/import-csv-limit.sh new file mode 100644 index 0000000..20746c7 --- /dev/null +++ b/tests/import-csv-limit.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +t="import-csv-limit" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c +1 2 3 +0 0 0 +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --limit 2 +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-quoteCharacter.sh b/tests/import-csv-quoteCharacter.sh new file mode 100644 index 0000000..fc5d65d --- /dev/null +++ b/tests/import-csv-quoteCharacter.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +t="import-csv-quoteCharacter" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cat << "DATA" > "${tmpdir}/${t}.csv" +a,b,c +1,%2,0%,3 
+0,0,0 +$,\,' +DATA + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c +1 2,0 3 +0 0 0 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --quoteCharacter "%" +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" From 444fd0320bdbae8f24d9d0c12463c90f58655a15 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Sat, 14 Jan 2023 23:43:25 +0000 Subject: [PATCH 10/10] final import csv tests and examples --- orcli | 9 +++++++-- src/bashly.yml | 13 +++++++++++++ tests/import-csv-projectTags.sh | 25 +++++++++++++++++++++++++ tests/import-csv-quiet.sh | 20 ++++++++++++++++++++ tests/import-csv-skipBlankRows.sh | 30 ++++++++++++++++++++++++++++++ tests/import-csv-skipDataLines.sh | 30 ++++++++++++++++++++++++++++++ tests/import-csv-trimStrings.sh | 31 +++++++++++++++++++++++++++++++ tests/import-csv-unicode.sh | 31 +++++++++++++++++++++++++++++++ 8 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 tests/import-csv-projectTags.sh create mode 100644 tests/import-csv-quiet.sh create mode 100644 tests/import-csv-skipBlankRows.sh create mode 100644 tests/import-csv-skipDataLines.sh create mode 100644 tests/import-csv-trimStrings.sh create mode 100644 tests/import-csv-unicode.sh diff --git a/orcli b/orcli index 5f94b22..e98dcc9 100755 --- a/orcli +++ b/orcli @@ -74,6 +74,7 @@ orcli_usage() { printf " orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n" printf " orcli export tsv \"duplicates\"\n" printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n" + printf " orcli delete \"duplicates\"\n" printf " orcli run --interactive\n" printf " orcli run << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n orcli export tsv \"duplicates\"\n EOF\n" echo @@ -165,7 +166,9 @@ orcli_delete_usage() { # :command.usage_examples printf "%s\n" "Examples:" printf " orcli delete 
\"duplicates\"\n" + printf " orcli delete \"duplicates\" --force\n" printf " orcli delete 1234567890123\n" + printf " for p in $(orcli list); do orcli delete ${p:0:13}; done\n" echo fi @@ -343,7 +346,7 @@ orcli_import_csv_usage() { printf " orcli import csv \"file1\" \"file2\"\n" printf " head -n 100 \"file\" | orcli import csv\n" printf " orcli import csv \"https://git.io/fj5hF\"\n" - printf " orcli import csv \"file\" \\\\\n --separator \";\" \\\\\n --encoding \"ISO-8859-1\" \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n" + printf " orcli import csv \"file\" \\\\\n --separator \";\" \\\\\n --columnNames \"foo,bar,baz\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n --projectTags \"test,urgent\"\n" echo fi @@ -481,7 +484,7 @@ orcli_import_tsv_usage() { printf " orcli import tsv \"file1\" \"file2\"\n" printf " head -n 100 \"file\" | orcli import tsv\n" printf " orcli import tsv \"https://git.io/fj5hF\"\n" - printf " orcli import tsv \"file\" \\\\\n --separator \";\" \\\\\n --encoding \"ISO-8859-1\" \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n" + printf " orcli import tsv \"file\" \\\\\n --separator \";\" \\\\\n --columnNames \"foo,bar,baz\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n --projectTags \"test,urgent\"\n" echo fi @@ -554,6 +557,7 @@ orcli_info_usage() { printf "%s\n" "Examples:" printf " orcli info \"duplicates\"\n" printf " orcli info 1234567890123\n" + printf " orcli info \"duplicates\" | jq -r .columns[]\n" echo fi @@ -733,6 +737,7 @@ orcli_export_tsv_usage() { printf "%s\n" "Examples:" printf " orcli export tsv \"duplicates\"\n" printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n" + printf " orcli export tsv \"duplicates\" --encoding \"ISO-8859-1\"\n" echo fi diff --git a/src/bashly.yml b/src/bashly.yml index be7069a..6bc7c85 100644 
--- a/src/bashly.yml +++ b/src/bashly.yml @@ -19,6 +19,7 @@ examples: - orcli transform "duplicates" "https://git.io/fj5ju" - orcli export tsv "duplicates" - orcli export tsv "duplicates" --output "duplicates.tsv" + - orcli delete "duplicates" - orcli run --interactive - |- orcli run << EOF @@ -50,7 +51,9 @@ commands: help: suppress log output, print errors only examples: - orcli delete "duplicates" + - orcli delete "duplicates" --force - orcli delete 1234567890123 + - for p in $(orcli list); do orcli delete ${p:0:13}; done - name: import help: commands to create OpenRefine projects from files or URLs @@ -145,9 +148,13 @@ commands: - |- orcli import csv "file" \\\\ --separator ";" \\\\ + --columnNames "foo,bar,baz" \\\\ + --ignoreLines 1 \\\\ --encoding "ISO-8859-1" \\\\ + --limit 100 \\\\ --trimStrings \\\\ --projectName "duplicates" + --projectTags "test,urgent" - name: tsv help: import tab-separated values (TSV) @@ -179,9 +186,13 @@ commands: - |- orcli import tsv "file" \\\\ --separator ";" \\\\ + --columnNames "foo,bar,baz" \\\\ + --ignoreLines 1 \\\\ --encoding "ISO-8859-1" \\\\ + --limit 100 \\\\ --trimStrings \\\\ --projectName "duplicates" + --projectTags "test,urgent" - name: list help: list projects on OpenRefine server @@ -193,6 +204,7 @@ commands: examples: - orcli info "duplicates" - orcli info 1234567890123 + - orcli info "duplicates" | jq -r .columns[] - name: test help: run functional tests on tmp OpenRefine workspace @@ -232,6 +244,7 @@ commands: examples: - orcli export tsv "duplicates" - orcli export tsv "duplicates" --output "duplicates.tsv" + - orcli export tsv "duplicates" --encoding "ISO-8859-1" - name: run help: run tmp OpenRefine workspace and execute shell script(s) diff --git a/tests/import-csv-projectTags.sh b/tests/import-csv-projectTags.sh new file mode 100644 index 0000000..9069398 --- /dev/null +++ b/tests/import-csv-projectTags.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +t="import-csv-projectTags" + +# create tmp directory 
+tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +foo +bar +baz +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --projectTags "foo,bar,baz" +orcli info "${t}" | jq -r .tags[] > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-quiet.sh b/tests/import-csv-quiet.sh new file mode 100644 index 0000000..087e56a --- /dev/null +++ b/tests/import-csv-quiet.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +t="import-csv-quiet" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}.csv" + +# assertion (empty file) +touch "${tmpdir}/${t}.assert" + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --quiet &> "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-skipBlankRows.sh b/tests/import-csv-skipBlankRows.sh new file mode 100644 index 0000000..7fa6aec --- /dev/null +++ b/tests/import-csv-skipBlankRows.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +t="import-csv-skipBlankRows" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cat << "DATA" > "${tmpdir}/${t}.csv" +a,b,c +1,2,3 +,, +$,\,' +DATA + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c +1 2 3 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --skipBlankRows +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-skipDataLines.sh b/tests/import-csv-skipDataLines.sh new file mode 100644 index 0000000..6d3a92f --- /dev/null +++ b/tests/import-csv-skipDataLines.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +t="import-csv-skipDataLines" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 
15 + +# input +cat << "DATA" > "${tmpdir}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c +0 0 0 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --skipDataLines 1 +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-trimStrings.sh b/tests/import-csv-trimStrings.sh new file mode 100644 index 0000000..7596344 --- /dev/null +++ b/tests/import-csv-trimStrings.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +t="import-csv-trimStrings" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cat << "DATA" > "${tmpdir}/${t}.csv" +a,b,c +1 , 2 , 3 +0,0,0 +$,\,' +DATA + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" --trimStrings +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output" diff --git a/tests/import-csv-unicode.sh b/tests/import-csv-unicode.sh new file mode 100644 index 0000000..2f01e67 --- /dev/null +++ b/tests/import-csv-unicode.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +t="import-csv-unicode biểu tượng cảm xúc ⛲" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cat << "DATA" > "${tmpdir}/${t}.csv" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +⌨ code meaning +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" +orcli export tsv "${t}" > "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output"