From 8ce5261a128487c84319c9e79d3d4661a4be7ce1 Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Sat, 16 Dec 2023 03:35:58 +0100 Subject: [PATCH 1/2] fix tests --- tests/transform-bracket.sh | 6 +++--- tests/transform.sh | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/transform-bracket.sh b/tests/transform-bracket.sh index f35cbbc..190dee1 100644 --- a/tests/transform-bracket.sh +++ b/tests/transform-bracket.sh @@ -40,9 +40,9 @@ DATA # action cd "${tmpdir}" || exit 1 -orcli import csv "https://git.io/fj5hF" --projectName "duplicates" -orcli transform "duplicates" "${t}.history" -orcli export tsv "duplicates" --output "${t}.output" +orcli import csv "https://git.io/fj5hF" --projectName "${t}" +orcli transform "${t}" "${t}.history" +orcli export tsv "${t}" --output "${t}.output" # test diff -u "${t}.assert" "${t}.output" diff --git a/tests/transform.sh b/tests/transform.sh index d260b6f..db4caf9 100644 --- a/tests/transform.sh +++ b/tests/transform.sh @@ -14,9 +14,9 @@ cp data/duplicates-history.json "${tmpdir}/${t}.history" # action cd "${tmpdir}" || exit 1 -orcli import csv "https://git.io/fj5hF" --projectName "duplicates" -orcli transform "duplicates" "${t}.history" -orcli export tsv "duplicates" --output "${t}.output" +orcli import csv "https://git.io/fj5hF" --projectName "${t}" +orcli transform "${t}" "${t}.history" +orcli export tsv "${t}" --output "${t}.output" # test -diff -u "${t}.assert" "${t}.output" \ No newline at end of file +diff -u "${t}.assert" "${t}.output" From dd41bfa192152154cf29cc97a1e8d5efb604d10d Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Sat, 16 Dec 2023 03:37:06 +0100 Subject: [PATCH 2/2] export csv --- README.md | 2 +- help/README.md | 3 +- help/export_csv.md | 60 +++++++ orcli | 319 +++++++++++++++++++++++++++++++++- src/bashly.yml | 37 +++- src/export_csv_command.sh | 34 ++++ src/lib/send_completions.sh | 10 +- tests/export-csv-separator.sh | 26 +++ 8 files changed, 478 insertions(+), 13 
deletions(-) create mode 100644 help/export_csv.md create mode 100644 src/export_csv_command.sh create mode 100644 tests/export-csv-separator.sh diff --git a/README.md b/README.md index 4d05bbf..4e7251e 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org * transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file * orcli calls specific endpoints for each operation to provide improved error handling and logging * supports stdin, multiple files and URLs -* export to TSV, JSONL, ~~CSV, HTML, XLS, XLSX, ODS~~ +* export to CSV, TSV, JSONL, ~~HTML, XLS, XLSX, ODS~~ * [templating export](https://docs.openrefine.org/manual/exporting#templating-exporter) to additional formats like JSON or XML ## Requirements diff --git a/help/README.md b/help/README.md index d97de0a..c50e21a 100644 --- a/help/README.md +++ b/help/README.md @@ -1,9 +1,10 @@ -# orcli 0.3.0 +# orcli 0.4.0 ## command help screens - [completions](completions.md) - [delete](delete.md) +- [export csv](export_csv.md) - [export jsonl](export_jsonl.md) - [export template](export_template.md) - [export tsv](export_tsv.md) diff --git a/help/export_csv.md b/help/export_csv.md new file mode 100644 index 0000000..e1ad2b4 --- /dev/null +++ b/help/export_csv.md @@ -0,0 +1,60 @@ +# orcli export csv + +``` +orcli export csv - export comma-separated values (CSV) + +Usage: + orcli export csv PROJECT [OPTIONS] + orcli export csv --help | -h + +Options: + --separator SEPARATOR + character(s) that separates columns + Default: , + + --select COLUMNS + filter result set to one or more columns (comma separated) + example: --select "foo,bar,baz" + + --mode MODE + specify if project contains multi-row records + Allowed: rows, records + Default: rows + + --facets FACETS + filter result set by providing an OpenRefine facets config in json + Default: [] + + --output FILE + Write to file instead of 
stdout + + --encoding ENCODING + set character encoding + Default: UTF-8 + + --quiet, -q + suppress log output, print errors only + + --help, -h + Show this help + +Arguments: + PROJECT + project name or id + +Examples: + orcli export csv "duplicates" + orcli export csv "duplicates" --output "duplicates.csv" + orcli export csv "duplicates" --separator ";" + orcli export csv "duplicates" --encoding "ISO-8859-1" + orcli export csv "duplicates" --select "name,email,purchase" + orcli export csv "duplicates" --facets '[ { "type": "text", "columnName": + "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": + "^Ben" } ]' + orcli export csv "duplicates" --facets '[{ "type": "list", "expression": + "grel:filter([\"gender\",\"purchase\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", + "columnName": "", "selection": [{"v": {"v": true}}] }]' + +``` + +code: [src/export_csv_command.sh](../src/export_csv_command.sh) diff --git a/orcli b/orcli index 26b328b..33b9e0a 100755 --- a/orcli +++ b/orcli @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# This script was generated by bashly 1.1.4 (https://bashly.dannyb.co) +# This script was generated by bashly 1.1.3 (https://bashly.dannyb.co) # Modifying it manually is not recommended # :wrapper.bash3_bouncer @@ -1020,6 +1020,7 @@ orcli_export_usage() { # :command.usage_commands printf "%s\n" "Commands:" printf " %s export JSON Lines / newline-delimited JSON\n" "jsonl " + printf " %s export comma-separated values (CSV)\n" "csv " printf " %s export tab-separated values (TSV)\n" "tsv " printf " %s export to any text format by providing your own GREL template\n" "template" echo @@ -1118,6 +1119,95 @@ orcli_export_jsonl_usage() { fi } +# :command.usage +orcli_export_csv_usage() { + if [[ -n $long_usage ]]; then + printf "orcli export csv - export comma-separated values (CSV)\n" + echo + + else + printf "orcli export csv - export comma-separated values (CSV)\n" + echo + + fi + + printf "%s\n" "Usage:" + printf " orcli 
export csv PROJECT [OPTIONS]\n" + printf " orcli export csv --help | -h\n" + echo + + # :command.long_usage + if [[ -n $long_usage ]]; then + printf "%s\n" "Options:" + + # :command.usage_flags + # :flag.usage + printf " %s\n" "--separator SEPARATOR" + printf " character(s) that separates columns\n" + printf " Default: ,\n" + echo + + # :flag.usage + printf " %s\n" "--select COLUMNS" + printf " filter result set to one or more columns (comma separated)\n example: --select \"foo,bar,baz\"\n" + echo + + # :flag.usage + printf " %s\n" "--mode MODE" + printf " specify if project contains multi-row records\n" + printf " Allowed: rows, records\n" + printf " Default: rows\n" + echo + + # :flag.usage + printf " %s\n" "--facets FACETS" + printf " filter result set by providing an OpenRefine facets config in json\n" + printf " Default: []\n" + echo + + # :flag.usage + printf " %s\n" "--output FILE" + printf " Write to file instead of stdout\n" + echo + + # :flag.usage + printf " %s\n" "--encoding ENCODING" + printf " set character encoding\n" + printf " Default: UTF-8\n" + echo + + # :flag.usage + printf " %s\n" "--quiet, -q" + printf " suppress log output, print errors only\n" + echo + + # :command.usage_fixed_flags + printf " %s\n" "--help, -h" + printf " Show this help\n" + echo + + # :command.usage_args + printf "%s\n" "Arguments:" + + # :argument.usage + printf " %s\n" "PROJECT" + printf " project name or id\n" + echo + + # :command.usage_examples + printf "%s\n" "Examples:" + printf " orcli export csv \"duplicates\"\n" + printf " orcli export csv \"duplicates\" --output \"duplicates.csv\"\n" + printf " orcli export csv \"duplicates\" --separator \";\"\n" + printf " orcli export csv \"duplicates\" --encoding \"ISO-8859-1\"\n" + printf " orcli export csv \"duplicates\" --select \"name,email,purchase\"\n" + printf " orcli export csv \"duplicates\" --facets '[ { \"type\": \"text\", \"columnName\":\n \"name\", \"mode\": \"regex\", \"caseSensitive\": false, \"invert\": 
false, \"query\":\n \"^Ben\" } ]'\n" + printf " orcli export csv \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter([\\\\\"gender\\\\\",\\\\\"purchase\\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n" + echo + + fi +} + # :command.usage orcli_export_tsv_usage() { if [[ -n $long_usage ]]; then @@ -1674,6 +1764,10 @@ send_completions() { echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )' echo $' ;;' echo $'' + echo $' \'export csv\'*\'--mode\')' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )' + echo $' ;;' + echo $'' echo $' \'export tsv\'*\'--mode\')' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )' echo $' ;;' @@ -1710,6 +1804,10 @@ send_completions() { echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' echo $' ;;' echo $'' + echo $' \'export csv\'*)' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select --separator -h -q")" -- "$cur" )' + echo $' ;;' + echo $'' echo $' \'export tsv\'*)' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select -h -q")" -- "$cur" )' echo $' ;;' @@ -1731,7 +1829,7 @@ send_completions() { echo $' ;;' echo $'' echo $' 
\'export\'*)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv jsonl template tsv")" -- "$cur" )' echo $' ;;' echo $'' echo $' \'list\'*)' @@ -2477,6 +2575,46 @@ orcli_export_jsonl_command() { } +# :command.function +orcli_export_csv_command() { + # src/export_csv_command.sh + # shellcheck shell=bash + inspect_args + projectid="$(get_id "${args[project]}")" + separator="${args[--separator]:-,}" + + # assemble specific post data (some options require json format) + data+=("project=${projectid}") + data+=("format=csv") + options='{ ' + options+="\"separator\": \"${separator}\"" + if [[ ${args[--encoding]} ]]; then + options+=', ' + options+="\"encoding\": \"${args[--encoding]}\"" + fi + if [[ ${args[--select]} ]]; then + options+=', ' + options+='"columns": [' + IFS=',' read -ra columns <<< "${args[--select]}" + options+='{"name":"' + options+="${columns[0]}" + options+='"}' + for cn in "${columns[@]:1}"; do + options+=', ' + options+='{"name":"' + options+="${cn}" + options+='"}' + done + options+="]" + fi + options+=' }' + data+=("options=${options}") + + # call post_export function to post data and validate results + post_export "${data[@]}" + +} + # :command.function orcli_export_tsv_command() { # src/export_tsv_command.sh @@ -4196,7 +4334,7 @@ orcli_sort_columns_parse_requirements() { if [[ -z ${args['--first']+x} ]]; then args['--first']="\"$2\"" else - args['--first']="${args['--first']} \"$2\"" + args['--first']="${args[--first]} \"$2\"" fi shift shift @@ -4381,6 +4519,13 @@ orcli_export_parse_requirements() { shift $# ;; + csv) + action="csv" + shift + orcli_export_csv_parse_requirements "$@" + shift $# + ;; + tsv) action="tsv" shift @@ -4580,6 +4725,171 @@ orcli_export_jsonl_parse_requirements() { } +# :command.parse_requirements 
+orcli_export_csv_parse_requirements() { + # :command.fixed_flags_filter + while [[ $# -gt 0 ]]; do + case "${1:-}" in + --help | -h) + long_usage=yes + orcli_export_csv_usage + exit + ;; + + *) + break + ;; + + esac + done + + # :command.command_filter + action="export csv" + + # :command.parse_requirements_while + while [[ $# -gt 0 ]]; do + key="$1" + case "$key" in + # :flag.case + --separator) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--separator']="$2" + shift + shift + else + printf "%s\n" "--separator requires an argument: --separator SEPARATOR" >&2 + exit 1 + fi + ;; + + # :flag.case + --select) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--select']="$2" + shift + shift + else + printf "%s\n" "--select requires an argument: --select COLUMNS" >&2 + exit 1 + fi + ;; + + # :flag.case + --mode) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--mode']="$2" + shift + shift + else + printf "%s\n" "--mode requires an argument: --mode MODE" >&2 + exit 1 + fi + ;; + + # :flag.case + --facets) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--facets']="$2" + shift + shift + else + printf "%s\n" "--facets requires an argument: --facets FACETS" >&2 + exit 1 + fi + ;; + + # :flag.case + --output) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--output']="$2" + shift + shift + else + printf "%s\n" "--output requires an argument: --output FILE" >&2 + exit 1 + fi + ;; + + # :flag.case + --encoding) + + # :flag.case_arg + if [[ -n ${2+x} ]]; then + + args['--encoding']="$2" + shift + shift + else + printf "%s\n" "--encoding requires an argument: --encoding ENCODING" >&2 + exit 1 + fi + ;; + + # :flag.case + --quiet | -q) + + # :flag.case_no_arg + args['--quiet']=1 + shift + ;; + + -?*) + printf "invalid option: %s\n" "$key" >&2 + exit 1 + ;; + + *) + # :command.parse_requirements_case + # :command.parse_requirements_case_simple + if [[ -z ${args['project']+x} ]]; then + + args['project']=$1 + shift + else + printf 
"invalid argument: %s\n" "$key" >&2 + exit 1 + fi + + ;; + + esac + done + + # :command.required_args_filter + if [[ -z ${args['project']+x} ]]; then + printf "missing required argument: PROJECT\nusage: orcli export csv PROJECT [OPTIONS]\n" >&2 + exit 1 + fi + + # :command.default_assignments + [[ -n ${args['--separator']:-} ]] || args['--separator']="," + [[ -n ${args['--mode']:-} ]] || args['--mode']="rows" + [[ -n ${args['--facets']:-} ]] || args['--facets']="[]" + [[ -n ${args['--encoding']:-} ]] || args['--encoding']="UTF-8" + + # :command.whitelist_filter + if [[ ${args['--mode']:-} ]] && [[ ! ${args['--mode']:-} =~ ^(rows|records)$ ]]; then + printf "%s\n" "--mode must be one of: rows, records" >&2 + exit 1 + fi + +} + # :command.parse_requirements orcli_export_tsv_parse_requirements() { # :command.fixed_flags_filter @@ -5015,7 +5325,7 @@ orcli_run_parse_requirements() { # :command.initialize initialize() { - version="0.3.0" + version="0.4.0" long_usage='' set -e @@ -5052,6 +5362,7 @@ run() { "transform") orcli_transform_command ;; "export") orcli_export_command ;; "export jsonl") orcli_export_jsonl_command ;; + "export csv") orcli_export_csv_command ;; "export tsv") orcli_export_tsv_command ;; "export template") orcli_export_template_command ;; "run") orcli_run_command ;; diff --git a/src/bashly.yml b/src/bashly.yml index bced36c..0edb2ad 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -1,6 +1,6 @@ name: orcli help: OpenRefine command-line interface written in Bash -version: 0.3.0 +version: 0.4.0 footer: https://github.com/opencultureconsulting/orcli dependencies: @@ -373,16 +373,41 @@ commands: orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]' - |- orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": 
"", "selection": [{"v": {"v": true}}] }]' + + - name: csv + help: export comma-separated values (CSV) + args: + - *project + flags: + - *separator + - &select + long: --select + help: |- + filter result set to one or more columns (comma separated) + example: --select "foo,bar,baz" + arg: columns + - *mode + - *facets + - *output + - *encoding_export + - *quiet + examples: + - orcli export csv "duplicates" + - orcli export csv "duplicates" --output "duplicates.tsv" + - orcli export csv "duplicates" --separator ";" + - orcli export csv "duplicates" --encoding "ISO-8859-1" + - orcli export csv "duplicates" --select "name,email,purchase" + - |- + orcli export csv "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]' + - |- + orcli export csv "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]' + - name: tsv help: export tab-separated values (TSV) args: - *project flags: - - long: --select - help: |- - filter result set to one or more columns (comma separated) - example: --select "foo,bar,baz" - arg: columns + - *select - *mode - *facets - *output diff --git a/src/export_csv_command.sh b/src/export_csv_command.sh new file mode 100644 index 0000000..04fad58 --- /dev/null +++ b/src/export_csv_command.sh @@ -0,0 +1,34 @@ +# shellcheck shell=bash +inspect_args +projectid="$(get_id "${args[project]}")" +separator="${args[--separator]:-,}" + +# assemble specific post data (some options require json format) +data+=("project=${projectid}") +data+=("format=csv") +options='{ ' +options+="\"separator\": \"${separator}\"" +if [[ ${args[--encoding]} ]]; then + options+=', ' + options+="\"encoding\": \"${args[--encoding]}\"" +fi +if [[ ${args[--select]} ]]; then + options+=', ' + options+='"columns": [' + IFS=',' read -ra columns <<< 
"${args[--select]}" + options+='{"name":"' + options+="${columns[0]}" + options+='"}' + for cn in "${columns[@]:1}"; do + options+=', ' + options+='{"name":"' + options+="${cn}" + options+='"}' + done + options+="]" +fi +options+=' }' +data+=("options=${options}") + +# call post_export function to post data and validate results +post_export "${data[@]}" diff --git a/src/lib/send_completions.sh b/src/lib/send_completions.sh index 8651a84..3299b99 100644 --- a/src/lib/send_completions.sh +++ b/src/lib/send_completions.sh @@ -38,6 +38,10 @@ send_completions() { echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )' echo $' ;;' echo $'' + echo $' \'export csv\'*\'--mode\')' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )' + echo $' ;;' + echo $'' echo $' \'export tsv\'*\'--mode\')' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )' echo $' ;;' @@ -74,6 +78,10 @@ send_completions() { echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )' echo $' ;;' echo $'' + echo $' \'export csv\'*)' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select --separator -h -q")" -- "$cur" )' + echo $' ;;' + echo $'' echo $' \'export tsv\'*)' echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select -h 
-q")" -- "$cur" )' echo $' ;;' @@ -95,7 +103,7 @@ send_completions() { echo $' ;;' echo $'' echo $' \'export\'*)' - echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )' + echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv jsonl template tsv")" -- "$cur" )' echo $' ;;' echo $'' echo $' \'list\'*)' diff --git a/tests/export-csv-separator.sh b/tests/export-csv-separator.sh new file mode 100644 index 0000000..765c185 --- /dev/null +++ b/tests/export-csv-separator.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +t="export-csv-separator" + +# create tmp directory +tmpdir="$(mktemp -d)" +trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15 + +# input +cp data/example.csv "${tmpdir}/${t}.csv" + +# assertion +cat << "DATA" > "${tmpdir}/${t}.assert" +a;b;c +1;2;3 +0;0;0 +$;\;' +DATA + +# action +cd "${tmpdir}" || exit 1 +orcli import csv "${t}.csv" --projectName "${t}" +orcli export csv "${t}" --separator ";" --output "${t}.output" + +# test +diff -u "${t}.assert" "${t}.output"