Merge pull request #125 from opencultureconsulting/30-export-csv

export csv
This commit is contained in:
Felix Lohmeier 2023-12-16 03:38:00 +01:00 committed by GitHub
commit 943ce2ab17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 485 additions and 20 deletions

View File

@ -17,7 +17,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
* orcli calls specific endpoints for each operation to provide improved error handling and logging
* supports stdin, multiple files and URLs
* export to TSV, JSONL, ~~CSV, HTML, XLS, XLSX, ODS~~
* export to CSV, TSV, JSONL, ~~HTML, XLS, XLSX, ODS~~
* [templating export](https://docs.openrefine.org/manual/exporting#templating-exporter) to additional formats like JSON or XML
## Requirements

View File

@ -1,9 +1,10 @@
# orcli 0.3.0
# orcli 0.4.0
## command help screens
- [completions](completions.md)
- [delete](delete.md)
- [export csv](export_csv.md)
- [export jsonl](export_jsonl.md)
- [export template](export_template.md)
- [export tsv](export_tsv.md)

60
help/export_csv.md Normal file
View File

@ -0,0 +1,60 @@
# orcli export csv
```
orcli export csv - export comma-separated values (CSV)
Usage:
orcli export csv PROJECT [OPTIONS]
orcli export csv --help | -h
Options:
--separator SEPARATOR
character(s) that separates columns
Default: ,
--select COLUMNS
filter result set to one or more columns (comma separated)
example: --select "foo,bar,baz"
--mode MODE
specify if project contains multi-row records
Allowed: rows, records
Default: rows
--facets FACETS
filter result set by providing an OpenRefine facets config in json
Default: []
--output FILE
Write to file instead of stdout
--encoding ENCODING
set character encoding
Default: UTF-8
--quiet, -q
suppress log output, print errors only
--help, -h
Show this help
Arguments:
PROJECT
project name or id
Examples:
orcli export csv "duplicates"
orcli export csv "duplicates" --output "duplicates.csv"
orcli export csv "duplicates" --separator ";"
orcli export csv "duplicates" --encoding "ISO-8859-1"
orcli export csv "duplicates" --select "name,email,purchase"
orcli export csv "duplicates" --facets '[ { "type": "text", "columnName":
"name", "mode": "regex", "caseSensitive": false, "invert": false, "query":
"^Ben" } ]'
orcli export csv "duplicates" --facets '[{ "type": "list", "expression":
"grel:filter([\"gender\",\"purchase\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",
"columnName": "", "selection": [{"v": {"v": true}}] }]'
```
code: [src/export_csv_command.sh](../src/export_csv_command.sh)

319
orcli
View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# This script was generated by bashly 1.1.4 (https://bashly.dannyb.co)
# This script was generated by bashly 1.1.3 (https://bashly.dannyb.co)
# Modifying it manually is not recommended
# :wrapper.bash3_bouncer
@ -1020,6 +1020,7 @@ orcli_export_usage() {
# :command.usage_commands
printf "%s\n" "Commands:"
printf " %s export JSON Lines / newline-delimited JSON\n" "jsonl "
printf " %s export comma-separated values (CSV)\n" "csv "
printf " %s export tab-separated values (TSV)\n" "tsv "
printf " %s export to any text format by providing your own GREL template\n" "template"
echo
@ -1118,6 +1119,95 @@ orcli_export_jsonl_usage() {
fi
}
# :command.usage
orcli_export_csv_usage() {
  # Print the help screen of `orcli export csv` to stdout.
  # Globals:
  #   long_usage (read) - when non-empty, the Options, Arguments and Examples
  #                       sections are printed after the short usage lines.
  # The summary line is identical for the short and the long screen, so it is
  # printed unconditionally.
  printf "orcli export csv - export comma-separated values (CSV)\n"
  echo

  printf "%s\n" "Usage:"
  printf " orcli export csv PROJECT [OPTIONS]\n"
  printf " orcli export csv --help | -h\n"
  echo

  # :command.long_usage
  if [[ -n $long_usage ]]; then
    printf "%s\n" "Options:"

    # :command.usage_flags
    # :flag.usage
    printf " %s\n" "--separator SEPARATOR"
    printf " character(s) that separates columns\n"
    printf " Default: ,\n"
    echo

    # :flag.usage
    printf " %s\n" "--select COLUMNS"
    printf " filter result set to one or more columns (comma separated)\n example: --select \"foo,bar,baz\"\n"
    echo

    # :flag.usage
    printf " %s\n" "--mode MODE"
    printf " specify if project contains multi-row records\n"
    printf " Allowed: rows, records\n"
    printf " Default: rows\n"
    echo

    # :flag.usage
    printf " %s\n" "--facets FACETS"
    printf " filter result set by providing an OpenRefine facets config in json\n"
    printf " Default: []\n"
    echo

    # :flag.usage
    printf " %s\n" "--output FILE"
    printf " Write to file instead of stdout\n"
    echo

    # :flag.usage
    printf " %s\n" "--encoding ENCODING"
    printf " set character encoding\n"
    printf " Default: UTF-8\n"
    echo

    # :flag.usage
    printf " %s\n" "--quiet, -q"
    printf " suppress log output, print errors only\n"
    echo

    # :command.usage_fixed_flags
    printf " %s\n" "--help, -h"
    printf " Show this help\n"
    echo

    # :command.usage_args
    printf "%s\n" "Arguments:"

    # :argument.usage
    printf " %s\n" "PROJECT"
    printf " project name or id\n"
    echo

    # :command.usage_examples
    printf "%s\n" "Examples:"
    printf " orcli export csv \"duplicates\"\n"
    # the output file of a CSV export carries a .csv extension
    printf " orcli export csv \"duplicates\" --output \"duplicates.csv\"\n"
    printf " orcli export csv \"duplicates\" --separator \";\"\n"
    printf " orcli export csv \"duplicates\" --encoding \"ISO-8859-1\"\n"
    printf " orcli export csv \"duplicates\" --select \"name,email,purchase\"\n"
    printf " orcli export csv \"duplicates\" --facets '[ { \"type\": \"text\", \"columnName\":\n \"name\", \"mode\": \"regex\", \"caseSensitive\": false, \"invert\": false, \"query\":\n \"^Ben\" } ]'\n"
    printf " orcli export csv \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter([\\\\\"gender\\\\\",\\\\\"purchase\\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n"
    echo
  fi
}
# :command.usage
orcli_export_tsv_usage() {
if [[ -n $long_usage ]]; then
@ -1674,6 +1764,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
@ -1710,6 +1804,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select --separator -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select -h -q")" -- "$cur" )'
echo $' ;;'
@ -1731,7 +1829,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'export\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv jsonl template tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'list\'*)'
@ -2477,6 +2575,46 @@ orcli_export_jsonl_command() {
}
# :command.function
orcli_export_csv_command() {
  # src/export_csv_command.sh
  # shellcheck shell=bash
  #
  # Export a project as CSV.
  # Builds the form fields for the export request (project id, format and a
  # JSON "options" object) and hands them to post_export.
  # Reads:   args[project], args[--separator], args[--encoding], args[--select]
  # Appends: data (form fields passed on to post_export)
  # inspect_args, get_id and post_export are defined elsewhere in this script.
  inspect_args

  projectid="$(get_id "${args[project]}")"
  separator="${args[--separator]:-,}"

  # assemble specific post data (some options require json format)
  data+=("project=${projectid}")
  data+=("format=csv")

  # separator and encoding are inserted verbatim, so JSON escape sequences
  # given on the command line (e.g. \t) reach the server unchanged
  options='{ '
  options+="\"separator\": \"${separator}\""
  if [[ ${args[--encoding]} ]]; then
    options+=", \"encoding\": \"${args[--encoding]}\""
  fi

  if [[ ${args[--select]} ]]; then
    # split the comma separated list into single column names
    IFS=',' read -ra columns <<< "${args[--select]}"
    options+=', "columns": ['
    joiner=''
    for cn in "${columns[@]}"; do
      # column names are data: escape backslash first, then double quote, so
      # the name stays a valid JSON string
      cn_json=${cn//\\/\\\\}
      cn_json=${cn_json//\"/\\\"}
      options+="${joiner}{\"name\":\"${cn_json}\"}"
      joiner=', '
    done
    options+=']'
  fi
  options+=' }'
  data+=("options=${options}")

  # call post_export function to post data and validate results
  post_export "${data[@]}"
}
# :command.function
orcli_export_tsv_command() {
# src/export_tsv_command.sh
@ -4196,7 +4334,7 @@ orcli_sort_columns_parse_requirements() {
if [[ -z ${args['--first']+x} ]]; then
args['--first']="\"$2\""
else
args['--first']="${args['--first']} \"$2\""
args['--first']="${args[--first]} \"$2\""
fi
shift
shift
@ -4381,6 +4519,13 @@ orcli_export_parse_requirements() {
shift $#
;;
csv)
action="csv"
shift
orcli_export_csv_parse_requirements "$@"
shift $#
;;
tsv)
action="tsv"
shift
@ -4580,6 +4725,171 @@ orcli_export_jsonl_parse_requirements() {
}
# :command.parse_requirements
orcli_export_csv_parse_requirements() {
  # Parse the command line of `orcli export csv` into the global `args` map.
  # Arguments: the tokens that follow `export csv`
  # Globals:   args, action, key (written); long_usage (written for --help)
  # Exits with status 1 and a message on stderr when a flag lacks its value,
  # an option is unknown, a second positional argument is given, PROJECT is
  # missing or --mode is not an allowed value.
  local placeholder

  # :command.fixed_flags_filter
  # a leading --help/-h prints the long help screen and ends the script
  if (( $# > 0 )); then
    case "${1:-}" in
      --help | -h)
        long_usage=yes
        orcli_export_csv_usage
        exit
        ;;
    esac
  fi

  # :command.command_filter
  action="export csv"

  # :command.parse_requirements_while
  while (( $# > 0 )); do
    key="$1"
    case "$key" in
      # :flag.case
      --separator | --select | --mode | --facets | --output | --encoding)
        # all value-taking flags are handled alike; only the placeholder
        # shown in the error message differs
        case "$key" in
          --separator) placeholder="SEPARATOR" ;;
          --select) placeholder="COLUMNS" ;;
          --mode) placeholder="MODE" ;;
          --facets) placeholder="FACETS" ;;
          --output) placeholder="FILE" ;;
          --encoding) placeholder="ENCODING" ;;
        esac
        # :flag.case_arg
        if [[ -z ${2+x} ]]; then
          printf "%s\n" "${key} requires an argument: ${key} ${placeholder}" >&2
          exit 1
        fi
        args["$key"]="$2"
        shift 2
        ;;

      # :flag.case
      --quiet | -q)
        # :flag.case_no_arg
        args['--quiet']=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # :command.parse_requirements_case
        # :command.parse_requirements_case_simple
        # only one positional argument (PROJECT) is accepted
        if [[ -n ${args['project']+x} ]]; then
          printf "invalid argument: %s\n" "$key" >&2
          exit 1
        fi
        args['project']=$1
        shift
        ;;
    esac
  done

  # :command.required_args_filter
  if [[ -z ${args['project']+x} ]]; then
    printf "missing required argument: PROJECT\nusage: orcli export csv PROJECT [OPTIONS]\n" >&2
    exit 1
  fi

  # :command.default_assignments
  if [[ -z ${args['--separator']:-} ]]; then
    args['--separator']=","
  fi
  if [[ -z ${args['--mode']:-} ]]; then
    args['--mode']="rows"
  fi
  if [[ -z ${args['--facets']:-} ]]; then
    args['--facets']="[]"
  fi
  if [[ -z ${args['--encoding']:-} ]]; then
    args['--encoding']="UTF-8"
  fi

  # :command.whitelist_filter
  case "${args['--mode']:-}" in
    "" | rows | records) ;;
    *)
      printf "%s\n" "--mode must be one of: rows, records" >&2
      exit 1
      ;;
  esac
}
# :command.parse_requirements
orcli_export_tsv_parse_requirements() {
# :command.fixed_flags_filter
@ -5015,7 +5325,7 @@ orcli_run_parse_requirements() {
# :command.initialize
initialize() {
version="0.3.0"
version="0.4.0"
long_usage=''
set -e
@ -5052,6 +5362,7 @@ run() {
"transform") orcli_transform_command ;;
"export") orcli_export_command ;;
"export jsonl") orcli_export_jsonl_command ;;
"export csv") orcli_export_csv_command ;;
"export tsv") orcli_export_tsv_command ;;
"export template") orcli_export_template_command ;;
"run") orcli_run_command ;;

View File

@ -1,6 +1,6 @@
name: orcli
help: OpenRefine command-line interface written in Bash
version: 0.3.0
version: 0.4.0
footer: https://github.com/opencultureconsulting/orcli
dependencies:
@ -373,16 +373,41 @@ commands:
orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
- |-
orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
- name: csv
help: export comma-separated values (CSV)
args:
- *project
flags:
- *separator
- &select
long: --select
help: |-
filter result set to one or more columns (comma separated)
example: --select "foo,bar,baz"
arg: columns
- *mode
- *facets
- *output
- *encoding_export
- *quiet
examples:
- orcli export csv "duplicates"
- orcli export csv "duplicates" --output "duplicates.csv"
- orcli export csv "duplicates" --separator ";"
- orcli export csv "duplicates" --encoding "ISO-8859-1"
- orcli export csv "duplicates" --select "name,email,purchase"
- |-
orcli export csv "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
- |-
orcli export csv "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
- name: tsv
help: export tab-separated values (TSV)
args:
- *project
flags:
- long: --select
help: |-
filter result set to one or more columns (comma separated)
example: --select "foo,bar,baz"
arg: columns
- *select
- *mode
- *facets
- *output

34
src/export_csv_command.sh Normal file
View File

@ -0,0 +1,34 @@
# shellcheck shell=bash
#
# Export a project as CSV.
# Builds the form fields for the export request (project id, format and a JSON
# "options" object) and hands them to post_export.
# Reads:   args[project], args[--separator], args[--encoding], args[--select]
# Appends: data (form fields passed on to post_export)
# inspect_args, get_id and post_export are defined elsewhere.
inspect_args

projectid="$(get_id "${args[project]}")"
separator="${args[--separator]:-,}"

# assemble specific post data (some options require json format)
data+=("project=${projectid}")
data+=("format=csv")

# separator and encoding are inserted verbatim, so JSON escape sequences given
# on the command line (e.g. \t) reach the server unchanged
options='{ '
options+="\"separator\": \"${separator}\""
if [[ ${args[--encoding]} ]]; then
    options+=", \"encoding\": \"${args[--encoding]}\""
fi

if [[ ${args[--select]} ]]; then
    # split the comma separated list into single column names
    IFS=',' read -ra columns <<< "${args[--select]}"
    options+=', "columns": ['
    joiner=''
    for cn in "${columns[@]}"; do
        # column names are data: escape backslash first, then double quote,
        # so the name stays a valid JSON string
        cn_json=${cn//\\/\\\\}
        cn_json=${cn_json//\"/\\\"}
        options+="${joiner}{\"name\":\"${cn_json}\"}"
        joiner=', '
    done
    options+=']'
fi
options+=' }'
data+=("options=${options}")

# call post_export function to post data and validate results
post_export "${data[@]}"

View File

@ -38,6 +38,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
@ -74,6 +78,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select --separator -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select -h -q")" -- "$cur" )'
echo $' ;;'
@ -95,7 +103,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'export\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv jsonl template tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'list\'*)'

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Test: exporting CSV with a custom separator (";") yields the expected file.

t="export-csv-separator"

# scratch directory, removed again on exit and on INT/QUIT/TERM
tmpdir=$(mktemp -d)
trap 'rm -rf "${tmpdir}"' EXIT INT QUIT TERM

# input
cp data/example.csv "${tmpdir}/${t}.csv"

# assertion (quoted delimiter: the lines are written literally)
cat > "${tmpdir}/${t}.assert" << 'DATA'
a;b;c
1;2;3
0;0;0
$;\;'
DATA

# action
if ! cd "${tmpdir}"; then
    exit 1
fi
orcli import csv "${t}.csv" --projectName "${t}"
orcli export csv "${t}" --separator ";" --output "${t}.output"

# test: the export must equal the expected file
diff -u "${t}.assert" "${t}.output"

View File

@ -40,9 +40,9 @@ DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
orcli transform "duplicates" "${t}.history"
orcli export tsv "duplicates" --output "${t}.output"
orcli import csv "https://git.io/fj5hF" --projectName "${t}"
orcli transform "${t}" "${t}.history"
orcli export tsv "${t}" --output "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -14,9 +14,9 @@ cp data/duplicates-history.json "${tmpdir}/${t}.history"
# action
cd "${tmpdir}" || exit 1
orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
orcli transform "duplicates" "${t}.history"
orcli export tsv "duplicates" --output "${t}.output"
orcli import csv "https://git.io/fj5hF" --projectName "${t}"
orcli transform "${t}" "${t}.history"
orcli export tsv "${t}" --output "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"