Compare commits

...

7 Commits
v0.2.2 ... main

Author SHA1 Message Date
Felix Lohmeier eb61b6a1c4 remove debug messages 2023-12-16 04:08:37 +01:00
Felix Lohmeier bfc43142fe remove debug messages 2023-12-16 04:07:55 +01:00
Felix Lohmeier 943ce2ab17
Merge pull request #125 from opencultureconsulting/30-export-csv
export csv
2023-12-16 03:38:00 +01:00
Felix Lohmeier dd41bfa192 export csv 2023-12-16 03:37:06 +01:00
Felix Lohmeier 8ce5261a12 fix tests 2023-12-16 03:35:58 +01:00
Felix Lohmeier 6dfb763b9f
Merge pull request #124 from opencultureconsulting/123-sort-columns
new command: orcli sort columns
2023-12-11 11:19:06 +01:00
Felix Lohmeier 6895a74e41 new command: orcli sort columns 2023-12-11 10:17:46 +00:00
13 changed files with 857 additions and 20 deletions

View File

@ -17,7 +17,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org
* transform data by providing an [undo/redo](https://docs.openrefine.org/manual/running#history-undoredo) JSON file
* orcli calls specific endpoints for each operation to provide improved error handling and logging
* supports stdin, multiple files and URLs
* export to TSV, JSONL, ~~CSV, HTML, XLS, XLSX, ODS~~
* export to CSV, TSV, JSONL, ~~HTML, XLS, XLSX, ODS~~
* [templating export](https://docs.openrefine.org/manual/exporting#templating-exporter) to additional formats like JSON or XML
## Requirements

View File

@ -1,9 +1,10 @@
# orcli 0.2.2
# orcli 0.4.0
## command help screens
- [completions](completions.md)
- [delete](delete.md)
- [export csv](export_csv.md)
- [export jsonl](export_jsonl.md)
- [export template](export_template.md)
- [export tsv](export_tsv.md)
@ -15,6 +16,7 @@
- [list](list.md)
- [run](run.md)
- [search](search.md)
- [sort columns](sort_columns.md)
- [test](test.md)
- [transform](transform.md)
@ -35,6 +37,7 @@ Commands:
list list projects on OpenRefine server
info show OpenRefine project's metadata
search apply regex to each column and print matches in flattened tsv format
sort commands to sort OpenRefine projects
test run functional tests on tmp OpenRefine workspace
transform apply undo/redo JSON file(s) to an OpenRefine project
export commands to export data from OpenRefine projects to files
@ -58,6 +61,7 @@ Examples:
orcli info "duplicates"
orcli transform "duplicates" "https://git.io/fj5ju"
orcli search "duplicates" "^Ben"
orcli sort columns "duplicates"
orcli export tsv "duplicates"
orcli export tsv "duplicates" --output "duplicates.tsv"
orcli delete "duplicates"

60
help/export_csv.md Normal file
View File

@ -0,0 +1,60 @@
# orcli export csv
```
orcli export csv - export comma-separated values (CSV)
Usage:
orcli export csv PROJECT [OPTIONS]
orcli export csv --help | -h
Options:
--separator SEPARATOR
character(s) that separates columns
Default: ,
--select COLUMNS
filter result set to one or more columns (comma separated)
example: --select "foo,bar,baz"
--mode MODE
specify if project contains multi-row records
Allowed: rows, records
Default: rows
--facets FACETS
filter result set by providing an OpenRefine facets config in json
Default: []
--output FILE
Write to file instead of stdout
--encoding ENCODING
set character encoding
Default: UTF-8
--quiet, -q
suppress log output, print errors only
--help, -h
Show this help
Arguments:
PROJECT
project name or id
Examples:
orcli export csv "duplicates"
orcli export csv "duplicates" --output "duplicates.csv"
orcli export csv "duplicates" --separator ";"
orcli export csv "duplicates" --encoding "ISO-8859-1"
orcli export csv "duplicates" --select "name,email,purchase"
orcli export csv "duplicates" --facets '[ { "type": "text", "columnName":
"name", "mode": "regex", "caseSensitive": false, "invert": false, "query":
"^Ben" } ]'
orcli export csv "duplicates" --facets '[{ "type": "list", "expression":
"grel:filter([\"gender\",\"purchase\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",
"columnName": "", "selection": [{"v": {"v": true}}] }]'
```
code: [src/export_csv_command.sh](../src/export_csv_command.sh)

27
help/sort_columns.md Normal file
View File

@ -0,0 +1,27 @@
# orcli sort columns
```
orcli sort columns - re-order columns alphabetically
Usage:
orcli sort columns PROJECT [OPTIONS]
orcli sort columns --help | -h
Options:
--first COLUMN (repeatable)
set key column(s)
--help, -h
Show this help
Arguments:
PROJECT
project name or id
Examples:
orcli sort columns "duplicates"
orcli sort columns "duplicates" --first name
```
code: [src/sort_columns_command.sh](../src/sort_columns_command.sh)

583
orcli
View File

@ -40,6 +40,7 @@ orcli_usage() {
printf " %s list projects on OpenRefine server\n" "list "
printf " %s show OpenRefine project's metadata\n" "info "
printf " %s apply regex to each column and print matches in flattened tsv format\n" "search "
printf " %s commands to sort OpenRefine projects\n" "sort "
printf " %s run functional tests on tmp OpenRefine workspace\n" "test "
printf " %s apply undo/redo JSON file(s) to an OpenRefine project\n" "transform "
printf " %s commands to export data from OpenRefine projects to files\n" "export "
@ -74,6 +75,7 @@ orcli_usage() {
printf " orcli info \"duplicates\"\n"
printf " orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n"
printf " orcli search \"duplicates\" \"^Ben\"\n"
printf " orcli sort columns \"duplicates\"\n"
printf " orcli export tsv \"duplicates\"\n"
printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n"
printf " orcli delete \"duplicates\"\n"
@ -831,6 +833,88 @@ orcli_search_usage() {
fi
}
# :command.usage
orcli_sort_usage() {
  # Print the help screen for the "orcli sort" command group to stdout.
  # Reads the global $long_usage: when it is non-empty the "Options"
  # section is appended to the short usage text.

  # headline (the same for the short and the long form)
  printf '%s\n' "orcli sort - commands to sort OpenRefine projects" ""

  # synopsis
  printf '%s\n' \
    "Usage:" \
    " orcli sort COMMAND" \
    " orcli sort [COMMAND] --help | -h" \
    ""

  # available subcommands
  printf '%s\n' \
    "Commands:" \
    " columns re-order columns alphabetically" \
    ""

  # everything below is only part of the long help
  [[ -n $long_usage ]] || return 0

  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    ""
}
# :command.usage
orcli_sort_columns_usage() {
  # Print the help screen for "orcli sort columns" to stdout.
  # Reads the global $long_usage: when it is non-empty the options,
  # arguments and examples sections follow the short usage text.

  # headline (the same for the short and the long form)
  printf '%s\n' "orcli sort columns - re-order columns alphabetically" ""

  # synopsis
  printf '%s\n' \
    "Usage:" \
    " orcli sort columns PROJECT [OPTIONS]" \
    " orcli sort columns --help | -h" \
    ""

  # short help stops here
  [[ -n $long_usage ]] || return 0

  # flags
  printf '%s\n' \
    "Options:" \
    " --first COLUMN (repeatable)" \
    " set key column(s)" \
    "" \
    " --help, -h" \
    " Show this help" \
    ""

  # positional arguments
  printf '%s\n' \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    ""

  # usage examples
  printf '%s\n' \
    "Examples:" \
    ' orcli sort columns "duplicates"' \
    ' orcli sort columns "duplicates" --first name' \
    ""
}
# :command.usage
orcli_test_usage() {
if [[ -n $long_usage ]]; then
@ -936,6 +1020,7 @@ orcli_export_usage() {
# :command.usage_commands
printf "%s\n" "Commands:"
printf " %s export JSON Lines / newline-delimited JSON\n" "jsonl "
printf " %s export comma-separated values (CSV)\n" "csv "
printf " %s export tab-separated values (TSV)\n" "tsv "
printf " %s export to any text format by providing your own GREL template\n" "template"
echo
@ -1034,6 +1119,95 @@ orcli_export_jsonl_usage() {
fi
}
# :command.usage
orcli_export_csv_usage() {
  # Print the help screen for "orcli export csv" to stdout.
  # Reads the global $long_usage: when it is non-empty the options,
  # arguments and examples sections follow the short usage text.

  # headline (the same for the short and the long form)
  printf "orcli export csv - export comma-separated values (CSV)\n"
  echo

  printf "%s\n" "Usage:"
  printf " orcli export csv PROJECT [OPTIONS]\n"
  printf " orcli export csv --help | -h\n"
  echo

  # long help only
  if [[ -n $long_usage ]]; then
    printf "%s\n" "Options:"

    printf " %s\n" "--separator SEPARATOR"
    printf " character(s) that separates columns\n"
    printf " Default: ,\n"
    echo

    printf " %s\n" "--select COLUMNS"
    printf " filter result set to one or more columns (comma separated)\n example: --select \"foo,bar,baz\"\n"
    echo

    printf " %s\n" "--mode MODE"
    printf " specify if project contains multi-row records\n"
    printf " Allowed: rows, records\n"
    printf " Default: rows\n"
    echo

    printf " %s\n" "--facets FACETS"
    printf " filter result set by providing an OpenRefine facets config in json\n"
    printf " Default: []\n"
    echo

    printf " %s\n" "--output FILE"
    printf " Write to file instead of stdout\n"
    echo

    printf " %s\n" "--encoding ENCODING"
    printf " set character encoding\n"
    printf " Default: UTF-8\n"
    echo

    printf " %s\n" "--quiet, -q"
    printf " suppress log output, print errors only\n"
    echo

    printf " %s\n" "--help, -h"
    printf " Show this help\n"
    echo

    printf "%s\n" "Arguments:"
    printf " %s\n" "PROJECT"
    printf " project name or id\n"
    echo

    printf "%s\n" "Examples:"
    printf " orcli export csv \"duplicates\"\n"
    # the example output file carries a .csv extension to match the
    # exported format (it used to say .tsv, copied from "export tsv")
    printf " orcli export csv \"duplicates\" --output \"duplicates.csv\"\n"
    printf " orcli export csv \"duplicates\" --separator \";\"\n"
    printf " orcli export csv \"duplicates\" --encoding \"ISO-8859-1\"\n"
    printf " orcli export csv \"duplicates\" --select \"name,email,purchase\"\n"
    printf " orcli export csv \"duplicates\" --facets '[ { \"type\": \"text\", \"columnName\":\n \"name\", \"mode\": \"regex\", \"caseSensitive\": false, \"invert\": false, \"query\":\n \"^Ben\" } ]'\n"
    # the doubled backslashes survive both the shell and the printf
    # format so that the help text shows \" inside the GREL expression
    printf " orcli export csv \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter([\\\\\"gender\\\\\",\\\\\"purchase\\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n"
    echo
  fi
}
# :command.usage
orcli_export_tsv_usage() {
if [[ -n $long_usage ]]; then
@ -1590,6 +1764,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
@ -1602,6 +1780,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'sort columns\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--first --help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
echo $' ;;'
@ -1622,6 +1804,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select --separator -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select -h -q")" -- "$cur" )'
echo $' ;;'
@ -1643,7 +1829,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'export\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv jsonl template tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'list\'*)'
@ -1654,6 +1840,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'sort\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h columns")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'test\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
@ -1663,7 +1853,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' *)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run search test transform")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run search sort test transform")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' esac'
@ -2089,6 +2279,31 @@ orcli_search_command() {
}
# :command.function
orcli_sort_columns_command() {
# src/sort_columns_command.sh
# get columns, sort and transform with re-order columns
# shellcheck shell=bash
# catch args, convert the space delimited string to an array
first=()
eval "first=(${args[--first]})"
# convert to a comma-separated list of elements
columns=$(printf ',"'%s'"' "${first[@]}" | cut -c2-)
# get project id
projectid="$(get_id "${args[project]}")"
csrf="$(get_csrf)"
if ! sorted=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq --argjson columns "[ ${columns} ]" '($columns) + ([ .[].name ] | del (.[] | select (. | IN( $columns[] ))) | sort)'); then
error "getting columns in ${args[project]} failed!"
fi
if ! curl -fs -o /dev/null --data project="$projectid" --data "columnNames=${sorted}" "${OPENREFINE_URL}/command/core/reorder-columns${csrf}"; then
error "sorting columns in ${args[project]} failed!"
fi
log "sorted columns in ${args[project]}"
}
# :command.function
orcli_test_command() {
# src/test_command.sh
@ -2360,6 +2575,45 @@ orcli_export_jsonl_command() {
}
# :command.function
orcli_export_csv_command() {
  # src/export_csv_command.sh
  # Export an OpenRefine project as CSV.
  # Reads: args[project], args[--separator], args[--encoding],
  #        args[--select]
  # Appends the fields project, format and options to the global array
  # "data" and hands them to post_export (defined elsewhere).
  # shellcheck shell=bash
  projectid="$(get_id "${args[project]}")"
  separator="${args[--separator]:-,}"

  # assemble specific post data (some options require json format)
  data+=("project=${projectid}")
  data+=("format=csv")

  # separator and encoding are inserted verbatim, so a JSON escape
  # sequence given on the command line (e.g. \t) is passed through
  options='{ '
  options+="\"separator\": \"${separator}\""
  if [[ ${args[--encoding]} ]]; then
    options+=", \"encoding\": \"${args[--encoding]}\""
  fi

  if [[ ${args[--select]} ]]; then
    IFS=',' read -ra columns <<< "${args[--select]}"
    columns_json=''
    for cn in "${columns[@]}"; do
      # escape backslashes and double quotes so that column names
      # containing them do not break the JSON document
      cn="${cn//\\/\\\\}"
      cn="${cn//\"/\\\"}"
      columns_json+="${columns_json:+, }{\"name\":\"${cn}\"}"
    done
    options+=", \"columns\": [${columns_json}]"
  fi
  options+=' }'
  data+=("options=${options}")

  # call post_export function to post data and validate results
  post_export "${data[@]}"
}
# :command.function
orcli_export_tsv_command() {
# src/export_tsv_command.sh
@ -2658,6 +2912,13 @@ parse_requirements() {
shift $#
;;
sort)
action="sort"
shift
orcli_sort_parse_requirements "$@"
shift $#
;;
test)
action="test"
shift
@ -3971,6 +4232,147 @@ orcli_search_parse_requirements() {
}
# :command.parse_requirements
orcli_sort_parse_requirements() {
  # Parse the command line of the "orcli sort" command group.
  # A leading --help / -h prints the long usage and exits; otherwise
  # the first word selects the subcommand, whose own parser receives
  # the remaining words. Sets the globals action and (on leftover
  # words) key.

  # help requested for the command group itself
  if [[ $# -gt 0 ]]; then
    case "$1" in
      --help | -h)
        long_usage=yes
        orcli_sort_usage
        exit
        ;;
    esac
  fi

  # dispatch on the subcommand name
  action=${1:-}
  if [[ $action == -* ]]; then
    # an option where a command was expected; reported further below
    :
  elif [[ $action == "columns" ]]; then
    action="columns"
    shift
    orcli_sort_columns_parse_requirements "$@"
    shift $#
  elif [[ -z $action ]]; then
    # no subcommand at all: show the short usage on stderr
    orcli_sort_usage >&2
    exit 1
  else
    printf "invalid command: %s\n" "$action" >&2
    exit 1
  fi

  # the group takes neither options nor arguments of its own, so any
  # word still left at this point is an error
  if [[ $# -gt 0 ]]; then
    key="$1"
    if [[ $key == -?* ]]; then
      printf "invalid option: %s\n" "$key" >&2
    else
      printf "invalid argument: %s\n" "$key" >&2
    fi
    exit 1
  fi
}
# :command.parse_requirements
orcli_sort_columns_parse_requirements() {
  # Parse the command line of "orcli sort columns".
  # A leading --help / -h prints the long usage and exits. Otherwise
  # the global associative array "args" is filled:
  #   args[project]  the required positional argument
  #   args[--first]  every --first value as a shell-quoted word, joined
  #                  by blanks; read it back with
  #                  eval "first=(${args[--first]})"
  # Sets the globals action and key; exits with status 1 on bad input.
  local escaped

  # help requested
  while [[ $# -gt 0 ]]; do
    case "${1:-}" in
      --help | -h)
        long_usage=yes
        orcli_sort_columns_usage
        exit
        ;;

      *)
        break
        ;;

    esac
  done

  action="sort columns"

  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      --first)
        if [[ -n ${2+x} ]]; then
          # %q quotes the value for re-use as shell input: the consumer
          # runs eval on this string, so a value containing quotes,
          # $(...) or backticks must stay one literal word instead of
          # being executed or breaking the word list
          printf -v escaped '%q' "$2"
          if [[ -z ${args['--first']+x} ]]; then
            args['--first']="$escaped"
          else
            args['--first']="${args[--first]} $escaped"
          fi
          shift
          shift
        else
          printf "%s\n" "--first requires an argument: --first COLUMN" >&2
          exit 1
        fi
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # the first free word is the project, any further one is an error
        if [[ -z ${args['project']+x} ]]; then
          args['project']=$1
          shift
        else
          printf "invalid argument: %s\n" "$key" >&2
          exit 1
        fi
        ;;

    esac
  done

  # PROJECT is mandatory
  if [[ -z ${args['project']+x} ]]; then
    printf "missing required argument: PROJECT\nusage: orcli sort columns PROJECT [OPTIONS]\n" >&2
    exit 1
  fi
}
# :command.parse_requirements
orcli_test_parse_requirements() {
# :command.fixed_flags_filter
@ -4116,6 +4518,13 @@ orcli_export_parse_requirements() {
shift $#
;;
csv)
action="csv"
shift
orcli_export_csv_parse_requirements "$@"
shift $#
;;
tsv)
action="tsv"
shift
@ -4315,6 +4724,171 @@ orcli_export_jsonl_parse_requirements() {
}
# :command.parse_requirements
orcli_export_csv_parse_requirements() {
  # Parse the command line of "orcli export csv".
  # A leading --help / -h prints the long usage and exits. Otherwise
  # the global associative array "args" receives the positional
  # PROJECT and every flag that was given; missing flags that have a
  # default are filled in afterwards and --mode is validated.
  # Sets the globals action and key; exits with status 1 on bad input.

  # flags that take a value, with the placeholder shown in messages
  local -A placeholders=(
    ['--separator']="SEPARATOR"
    ['--select']="COLUMNS"
    ['--mode']="MODE"
    ['--facets']="FACETS"
    ['--output']="FILE"
    ['--encoding']="ENCODING"
  )

  # help requested
  if [[ $# -gt 0 ]]; then
    case "$1" in
      --help | -h)
        long_usage=yes
        orcli_export_csv_usage
        exit
        ;;
    esac
  fi

  action="export csv"

  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      --separator | --select | --mode | --facets | --output | --encoding)
        # a following word is required, even if it is an empty string
        if [[ -z ${2+x} ]]; then
          printf "%s\n" "$key requires an argument: $key ${placeholders[$key]}" >&2
          exit 1
        fi
        args["$key"]="$2"
        shift 2
        ;;

      --quiet | -q)
        args['--quiet']=1
        shift
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # the first free word is the project, any further one is an error
        if [[ -n ${args['project']+x} ]]; then
          printf "invalid argument: %s\n" "$key" >&2
          exit 1
        fi
        args['project']=$1
        shift
        ;;

    esac
  done

  # PROJECT is mandatory
  if [[ -z ${args['project']+x} ]]; then
    printf "missing required argument: PROJECT\nusage: orcli export csv PROJECT [OPTIONS]\n" >&2
    exit 1
  fi

  # defaults for flags that are missing or empty
  if [[ -z ${args['--separator']:-} ]]; then
    args['--separator']=","
  fi
  if [[ -z ${args['--mode']:-} ]]; then
    args['--mode']="rows"
  fi
  if [[ -z ${args['--facets']:-} ]]; then
    args['--facets']="[]"
  fi
  if [[ -z ${args['--encoding']:-} ]]; then
    args['--encoding']="UTF-8"
  fi

  # --mode only accepts the listed values
  case "${args['--mode']:-}" in
    "" | rows | records) ;;
    *)
      printf "%s\n" "--mode must be one of: rows, records" >&2
      exit 1
      ;;
  esac
}
# :command.parse_requirements
orcli_export_tsv_parse_requirements() {
# :command.fixed_flags_filter
@ -4750,7 +5324,7 @@ orcli_run_parse_requirements() {
# :command.initialize
initialize() {
version="0.2.2"
version="0.4.0"
long_usage=''
set -e
@ -4781,10 +5355,13 @@ run() {
"list") orcli_list_command ;;
"info") orcli_info_command ;;
"search") orcli_search_command ;;
"sort") orcli_sort_command ;;
"sort columns") orcli_sort_columns_command ;;
"test") orcli_test_command ;;
"transform") orcli_transform_command ;;
"export") orcli_export_command ;;
"export jsonl") orcli_export_jsonl_command ;;
"export csv") orcli_export_csv_command ;;
"export tsv") orcli_export_tsv_command ;;
"export template") orcli_export_template_command ;;
"run") orcli_run_command ;;

View File

@ -1,6 +1,6 @@
name: orcli
help: OpenRefine command-line interface written in Bash
version: 0.2.2
version: 0.4.0
footer: https://github.com/opencultureconsulting/orcli
dependencies:
@ -18,6 +18,7 @@ examples:
- orcli info "duplicates"
- orcli transform "duplicates" "https://git.io/fj5ju"
- orcli search "duplicates" "^Ben"
- orcli sort columns "duplicates"
- orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli delete "duplicates"
@ -297,6 +298,23 @@ commands:
- |-
orcli export tsv "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
- name: sort
help: commands to sort OpenRefine projects
commands:
- name: columns
help: re-order columns alphabetically
args:
- *project
flags:
- long: --first
help: set key column(s)
arg: column
repeatable: true
examples:
- orcli sort columns "duplicates"
- orcli sort columns "duplicates" --first name
- name: test
help: run functional tests on tmp OpenRefine workspace
@ -355,16 +373,41 @@ commands:
orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
- |-
orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
- name: csv
help: export comma-separated values (CSV)
args:
- *project
flags:
- *separator
- &select
long: --select
help: |-
filter result set to one or more columns (comma separated)
example: --select "foo,bar,baz"
arg: columns
- *mode
- *facets
- *output
- *encoding_export
- *quiet
examples:
- orcli export csv "duplicates"
- orcli export csv "duplicates" --output "duplicates.csv"
- orcli export csv "duplicates" --separator ";"
- orcli export csv "duplicates" --encoding "ISO-8859-1"
- orcli export csv "duplicates" --select "name,email,purchase"
- |-
orcli export csv "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
- |-
orcli export csv "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
- name: tsv
help: export tab-separated values (TSV)
args:
- *project
flags:
- long: --select
help: |-
filter result set to one or more columns (comma separated)
example: --select "foo,bar,baz"
arg: columns
- *select
- *mode
- *facets
- *output

33
src/export_csv_command.sh Normal file
View File

@ -0,0 +1,33 @@
# shellcheck shell=bash
# Export an OpenRefine project as CSV.
# Reads: args[project], args[--separator], args[--encoding], args[--select]
# Appends the fields project, format and options to the array "data" and
# hands them to post_export (defined elsewhere).
projectid="$(get_id "${args[project]}")"
separator="${args[--separator]:-,}"

# assemble specific post data (some options require json format)
data+=("project=${projectid}")
data+=("format=csv")

# separator and encoding are inserted verbatim, so a JSON escape
# sequence given on the command line (e.g. \t) is passed through
options='{ '
options+="\"separator\": \"${separator}\""
if [[ ${args[--encoding]} ]]; then
  options+=", \"encoding\": \"${args[--encoding]}\""
fi

if [[ ${args[--select]} ]]; then
  IFS=',' read -ra columns <<< "${args[--select]}"
  columns_json=''
  for cn in "${columns[@]}"; do
    # escape backslashes and double quotes so that column names
    # containing them do not break the JSON document
    cn="${cn//\\/\\\\}"
    cn="${cn//\"/\\\"}"
    columns_json+="${columns_json:+, }{\"name\":\"${cn}\"}"
  done
  options+=", \"columns\": [${columns_json}]"
fi
options+=' }'
data+=("options=${options}")

# call post_export function to post data and validate results
post_export "${data[@]}"

View File

@ -38,6 +38,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
@ -50,6 +54,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'sort columns\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--first --help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
echo $' ;;'
@ -70,6 +78,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select --separator -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select -h -q")" -- "$cur" )'
echo $' ;;'
@ -91,7 +103,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'export\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h jsonl template tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv jsonl template tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'list\'*)'
@ -102,6 +114,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'sort\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h columns")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'test\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
@ -111,7 +127,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' *)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run search test transform")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run search sort test transform")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' esac'

View File

@ -0,0 +1,20 @@
# get columns, sort and transform with re-order columns
# The columns given with --first come first (in the given order), all
# remaining columns follow in sorted order.
# Reads: args[project], args[--first], OPENREFINE_URL
# Calls: get_id, get_csrf, error, log (defined elsewhere), curl, jq
# shellcheck shell=bash

# catch args, convert the space delimited string to an array
# NOTE(review): eval runs on a string assembled from user input by the
# argument parser; it is only safe if the parser shell-quotes each value
# correctly - confirm on the parser side.
first=()
eval "first=(${args[--first]})"

# convert to a comma-separated list of JSON strings; backslashes and
# double quotes are escaped so that unusual column names still yield
# valid JSON, and an empty --first list yields an empty list instead of
# a single "" entry
columns=''
for column in "${first[@]}"; do
  column="${column//\\/\\\\}"
  column="${column//\"/\\\"}"
  columns+="${columns:+,}\"${column}\""
done

# get project id
projectid="$(get_id "${args[project]}")"
csrf="$(get_csrf)"

# fetch the column names first and run jq afterwards, so that a failed
# request is reported instead of being hidden by the pipeline status
if ! info="$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info")" \
  || ! sorted="$(jq --argjson columns "[ ${columns} ]" '($columns) + ([ .[].name ] | del (.[] | select (. | IN( $columns[] ))) | sort)' <<< "$info")"; then
  error "getting columns in ${args[project]} failed!"
fi

# url-encode the JSON list: characters such as "+", "&" or "%" in a
# column name would otherwise be altered by form decoding
if ! curl -fs -o /dev/null --data project="$projectid" --data-urlencode "columnNames=${sorted}" "${OPENREFINE_URL}/command/core/reorder-columns${csrf}"; then
  error "sorting columns in ${args[project]} failed!"
fi
log "sorted columns in ${args[project]}"

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Functional test: import a CSV file and export it again with ";" as
# the separator, then compare the result with the expected text.

t="export-csv-separator"

# scratch directory, removed again on exit and on INT/QUIT/TERM
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' EXIT INT QUIT TERM

# input
cp data/example.csv "${tmpdir}/${t}.csv"

# assertion (quoted delimiter: the lines are taken literally)
cat > "${tmpdir}/${t}.assert" << 'DATA'
a;b;c
1;2;3
0;0;0
$;\;'
DATA

# action
cd "${tmpdir}" || exit 1
orcli import csv "${t}.csv" --projectName "${t}"
orcli export csv "${t}" --separator ";" --output "${t}.output"

# test: the exit status of diff is the result of this script
diff -u "${t}.assert" "${t}.output"

31
tests/sort-columns.sh Normal file
View File

@ -0,0 +1,31 @@
#!/bin/bash
# Functional test: import the example data, move the columns "name" and
# "state" to the front with the remaining columns sorted, export as TSV
# and compare the result with the expected text.

t="sort-columns"

# create tmp directory
tmpdir="$(mktemp -d)"
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15

# assertion
# NOTE(review): diff compares this fixture with the TSV export, so its
# field separators must match the export exactly.
cat << "DATA" > "${tmpdir}/${t}.assert"
name state email gender purchase
Danny Baron CA danny.baron@example1.com M TV
Melanie White NC melanie.white@example2.edu F iPhone
D. Baron CA danny.baron@example1.com M Winter jacket
Ben Tyler NV ben.tyler@example3.org M Flashlight
Arthur Duff OR arthur.duff@example4.com M Dining table
Daniel Baron CA danny.baron@example1.com M Bike
Jean Griffith WA jean.griffith@example5.org F Power drill
Melanie White NC melanie.white@example2.edu F iPad
Ben Morisson FL ben.morisson@example6.org M Amplifier
Arthur Duff OR arthur.duff@example4.com M Night table
DATA

# action
# the project is named after the test (as in the other test scripts),
# so it cannot be confused with a "duplicates" project of another test
cd "${tmpdir}" || exit 1
orcli import csv "https://git.io/fj5hF" --projectName "${t}"
orcli sort columns "${t}" --first name --first state
orcli export tsv "${t}" --output "${t}.output"

# test
diff -u "${t}.assert" "${t}.output"

View File

@ -40,9 +40,9 @@ DATA
# action
cd "${tmpdir}" || exit 1
orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
orcli transform "duplicates" "${t}.history"
orcli export tsv "duplicates" --output "${t}.output"
orcli import csv "https://git.io/fj5hF" --projectName "${t}"
orcli transform "${t}" "${t}.history"
orcli export tsv "${t}" --output "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"

View File

@ -14,9 +14,9 @@ cp data/duplicates-history.json "${tmpdir}/${t}.history"
# action
cd "${tmpdir}" || exit 1
orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
orcli transform "duplicates" "${t}.history"
orcli export tsv "duplicates" --output "${t}.output"
orcli import csv "https://git.io/fj5hF" --projectName "${t}"
orcli transform "${t}" "${t}.history"
orcli export tsv "${t}" --output "${t}.output"
# test
diff -u "${t}.assert" "${t}.output"
diff -u "${t}.assert" "${t}.output"