# orcli/src/bashly.yml (502 lines, 17 KiB, YAML)

# Root command definition for the orcli Bashly configuration.
name: orcli
version: 0.4.0
help: OpenRefine command-line interface written in Bash
footer: https://github.com/opencultureconsulting/orcli

# External programs, each mapped to its project URL.
dependencies:
  curl: https://curl.se
  jq: https://github.com/stedolan/jq

environment_variables:
- name: OPENREFINE_URL
  default: "http://localhost:3333"
  help: URL to OpenRefine server

# Sample invocations listed for the root command; the final entry is a
# multi-line heredoc, hence the literal block scalar.
examples:
- orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
- orcli list
- orcli info "duplicates"
- orcli transform "duplicates" "https://git.io/fj5ju"
- orcli search "duplicates" "^Ben"
- orcli sort columns "duplicates"
- orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli delete "duplicates"
- orcli run --interactive
- |-
  orcli run << EOF
  orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
  orcli transform "duplicates" "https://git.io/fj5ju"
  orcli export tsv "duplicates"
  EOF

# Subcommands follow as a flush (unindented) sequence.
commands:
# `completions` subcommand; its help text doubles as the usage hint.
- name: completions
  help: |-
    Generate bash completions
    Usage: source <(orcli completions)
# `delete` subcommand. It also defines the &project argument and the &quiet
# flag anchors that later commands in this file reuse through aliases.
- name: delete
  help: delete OpenRefine project
  args:
  - &project
    name: project
    required: true
    help: project name or id
  flags:
  - short: -f
    long: --force
    help: delete all projects with the same name
  - &quiet
    short: -q
    long: --quiet
    help: suppress log output, print errors only
  examples:
  - orcli delete "duplicates"
  - orcli delete "duplicates" --force
  - orcli delete 1234567890123
  # The backslashes before $ are part of the stored example text.
  - for p in \$(orcli list); do orcli delete \${p:0:13}; done
# `import` command group with one subcommand per input format.
# The csv subcommand defines the anchors (&file, &separator, &limit, ...) that
# the tsv, json and jsonl subcommands, and other commands in this file, reuse
# through aliases, so csv has to stay ahead of every alias that refers to it.
- name: import
  help: commands to create OpenRefine projects from files or URLs
  commands:
  - name: csv
    help: import character-separated values (CSV)
    args:
    # Repeatable input argument; "-" is the default and, per the help text,
    # stands for standard input.
    - &file
      name: file
      help: Path to one or more files or URLs. When FILE is -, read standard input.
      default: "-"
      repeatable: true
    flags:
    - &separator
      long: --separator
      help: character(s) that separates columns
      arg: separator
      default: ","
    - &blankCellsAsStrings
      long: --blankCellsAsStrings
      help: store blank cells as empty strings instead of nulls
    # --columnNames and --headerLines are declared mutually exclusive.
    - &columnNames
      long: --columnNames
      help: |-
        set column names (comma separated)
        hint: add --ignoreLines 1 to overwrite existing header row
      arg: columnNames
      conflicts: [--headerLines]
    - &encoding_import
      long: --encoding
      help: set character encoding
      arg: encoding
    - &guessCellValueTypes
      long: --guessCellValueTypes
      help: attempt to parse cell text into numbers
    - &headerLines
      long: --headerLines
      help: parse x line(s) as column headers
      arg: headerLines
      default: "1"
      conflicts: [--columnNames]
    - &ignoreLines
      long: --ignoreLines
      help: ignore first x line(s) at beginning of file
      arg: ignoreLines
      default: "-1"
    - &ignoreQuoteCharacter
      long: --ignoreQuoteCharacter
      help: do not use any quote character to enclose cells containing column separators
    - &includeFileSources
      long: --includeFileSources
      help: add column with file source
    - &includeArchiveFileName
      long: --includeArchiveFileName
      help: add column with archive file name
    - &limit
      long: --limit
      help: load at most x row(s) of data
      arg: limit
      default: "-1"
    # Single-quoted on purpose: the backslashes are kept literally.
    - &quoteCharacter
      long: --quoteCharacter
      help: quote character to enclose cells containing column separators
      arg: quoteCharacter
      default: '\\\"'
    - &skipBlankRows
      long: --skipBlankRows
      help: do not store blank rows
    - &skipDataLines
      long: --skipDataLines
      help: discard initial x row(s) of data
      arg: skipDataLines
      default: "0"
    - &trimStrings
      long: --trimStrings
      help: trim leading & trailing whitespace from strings
    - &projectName
      long: --projectName
      arg: projectName
      help: set a name for the OpenRefine project
    - &projectTags
      long: --projectTags
      help: set project tags (comma separated)
      arg: projectTags
    - *quiet
    examples:
    - orcli import csv "file"
    - orcli import csv "file1" "file2"
    - head -n 100 "file" | orcli import csv
    - orcli import csv "https://git.io/fj5hF"
    - |-
      orcli import csv "file" \\\\
      --separator ";" \\\\
      --columnNames "foo,bar,baz" \\\\
      --ignoreLines 1 \\\\
      --encoding "ISO-8859-1" \\\\
      --limit 100 \\\\
      --trimStrings \\\\
      --projectName "duplicates" \\\\
      --projectTags "test,urgent"
  - name: tsv
    help: import tab-separated values (TSV)
    args:
    - *file
    # Same flag set as csv except that --separator is not offered.
    flags:
    - *blankCellsAsStrings
    - *columnNames
    - *encoding_import
    - *guessCellValueTypes
    - *headerLines
    - *ignoreLines
    - *ignoreQuoteCharacter
    - *includeFileSources
    - *includeArchiveFileName
    - *limit
    - *quoteCharacter
    - *skipBlankRows
    - *skipDataLines
    - *trimStrings
    - *projectName
    - *projectTags
    - *quiet
    examples:
    - orcli import tsv "file"
    - orcli import tsv "file1" "file2"
    - head -n 100 "file" | orcli import tsv
    - orcli import tsv "https://example.com/file.tsv"
    # No --separator in this example: `import tsv` does not declare that flag.
    - |-
      orcli import tsv "file" \\\\
      --columnNames "foo,bar,baz" \\\\
      --ignoreLines 1 \\\\
      --encoding "ISO-8859-1" \\\\
      --limit 100 \\\\
      --trimStrings \\\\
      --projectName "duplicates" \\\\
      --projectTags "test,urgent"
  - name: json
    help: import JSON
    args:
    - *file
    flags:
    # Double-quoted scalar: each \\ yields one backslash and each \" one quote.
    - &recordPath
      long: --recordPath
      help: specify record path elements in JSON array
      arg: json
      default: "[ \\\"_\\\" , \\\"_\\\" ]"
    - &rename
      long: --rename
      help: rename columns after import to remove record path fragments
    - *guessCellValueTypes
    - *includeFileSources
    - *includeArchiveFileName
    - *limit
    - &storeEmptyStrings
      long: --storeEmptyStrings
      help: preserve empty strings
    - *trimStrings
    - *projectName
    - *projectTags
    - *quiet
    examples:
    - orcli import json "file"
    - orcli import json "file1" "file2"
    - orcli import json "https://example.com/file.json"
    - |-
      orcli import json "file" \\\\
      --recordPath '[ "_", "rows", "_" ]' \\\\
      --rename \\\\
      --storeEmptyStrings \\\\
      --trimStrings \\\\
      --projectName "duplicates" \\\\
      --projectTags "test,urgent"
  - name: jsonl
    help: import JSON Lines / newline-delimited JSON
    args:
    - *file
    flags:
    - *rename
    - *guessCellValueTypes
    - *includeFileSources
    - *includeArchiveFileName
    - *limit
    - *storeEmptyStrings
    - *trimStrings
    - *projectName
    - *projectTags
    - *quiet
    examples:
    - orcli import jsonl "file"
    - orcli import jsonl "file1" "file2"
    - orcli import jsonl "https://example.com/file.json"
    - orcli import jsonl --rename <(orcli export jsonl "duplicates")
    - |-
      orcli import jsonl "file" \\\\
      --rename \\\\
      --storeEmptyStrings \\\\
      --trimStrings \\\\
      --projectName "duplicates" \\\\
      --projectTags "test,urgent"
# `list` subcommand: declares no arguments, flags or examples.
- name: list
  help: list projects on OpenRefine server
# `info` subcommand: takes only the shared project argument.
- name: info
  help: show OpenRefine project's metadata
  args:
  - *project
  examples:
  - orcli info "duplicates"
  - orcli info 1234567890123
  # Per this example, the output can be piped into jq.
  - orcli info "duplicates" | jq -r .columns[]
# `search` subcommand: the shared project argument plus a regex argument.
# The &regex argument declares no `required` key.
- name: search
  help: |-
    apply regex to each column and print matches in flattened tsv format
    output: index column value
    note that any exporter supports search by using --facets (see examples)
  args:
  - *project
  - &regex
    name: regex
    help: search term (regular expression, case-sensitive)
  flags:
  - long: --index
    arg: column
    help: print column values instead of row.index in the first column of the output
  examples:
  - orcli search "duplicates" "^Ben"
  - orcli search 1234567890123 "^Ben"
  - orcli search "duplicates" "^F" --index "email"
  # NOTE(review): this example passes no regex; confirm that is intended.
  - orcli search "duplicates" | column -t -s \$'\t'
  # The last two entries use the exporters' --facets flag, as the help text
  # above mentions.
  - |-
    orcli export tsv "duplicates" --facets '[{ "type": "list", "expression": "grel:filter(row.columnNames,cn,cells[cn].value.find(/^Ben/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
  - |-
    orcli export tsv "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
# `sort` command group; `columns` is its only subcommand here.
- name: sort
  help: commands to sort OpenRefine projects
  commands:
  - name: columns
    help: re-order columns alphabetically
    args:
    - *project
    flags:
    # --first may be given more than once (repeatable).
    - long: --first
      arg: column
      repeatable: true
      help: set key column(s)
    examples:
    - orcli sort columns "duplicates"
    - orcli sort columns "duplicates" --first name
# `test` subcommand: declares no arguments, flags or examples.
- name: test
  help: run functional tests on tmp OpenRefine workspace
# `transform` subcommand: the shared project argument followed by the shared
# repeatable file argument (which defaults to "-").
- name: transform
  help: apply undo/redo JSON file(s) to an OpenRefine project
  args:
  - *project
  - *file
  flags:
  - *quiet
  examples:
  - orcli transform "duplicates" "history.json"
  - cat "history.json" | orcli transform "duplicates"
  - orcli transform "duplicates" "https://git.io/fj5ju"
  - orcli transform 1234567890123 "history.json"
# `export` command group with one subcommand per output format.
# The jsonl subcommand defines the &mode, &facets, &output and
# &encoding_export anchors that csv, tsv and template reuse through aliases;
# csv additionally defines &select, which tsv reuses.
- name: export
  help: commands to export data from OpenRefine projects to files
  commands:
  - name: jsonl
    help: export JSON Lines / newline-delimited JSON
    args:
    - *project
    flags:
    # Restricted to the two listed values; "rows" is the default.
    - &mode
      long: --mode
      help: specify if project contains multi-row records
      arg: mode
      allowed: [rows, records]
      default: "rows"
    # Local --separator with its own meaning; not the shared &separator flag.
    - long: --separator
      help: character(s) that separates multiple values in one cell (row mode only)
      arg: separator
    - &facets
      long: --facets
      help: filter result set by providing an OpenRefine facets config in json
      arg: facets
      default: "[]"
    - &output
      long: --output
      help: Write to file instead of stdout
      arg: file
    - &encoding_export
      long: --encoding
      help: set character encoding
      arg: encoding
      default: "UTF-8"
    - *quiet
    examples:
    - orcli export jsonl "duplicates"
    - orcli export jsonl "duplicates" --output "duplicates.jsonl"
    - orcli export jsonl "duplicates" --separator ' '
    - orcli export jsonl "duplicates" --mode records
    - |-
      orcli export jsonl "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
    - |-
      orcli export jsonl "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
  - name: csv
    help: export comma-separated values (CSV)
    args:
    - *project
    flags:
    # Alias of the &separator flag anchored under `import csv`.
    - *separator
    - &select
      long: --select
      help: |-
        filter result set to one or more columns (comma separated)
        example: --select "foo,bar,baz"
      arg: columns
    - *mode
    - *facets
    - *output
    - *encoding_export
    - *quiet
    examples:
    - orcli export csv "duplicates"
    # The output file uses the .csv extension to match the exported format.
    - orcli export csv "duplicates" --output "duplicates.csv"
    - orcli export csv "duplicates" --separator ";"
    - orcli export csv "duplicates" --encoding "ISO-8859-1"
    - orcli export csv "duplicates" --select "name,email,purchase"
    - |-
      orcli export csv "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
    - |-
      orcli export csv "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
  - name: tsv
    help: export tab-separated values (TSV)
    args:
    - *project
    flags:
    - *select
    - *mode
    - *facets
    - *output
    - *encoding_export
    - *quiet
    examples:
    - orcli export tsv "duplicates"
    - orcli export tsv "duplicates" --output "duplicates.tsv"
    - orcli export tsv "duplicates" --encoding "ISO-8859-1"
    - orcli export tsv "duplicates" --select "name,email,purchase"
    - |-
      orcli export tsv "duplicates" --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]'
    - |-
      orcli export tsv "duplicates" --facets '[{ "type": "list", "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0", "columnName": "", "selection": [{"v": {"v": true}}] }]'
  - name: template
    help: export to any text format by providing your own GREL template
    args:
    - *project
    # Own single-valued file argument: unlike the shared &file anchor, it
    # sets no `repeatable` key.
    - name: file
      help: Path to row/record template file or URL. When FILE is -, read standard input.
      default: "-"
    flags:
    - long: --separator
      help: insert character(s) between rows/records
      arg: separator
    - long: --prefix
      help: insert character(s) at the beginning of the file
      arg: prefix
    - long: --suffix
      help: insert character(s) at the end of the file
      arg: suffix
    - *mode
    - *facets
    - *output
    - *encoding_export
    - *quiet
    examples:
    - orcli export template "duplicates" "template.txt"
    - cat "template.txt" | orcli export template "duplicates"
    - orcli export template "duplicates" "https://example.com/template.txt"
    - orcli export template "duplicates" "template.txt" --output "duplicates.tsv"
    - |-
      orcli export template "duplicates" \\\\
      <<< '{ "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \\\\
      --prefix '{ "events" : [' \\\\
      --separator , \\\\
      --mode records \\\\
      --suffix ]}$'\\\n' \\\\
      --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "invert": false, "query": "^Ben" } ]' \\\\
      | jq
# `run` subcommand: takes the shared repeatable file argument (script files,
# or "-" by default) plus memory, port and interactive options.
- name: run
  help: run tmp OpenRefine workspace and execute shell script(s)
  args:
  - *file
  flags:
  - long: --memory
    arg: ram
    default: "2048M"
    help: maximum RAM for OpenRefine java heap space
  - long: --port
    arg: port
    default: "3333"
    help: PORT on which OpenRefine should listen
  - long: --interactive
    help: do not exit on error and keep bash shell open
  - *quiet
  examples:
  - orcli run --interactive
  # The heredoc examples below are literal block scalars so that their line
  # breaks are kept.
  - |-
    orcli run << EOF
    orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
    orcli transform "duplicates" "https://git.io/fj5ju"
    orcli export tsv "duplicates"
    EOF
  - |-
    orcli run --memory "2000M" --port "3334" << EOF
    orcli import csv "https://git.io/fj5hF" --projectName "duplicates" &
    orcli import csv "https://git.io/fj5hF" --projectName "copy" &
    wait
    echo "finished import"
    orcli export csv "duplicates" --output duplicates.csv &
    orcli export tsv "duplicates" --output duplicates.tsv &
    wait
    wc duplicates*
    EOF
  - |-
    orcli run --interactive "file1.sh" "file2.sh" - << EOF
    echo "finished in \$SECONDS seconds"
    EOF