orcli/src/bashly.yml

349 lines
11 KiB
YAML
Raw Normal View History

2022-03-25 11:16:02 +01:00
# Root definition of the orcli command-line interface.
name: orcli
version: 0.1.0
help: OpenRefine command-line interface written in Bash
footer: https://github.com/opencultureconsulting/orcli

# External programs listed as dependencies, each mapped to a URL.
dependencies:
  curl: https://curl.se
  jq: https://github.com/stedolan/jq

# Environment variable naming the OpenRefine server URL, with a default.
environment_variables:
- name: OPENREFINE_URL
  default: 'http://localhost:3333'
  help: URL to OpenRefine server

# Usage examples for the root command.
examples:
- orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
- orcli list
- orcli info "duplicates"
- orcli transform "duplicates" "https://git.io/fj5ju"
- orcli search "duplicates" "^Ben"
- orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli delete "duplicates"
- orcli run --interactive
# Multi-line example: commands are fed to `orcli run` through a here-document.
- |-
  orcli run << EOF
  orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
  orcli transform "duplicates" "https://git.io/fj5ju"
  orcli export tsv "duplicates"
  EOF

# Subcommands follow as sequence items.
commands:
2022-10-04 21:19:18 +00:00
# completions: generate bash completions; the help text carries the usage line.
- name: completions
  help: |-
    Generate bash completions
    Usage: source <(orcli completions)
2022-10-06 11:28:47 +00:00
2022-12-06 14:38:20 +00:00
# delete: remove an OpenRefine project.
# This command defines the &project and &quiet anchors that other commands
# reference through *project and *quiet, so they must stay defined here.
- name: delete
  help: delete OpenRefine project
  args:
  - &project
    name: project
    required: true
    help: project name or id
  flags:
  - short: -f
    long: --force
    help: delete all projects with the same name
  - &quiet
    short: -q
    long: --quiet
    help: suppress log output, print errors only
  examples:
  - orcli delete "duplicates"
  - orcli delete "duplicates" --force
  - orcli delete 1234567890123
  - for p in \$(orcli list); do orcli delete \${p:0:13}; done
2022-12-06 14:38:20 +00:00
2022-04-12 10:54:16 +00:00
# import: parent command grouping the importers (csv, tsv) that create
# OpenRefine projects.
# The csv subcommand defines the anchors (&file, &separator, &columnNames, ...)
# that the tsv subcommand and other commands reuse through aliases.
- name: import
  help: commands to create OpenRefine projects from files or URLs
  commands:
  - name: csv
    help: import character-separated values (CSV)
    args:
    # Repeatable positional argument; "-" (the default) means standard input.
    - &file
      name: file
      help: Path to one or more files or URLs. When FILE is -, read standard input.
      default: "-"
      repeatable: true
    flags:
    - &separator
      long: --separator
      help: character(s) that separates columns
      arg: separator
      default: ","
    - &blankCellsAsStrings
      long: --blankCellsAsStrings
      help: store blank cells as empty strings instead of nulls
    # --columnNames and --headerLines are declared mutually exclusive.
    - &columnNames
      long: --columnNames
      help: |-
        set column names (comma separated)
        hint: add --ignoreLines 1 to overwrite existing header row
      arg: columnNames
      conflicts: [--headerLines]
    - &encoding_import
      long: --encoding
      help: set character encoding
      arg: encoding
    - &guessCellValueTypes
      long: --guessCellValueTypes
      help: attempt to parse cell text into numbers
    - &headerLines
      long: --headerLines
      help: parse x line(s) as column headers
      arg: headerLines
      default: "1"
      conflicts: [--columnNames]
    - &ignoreLines
      long: --ignoreLines
      help: ignore first x line(s) at beginning of file
      arg: ignoreLines
      default: "-1"
    - &ignoreQuoteCharacter
      long: --ignoreQuoteCharacter
      help: do not use any quote character to enclose cells containing column separators
    - &includeFileSources
      long: --includeFileSources
      help: add column with file source
    - &includeArchiveFileName
      long: --includeArchiveFileName
      help: add column with archive file name
    - &limit
      long: --limit
      help: load at most x row(s) of data
      arg: limit
      default: "-1"
    - &quoteCharacter
      long: --quoteCharacter
      help: quote character to enclose cells containing column separators
      arg: quoteCharacter
      default: '\\\"'
    - &skipBlankRows
      long: --skipBlankRows
      help: do not store blank rows
    - &skipDataLines
      long: --skipDataLines
      help: discard initial x row(s) of data
      arg: skipDataLines
      default: "0"
    - &trimStrings
      long: --trimStrings
      help: trim leading & trailing whitespace from strings
    - &projectName
      long: --projectName
      arg: projectName
      help: set a name for the OpenRefine project
    - &projectTags
      long: --projectTags
      help: set project tags (comma separated)
      arg: projectTags
    - *quiet
    examples:
    - orcli import csv "file"
    - orcli import csv "file1" "file2"
    - head -n 100 "file" | orcli import csv
    - orcli import csv "https://git.io/fj5hF"
    - |-
      orcli import csv "file" \\\\
      --separator ";" \\\\
      --columnNames "foo,bar,baz" \\\\
      --ignoreLines 1 \\\\
      --encoding "ISO-8859-1" \\\\
      --limit 100 \\\\
      --trimStrings \\\\
      --projectName "duplicates" \\\\
      --projectTags "test,urgent"

  # tsv reuses the csv definitions via aliases, except *separator, which is
  # intentionally absent from the flag list below.
  - name: tsv
    help: import tab-separated values (TSV)
    args:
    - *file
    flags:
    - *blankCellsAsStrings
    - *columnNames
    - *encoding_import
    - *guessCellValueTypes
    - *headerLines
    - *ignoreLines
    - *ignoreQuoteCharacter
    - *includeFileSources
    - *includeArchiveFileName
    - *limit
    - *quoteCharacter
    - *skipBlankRows
    - *skipDataLines
    - *trimStrings
    - *projectName
    - *projectTags
    - *quiet
    examples:
    - orcli import tsv "file"
    - orcli import tsv "file1" "file2"
    - head -n 100 "file" | orcli import tsv
    - orcli import tsv "https://example.com/file.tsv"
    # The example must only use flags declared above; --separator is not one
    # of them for tsv, so it is not shown here.
    - |-
      orcli import tsv "file" \\\\
      --columnNames "foo,bar,baz" \\\\
      --ignoreLines 1 \\\\
      --encoding "ISO-8859-1" \\\\
      --limit 100 \\\\
      --trimStrings \\\\
      --projectName "duplicates" \\\\
      --projectTags "test,urgent"
2022-11-16 22:22:54 +00:00
2022-03-25 22:02:28 +00:00
# list: takes no arguments or flags.
- name: list
  help: list projects on OpenRefine server
2022-04-14 10:06:54 +00:00
# info: show metadata for one project; the last example pipes the output to jq.
- name: info
  help: "show OpenRefine project's metadata"
  args:
  # Alias of the &project argument (project name or id, required).
  - *project
  examples:
  - orcli info "duplicates"
  - orcli info 1234567890123
  - orcli info "duplicates" | jq -r .columns[]
2022-11-03 21:07:08 +00:00
2023-10-27 21:12:16 +00:00
# search: apply a regular expression to every column of a project and print
# the matches as flattened TSV.
- name: search
  help: apply regex to each column and print matches in flattened tsv format
  args:
  - *project
  # NOTE(review): every example passes a regex, yet the argument is not marked
  # `required: true` — confirm how the command script handles a missing regex
  # before tightening this.
  - &regex
    name: regex
    help: regular expression to search for in each column
  examples:
  - orcli search "duplicates" "^Ben"
  - orcli search 1234567890123 "^Ben"
  - orcli search "duplicates" "^Ben" | column -t -s \$'\t'
2022-11-30 22:49:54 +00:00
# test: takes no arguments or flags.
- name: test
  help: run functional tests on tmp OpenRefine workspace
2022-11-03 21:07:08 +00:00
# transform: apply undo/redo JSON file(s) to a project.
# Both positional arguments and the flag are aliases of anchors defined by
# other commands (*project, *file, *quiet).
- name: transform
  help: apply undo/redo JSON file(s) to an OpenRefine project
  args:
  - *project
  # *file defaults to "-" (standard input), as the `cat ... |` example shows.
  - *file
  flags:
  - *quiet
  examples:
  - orcli transform "duplicates" "history.json"
  - cat "history.json" | orcli transform "duplicates"
  - orcli transform "duplicates" "https://git.io/fj5ju"
  - orcli transform 1234567890123 "history.json"
2022-04-14 10:06:54 +00:00
# export: parent command grouping the exporters (tsv, template).
# The tsv subcommand defines the &facets, &output and &encoding_export anchors
# that the template subcommand aliases.
- name: export
  help: commands to export data from OpenRefine projects to files
  commands:
  - name: tsv
    help: export tab-separated values (TSV)
    args:
    - *project
    flags:
    - &facets
      long: --facets
      arg: facets
      default: '[]'
      help: filter result set by providing an OpenRefine facets config in json
    - &output
      long: --output
      arg: file
      help: Write to file instead of stdout
    - &encoding_export
      long: --encoding
      arg: encoding
      default: 'UTF-8'
      help: set character encoding
    - *quiet
    examples:
    - orcli export tsv "duplicates"
    - orcli export tsv "duplicates" --output "duplicates.tsv"
    - orcli export tsv "duplicates" --encoding "ISO-8859-1"
    - |-
      orcli export tsv "duplicates" --facets '[ { "type": "text", "name": "foo", "columnName": "name", "mode": "regex", "caseSensitive": false, "query": "Ben" } ]'

  - name: template
    help: export to any text format by providing your own GREL template
    args:
    - *project
    # Declared inline rather than aliasing *file: the help text differs and
    # this argument is not repeatable.
    - name: file
      default: '-'
      help: Path to row/record template file or URL. When FILE is -, read standard input.
    flags:
    - long: --separator
      arg: separator
      help: insert character(s) between rows/records
    - long: --prefix
      arg: prefix
      help: insert character(s) at the beginning of the file
    - long: --suffix
      arg: suffix
      help: insert character(s) at the end of the file
    # --mode accepts only the listed values.
    - long: --mode
      arg: mode
      allowed: [rows, records]
      default: 'rows'
      help: specify if template shall be applied to each row or record
    - *facets
    - *output
    - *encoding_export
    - *quiet
    examples:
    - orcli export template "duplicates" "template.txt"
    - cat "template.txt" | orcli export template "duplicates"
    - orcli export template "duplicates" "https://example.com/template.txt"
    - orcli export template "duplicates" "template.txt" --output "duplicates.tsv"
    - |-
      orcli export template "duplicates" \\\\
      <<< '{ "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \\\\
      --prefix '{ "events" : [' \\\\
      --separator , \\\\
      --mode records \\\\
      --suffix ]}$'\\\n' \\\\
      --facets '[ { "type": "text", "name": "foo", "columnName": "name", "mode": "regex", "caseSensitive": false, "query": "Ben" } ]' \\\\
      | jq
# run: start a temporary OpenRefine workspace and execute shell script(s).
# Scripts come from the repeatable *file argument, which defaults to "-"
# (standard input) — hence the here-document examples below.
- name: run
  help: run tmp OpenRefine workspace and execute shell script(s)
  args:
  - *file
  flags:
  - long: --memory
    help: maximum RAM for OpenRefine java heap space
    arg: ram
    default: "2048M"
  - long: --port
    help: PORT on which OpenRefine should listen
    arg: port
    default: "3333"
  - long: --interactive
    help: do not exit on error and keep bash shell open
  - *quiet
  examples:
  - orcli run --interactive
  - |-
    orcli run << EOF
    orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
    orcli transform "duplicates" "https://git.io/fj5ju"
    orcli export tsv "duplicates"
    EOF
  # Parallel example. Only the `tsv` and `template` export subcommands exist,
  # so both background exports use `export tsv` (one per imported project).
  - |-
    orcli run --memory "2000M" --port "3334" << EOF
    orcli import csv "https://git.io/fj5hF" --projectName "duplicates" &
    orcli import csv "https://git.io/fj5hF" --projectName "copy" &
    wait
    echo "finished import"
    orcli export tsv "duplicates" --output duplicates.tsv &
    orcli export tsv "copy" --output copy.tsv &
    wait
    wc duplicates.tsv copy.tsv
    EOF
  - |-
    orcli run --interactive "file1.sh" "file2.sh" - << EOF
    echo "finished in \$SECONDS seconds"
    EOF