2022-03-25 11:16:02 +01:00
|
|
|
# Root metadata of the CLI definition: program name, one-line description,
# version and project URL.
name: orcli
help: OpenRefine command-line interface written in Bash
# Quoted so the version is always loaded as a string.
version: '0.1.0'
footer: 'https://github.com/opencultureconsulting/orcli'
|
2022-03-25 11:16:02 +01:00
|
|
|
|
|
|
|
# External programs listed as dependencies of the CLI.
dependencies: [curl, jq]
|
2022-03-25 11:16:02 +01:00
|
|
|
|
|
|
|
# Environment variables declared for the CLI.
environment_variables:
- name: OPENREFINE_URL
  # Single quotes: the URL needs no escape processing.
  default: 'http://localhost:3333'
  help: URL to OpenRefine server
|
2022-03-25 11:16:02 +01:00
|
|
|
|
|
|
|
# Usage examples declared before the command list.
# Items that contain double quotes are wrapped in single quotes so the inner
# quotes are unambiguously part of the string.
examples:
- 'orcli import csv "https://git.io/fj5hF" --projectName "duplicates"'
- orcli list
- 'orcli info "duplicates"'
- 'orcli export tsv "duplicates"'
- 'orcli export tsv "duplicates" --output "duplicates.tsv"'
# Literal block scalar keeps the heredoc's line breaks; `-` strips the final
# newline.
- |-
  orcli batch << EOF
  orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
  orcli info "duplicates"
  orcli export tsv "duplicates"
  EOF
|
2022-03-25 11:16:02 +01:00
|
|
|
|
2022-10-07 11:02:05 +02:00
|
|
|
# Flags declared ahead of the command list.
flags:
# --quiet / -q: boolean switch (no `arg` key, so it takes no value).
- long: '--quiet'
  short: '-q'
  help: 'suppress log output, print errors only'
|
|
|
|
|
2022-03-25 11:16:02 +01:00
|
|
|
# Subcommand definitions.
commands:
# completions: the help text spans two lines, so it is written as a literal
# block scalar. The backslash before `$` is part of the stored text.
- name: completions
  help: |-
    Generate bash completions
    Usage: eval "\$(orcli completions)"
|
2022-10-06 13:28:47 +02:00
|
|
|
|
2022-04-20 12:27:53 +02:00
|
|
|
# batch: accepts one or more script files (default `-`) and two options,
# --memory and --port.
- name: batch
  help: run tmp OpenRefine workspace and execute shell script
  args:
  - name: file
    help: 'Path to one or more files. When FILE is -, read standard input.'
    # Quoted: a bare `-` would be read as a sequence indicator.
    default: '-'
    repeatable: true
  flags:
  - long: '--memory'
    arg: ram
    default: '2048M'
    help: maximum RAM for OpenRefine java heap space
  - long: '--port'
    arg: port
    # Quoted so the default stays a string rather than an integer.
    default: '3333'
    help: PORT on which OpenRefine should listen
  examples:
  # Minimal heredoc usage.
  - |-
    orcli batch << EOF
    orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
    orcli info "duplicates"
    orcli export tsv "duplicates"
    EOF
  # Custom memory/port with background jobs joined by `wait`.
  # NOTE(review): this example calls `orcli export csv`, but only `export tsv`
  # is defined in the visible part of this file; confirm the subcommand exists.
  - |-
    orcli batch --memory "2000M" --port "3334" << EOF
    orcli import csv "https://git.io/fj5hF" --projectName "duplicates" &
    orcli import csv "https://git.io/fj5hF" --projectName "copy" &
    wait
    echo "finished import"
    orcli export csv "duplicates" --output duplicates.csv &
    orcli export tsv "duplicates" --output duplicates.tsv &
    wait
    wc duplicates*
    EOF
  # Script files combined with standard input (`-`).
  - |-
    orcli batch "file1.sh" "file2.sh" - << EOF
    echo "finished in \$SECONDS seconds"
    EOF
|
2022-04-20 12:27:53 +02:00
|
|
|
|
2022-04-12 12:54:16 +02:00
|
|
|
# import: parent command that only groups subcommands; `csv` is the one
# defined here.
- name: import
  help: import commands
  commands:
  - name: csv
    help: import comma-separated values (CSV)
    args:
    - name: file
      help: 'Path to one or more files or URLs. When FILE is -, read standard input.'
      # Quoted: a bare `-` would be read as a sequence indicator.
      default: '-'
      repeatable: true
    flags:
    - long: '--separator'
      arg: separator
      default: ','
      help: character(s) that separates columns
    - long: '--encoding'
      arg: encoding
      help: set character encoding
    # No `arg` key: --trimStrings is a plain switch.
    - long: '--trimStrings'
      help: 'trim leading & trailing whitespace from strings'
    - long: '--projectName'
      arg: projectName
      help: set a name for the OpenRefine project
    examples:
    - 'orcli import csv "file"'
    - 'orcli import csv "file1" "file2"'
    - 'cat "file" | orcli import csv'
    - 'orcli import csv "https://git.io/fj5hF"'
    # The backslashes are stored literally (a literal block does no escape
    # processing); they are kept exactly as written.
    - |-
      orcli import csv "file" \\\\
      --separator ";" \\\\
      --encoding "ISO-8859-1" \\\\
      --trimStrings \\\\
      --projectName "duplicates"
|
2022-04-12 12:54:16 +02:00
|
|
|
|
2022-03-25 23:02:28 +01:00
|
|
|
# list: declares neither args nor flags.
- name: list
  help: list projects on OpenRefine server
|
2022-04-14 12:06:54 +02:00
|
|
|
|
|
|
|
# info: takes one required positional argument identifying the project.
- name: info
  help: show project metadata
  args:
  - name: project
    help: project name or id
    required: true
  # Examples carry the `orcli` program name, like every other example in this
  # file, so they can be copied and run as shown.
  examples:
  - 'orcli info "duplicates"'
  - orcli info 1234567890123
|
|
|
|
|
|
|
|
# export: parent command that only groups subcommands; `tsv` is the one
# defined here.
- name: export
  help: export commands
  commands:
  - name: tsv
    help: export tab-separated values (TSV)
    args:
    - name: project
      help: project name or id
      required: true
    flags:
    - long: '--output'
      arg: file
      help: Write to file instead of stdout
    - long: '--encoding'
      arg: encoding
      default: 'UTF-8'
      help: set character encoding
    examples:
    - 'orcli export tsv "duplicates"'
    - 'orcli export tsv "duplicates" --output "duplicates.tsv"'
|