2022-03-25 11:16:02 +01:00
|
|
|
# Top-level metadata of the orcli command-line definition.
name: orcli
help: OpenRefine command-line interface written in Bash
# Quoted so the version is always read as a string.
version: '0.1.0'
footer: 'https://github.com/opencultureconsulting/orcli'
|
2022-03-25 11:16:02 +01:00
|
|
|
|
|
|
|
# External programs listed as dependencies of orcli.
dependencies:
- curl
- jq
|
2022-03-25 11:16:02 +01:00
|
|
|
|
|
|
|
# Environment variables declared for orcli, with help text and default value.
environment_variables:
- name: OPENREFINE_URL
  help: URL to OpenRefine server
  default: 'http://localhost:3333'
|
2022-03-25 11:16:02 +01:00
|
|
|
|
|
|
|
# Top-level usage examples: import, list, info, export and a batch run.
# Entries that embed double quotes are wrapped in single quotes so the
# inner quotes stay literal.
# NOTE(review): the relative indentation of the continuation lines inside
# the `|-` example was not recoverable from the reviewed view; the lines
# are kept flush - confirm against the original file.
examples:
- 'orcli import csv "https://git.io/fj5hF" --projectName "duplicates"'
- orcli list
- 'orcli info "duplicates"'
- 'orcli export tsv "duplicates"'
- 'orcli export tsv "duplicates" --output "duplicates.tsv"'
- |-
  orcli batch \\\\
  import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\
  info "duplicates" \\\\
  export tsv "duplicates"
|
2022-03-25 11:16:02 +01:00
|
|
|
|
|
|
|
commands:
|
2022-04-20 12:27:53 +02:00
|
|
|
# `batch` command: takes a required catch-all list of orcli commands and
# the flags --memory, --port and --quiet.
# NOTE(review): the relative indentation of the continuation lines inside
# the `|-` examples was not recoverable from the reviewed view; the lines
# are kept flush - confirm against the original file.
- name: batch
  help: start tmp OpenRefine workspace and run multiple orcli commands
  catch_all:
    label: orcli commands
    help: |-
      provide orcli commands without further separators (see examples below)
      avoid "import" "info" "list" "transform" "export" in file or project names
      use bash -c to execute custom commands
    required: true
  flags:
  - long: --memory
    help: maximum RAM for OpenRefine java heap space
    arg: ram
    default: '2048M'
  - long: --port
    help: PORT on which OpenRefine should listen
    arg: port
    # Quoted so the default stays a string rather than an integer.
    default: '3333'
  - long: --quiet
    short: -q
    help: suppress log output, print errors only
  examples:
  - |-
    orcli batch \\\\
    import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\
    info "duplicates" \\\\
    export tsv "duplicates"
  - |-
    orcli batch --memory "2000M" --port "3334" \\\\
    import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\
    export tsv "duplicates"
  - |-
    orcli batch --quiet \\\\
    import csv "https://git.io/fj5hF" --projectName "duplicates" \\\\
    export tsv "duplicates" --output "output/duplicates.tsv" \\\\
    bash -c 'wc -l output/*; echo "finished" in \$SECONDS seconds'
|
|
|
|
|
2022-04-12 12:54:16 +02:00
|
|
|
# `import` command group; the only subcommand visible here is `csv`.
# NOTE(review): the relative indentation of the continuation lines inside
# the `|-` example was not recoverable from the reviewed view; the lines
# are kept flush - confirm against the original file.
- name: import
  help: import commands

  commands:
  # `csv`: repeatable FILE argument (defaults to "-") plus parsing flags.
  - name: csv
    help: import comma-separated values (CSV)
    args:
    - name: file
      help: 'Path to one or more files or URLs. When FILE is -, read standard input.'
      default: '-'
      repeatable: true
    flags:
    - long: --separator
      help: character(s) that separates columns
      arg: separator
      default: ','
    - long: --encoding
      help: set character encoding
      arg: encoding
    - long: --trimStrings
      help: 'trim leading & trailing whitespace from strings'
    - long: --projectName
      arg: projectName
      help: set a name for the OpenRefine project
    - long: --quiet
      short: -q
      help: suppress log output, print errors only
    examples:
    - 'orcli import csv "file"'
    - 'orcli import csv "file1" "file2"'
    - 'cat "file" | orcli import csv'
    - 'orcli import csv "https://git.io/fj5hF"'
    - |-
      orcli import csv "file" \\\\
      --separator ";" \\\\
      --encoding "ISO-8859-1" \\\\
      --trimStrings \\\\
      --projectName "duplicates"
|
2022-04-12 12:54:16 +02:00
|
|
|
|
2022-03-25 23:02:28 +01:00
|
|
|
# `list` command: no arguments, only the --quiet / -q flag.
- name: list
  help: list projects on OpenRefine server
  flags:
  - long: --quiet
    short: -q
    help: suppress log output, print errors only
|
2022-04-14 12:06:54 +02:00
|
|
|
|
|
|
|
# `info` command: one required PROJECT argument (name or id) and the
# --quiet / -q flag.
- name: info
  help: show project metadata
  args:
  - name: project
    help: project name or id
    required: true
  flags:
  - long: --quiet
    short: -q
    help: suppress log output, print errors only
  # Examples spell out the full `orcli info ...` invocation, like the other
  # example entries in this file; a bare `info ...` copied into a shell
  # would not invoke orcli.
  examples:
  - 'orcli info "duplicates"'
  - orcli info 1234567890123
|
|
|
|
|
|
|
|
# `export` command group; the only subcommand visible here is `tsv`.
- name: export
  help: export commands

  commands:
  # `tsv`: required PROJECT argument; --output, --encoding and --quiet flags.
  - name: tsv
    help: export tab-separated values (TSV)
    args:
    - name: project
      help: project name or id
      required: true
    flags:
    - long: --output
      help: Write to file instead of stdout
      arg: file
    - long: --encoding
      help: set character encoding
      arg: encoding
      default: 'UTF-8'
    - long: --quiet
      short: -q
      help: suppress log output, print errors only
    examples:
    - 'orcli export tsv "duplicates"'
    - 'orcli export tsv "duplicates" --output "duplicates.tsv"'
|