---
# Command-line interface definition for orcli.
# NOTE(review): the key layout (name/help/args/flags/commands/examples) looks
# like a bashly configuration; confirm against the build tooling.
#
# Shared argument and flag definitions are declared once with a YAML anchor
# (&name) at their first use and reused further down with an alias (*name).
# Anchors must be defined before the first alias that refers to them.
name: orcli
help: OpenRefine command-line interface written in Bash
version: 0.1.0
footer: https://github.com/opencultureconsulting/orcli

dependencies:
  curl: https://curl.se
  jq: https://github.com/stedolan/jq

environment_variables:
  - name: OPENREFINE_URL
    help: URL to OpenRefine server
    default: "http://localhost:3333"

examples:
  - orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
  - orcli list
  - orcli info "duplicates"
  - orcli transform "duplicates" "https://git.io/fj5ju"
  - orcli search "duplicates" "^Ben"
  - orcli export tsv "duplicates"
  - orcli export tsv "duplicates" --output "duplicates.tsv"
  - orcli delete "duplicates"
  - orcli run --interactive
  - |-
    orcli run << EOF
    orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
    orcli transform "duplicates" "https://git.io/fj5ju"
    orcli export tsv "duplicates"
    EOF

commands:
  - name: completions
    help: |-
      Generate bash completions
      Usage: source <(orcli completions)

  - name: delete
    help: delete OpenRefine project
    args:
      # Anchor: positional project argument shared by most commands.
      - &project
        name: project
        help: project name or id
        required: true
    flags:
      - long: --force
        short: -f
        help: delete all projects with the same name
      # Anchor: --quiet flag shared by several commands.
      - &quiet
        long: --quiet
        short: -q
        help: suppress log output, print errors only
    examples:
      - orcli delete "duplicates"
      - orcli delete "duplicates" --force
      - orcli delete 1234567890123
      - for p in \$(orcli list); do orcli delete \${p:0:13}; done

  - name: import
    help: commands to create OpenRefine projects from files or URLs
    commands:
      - name: csv
        help: import character-separated values (CSV)
        args:
          # Anchor: repeatable file/URL argument, "-" means standard input.
          - &file
            name: file
            help: Path to one or more files or URLs. When FILE is -, read standard input.
            default: "-"
            repeatable: true
        # Every flag below is anchored so that `import tsv` can alias it.
        flags:
          - &separator
            long: --separator
            help: character(s) that separates columns
            arg: separator
            default: ","
          - &blankCellsAsStrings
            long: --blankCellsAsStrings
            help: store blank cells as empty strings instead of nulls
          - &columnNames
            long: --columnNames
            help: |-
              set column names (comma separated)
              hint: add --ignoreLines 1 to overwrite existing header row
            arg: columnNames
            conflicts: [--headerLines]
          - &encoding_import
            long: --encoding
            help: set character encoding
            arg: encoding
          - &guessCellValueTypes
            long: --guessCellValueTypes
            help: attempt to parse cell text into numbers
          - &headerLines
            long: --headerLines
            help: parse x line(s) as column headers
            arg: headerLines
            default: "1"
            conflicts: [--columnNames]
          - &ignoreLines
            long: --ignoreLines
            help: ignore first x line(s) at beginning of file
            arg: ignoreLines
            default: "-1"
          - &ignoreQuoteCharacter
            long: --ignoreQuoteCharacter
            help: do not use any quote character to enclose cells containing column separators
          - &includeFileSources
            long: --includeFileSources
            help: add column with file source
          - &includeArchiveFileName
            long: --includeArchiveFileName
            help: add column with archive file name
          - &limit
            long: --limit
            help: load at most x row(s) of data
            arg: limit
            default: "-1"
          # This anchor is required: `import tsv` references *quoteCharacter.
          - &quoteCharacter
            long: --quoteCharacter
            help: quote character to enclose cells containing column separators
            arg: quoteCharacter
            default: '\\\"'
          - &skipBlankRows
            long: --skipBlankRows
            help: do not store blank rows
          - &skipDataLines
            long: --skipDataLines
            help: discard initial x row(s) of data
            arg: skipDataLines
            default: "0"
          - &trimStrings
            long: --trimStrings
            help: trim leading & trailing whitespace from strings
          - &projectName
            long: --projectName
            arg: projectName
            help: set a name for the OpenRefine project
          - &projectTags
            long: --projectTags
            help: set project tags (comma separated)
            arg: projectTags
          - *quiet
        examples:
          - orcli import csv "file"
          - orcli import csv "file1" "file2"
          - head -n 100 "file" | orcli import csv
          - orcli import csv "https://git.io/fj5hF"
          - |-
            orcli import csv "file" \\\\
              --separator ";" \\\\
              --columnNames "foo,bar,baz" \\\\
              --ignoreLines 1 \\\\
              --encoding "ISO-8859-1" \\\\
              --limit 100 \\\\
              --trimStrings \\\\
              --projectName "duplicates" \\\\
              --projectTags "test,urgent"

      - name: tsv
        help: import tab-separated values (TSV)
        args:
          - *file
        # Same flags as `import csv`, except that *separator is not listed.
        flags:
          - *blankCellsAsStrings
          - *columnNames
          - *encoding_import
          - *guessCellValueTypes
          - *headerLines
          - *ignoreLines
          - *ignoreQuoteCharacter
          - *includeFileSources
          - *includeArchiveFileName
          - *limit
          - *quoteCharacter
          - *skipBlankRows
          - *skipDataLines
          - *trimStrings
          - *projectName
          - *projectTags
          - *quiet
        examples:
          - orcli import tsv "file"
          - orcli import tsv "file1" "file2"
          - head -n 100 "file" | orcli import tsv
          - orcli import tsv "https://example.com/file.tsv"
          # NOTE(review): this example passes --separator, but the tsv flag
          # list above does not include *separator; confirm which is intended.
          - |-
            orcli import tsv "file" \\\\
              --separator ";" \\\\
              --columnNames "foo,bar,baz" \\\\
              --ignoreLines 1 \\\\
              --encoding "ISO-8859-1" \\\\
              --limit 100 \\\\
              --trimStrings \\\\
              --projectName "duplicates" \\\\
              --projectTags "test,urgent"

  - name: list
    help: list projects on OpenRefine server

  - name: info
    help: show OpenRefine project's metadata
    args:
      - *project
    examples:
      - orcli info "duplicates"
      - orcli info 1234567890123
      - orcli info "duplicates" | jq -r .columns[]

  - name: search
    help: |-
      apply regex to each column and print matches in flattened tsv format
      output: index column value
      note that any exporter supports search by using --facets (see examples)
    args:
      - *project
      - &regex
        name: regex
        help: search term (regular expression, case-sensitive)
    flags:
      - long: --index
        help: print column values instead of row.index in the first column of the output
        arg: column
    examples:
      - orcli search "duplicates" "^Ben"
      - orcli search 1234567890123 "^Ben"
      - orcli search "duplicates" "^F" --index "email"
      - orcli search "duplicates" | column -t -s \$'\t'
      - |-
        orcli export tsv "duplicates" --facets '[{ "type": "list",
        "expression": "grel:filter(row.columnNames,cn,cells[cn].value.find(/^Ben/).length()>0).length()>0",
        "columnName": "", "selection": [{"v": {"v": true}}] }]'
      - |-
        orcli export tsv "duplicates" --facets '[{ "type": "list",
        "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",
        "columnName": "", "selection": [{"v": {"v": true}}] }]'

  - name: test
    help: run functional tests on tmp OpenRefine workspace

  - name: transform
    help: apply undo/redo JSON file(s) to an OpenRefine project
    args:
      - *project
      - *file
    flags:
      - *quiet
    examples:
      - orcli transform "duplicates" "history.json"
      - cat "history.json" | orcli transform "duplicates"
      - orcli transform "duplicates" "https://git.io/fj5ju"
      - orcli transform 1234567890123 "history.json"

  - name: export
    help: commands to export data from OpenRefine projects to files
    commands:
      - name: tsv
        help: export tab-separated values (TSV)
        args:
          - *project
        # The three anchored flags below are reused by `export template`.
        flags:
          - &facets
            long: --facets
            help: filter result set by providing an OpenRefine facets config in json
            arg: facets
            default: "[]"
          - &output
            long: --output
            help: Write to file instead of stdout
            arg: file
          - &encoding_export
            long: --encoding
            help: set character encoding
            arg: encoding
            default: "UTF-8"
          - *quiet
        examples:
          - orcli export tsv "duplicates"
          - orcli export tsv "duplicates" --output "duplicates.tsv"
          - orcli export tsv "duplicates" --encoding "ISO-8859-1"
          - |-
            orcli export tsv "duplicates" --facets '[ { "type": "text",
            "columnName": "name", "mode": "regex", "caseSensitive": false,
            "query": "^Ben" } ]'
          - |-
            orcli export tsv "duplicates" --facets '[{ "type": "list",
            "expression": "grel:filter([\\\\"gender\\\\",\\\\"purchase\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0",
            "columnName": "", "selection": [{"v": {"v": true}}] }]'

      - name: template
        help: export to any text format by providing your own GREL template
        args:
          - *project
          # Defined inline rather than aliasing *file: this one is not
          # repeatable and has its own help text.
          - name: file
            help: Path to row/record template file or URL. When FILE is -, read standard input.
            default: "-"
        flags:
          - long: --separator
            help: insert character(s) between rows/records
            arg: separator
          - long: --prefix
            help: insert character(s) at the beginning of the file
            arg: prefix
          - long: --suffix
            help: insert character(s) at the end of the file
            arg: suffix
          - long: --mode
            help: specify if template shall be applied to each row or record
            arg: mode
            allowed: [rows, records]
            default: "rows"
          - *facets
          - *output
          - *encoding_export
          - *quiet
        examples:
          - orcli export template "duplicates" "template.txt"
          - cat "template.txt" | orcli export template "duplicates"
          - orcli export template "duplicates" "https://example.com/template.txt"
          - orcli export template "duplicates" "template.txt" --output "duplicates.tsv"
          - |-
            orcli export template "duplicates" \\\\
              <<< '{ "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \\\\
              --prefix '{ "events" : [' \\\\
              --separator , \\\\
              --mode records \\\\
              --suffix ]}$'\\\n' \\\\
              --facets '[ { "type": "text", "columnName": "name", "mode": "regex", "caseSensitive": false, "query": "^Ben" } ]' \\\\
              | jq

  - name: run
    help: run tmp OpenRefine workspace and execute shell script(s)
    args:
      - *file
    flags:
      - long: --memory
        help: maximum RAM for OpenRefine java heap space
        arg: ram
        default: "2048M"
      - long: --port
        help: PORT on which OpenRefine should listen
        arg: port
        default: "3333"
      - long: --interactive
        help: do not exit on error and keep bash shell open
      - *quiet
    examples:
      - orcli run --interactive
      - |-
        orcli run << EOF
        orcli import csv "https://git.io/fj5hF" --projectName "duplicates"
        orcli transform "duplicates" "https://git.io/fj5ju"
        orcli export tsv "duplicates"
        EOF
      # NOTE(review): this example calls `orcli export csv`, but no `csv`
      # subcommand is defined under `export` in this file; confirm.
      - |-
        orcli run --memory "2000M" --port "3334" << EOF
        orcli import csv "https://git.io/fj5hF" --projectName "duplicates" &
        orcli import csv "https://git.io/fj5hF" --projectName "copy" &
        wait
        echo "finished import"
        orcli export csv "duplicates" --output duplicates.csv &
        orcli export tsv "duplicates" --output duplicates.tsv &
        wait
        wc duplicates*
        EOF
      - |-
        orcli run --interactive "file1.sh" "file2.sh" - << EOF
        echo "finished in \$SECONDS seconds"
        EOF