orcli/orcli

5373 lines
135 KiB
Bash
Executable File

#!/usr/bin/env bash
# This script was generated by bashly 1.1.3 (https://bashly.dannyb.co)
# Modifying it manually is not recommended
# :wrapper.bash3_bouncer
# Abort early when the running Bash reports a major version below 4.
# "${BASH_VERSINFO:-0}" reads the first element of BASH_VERSINFO and falls
# back to 0 when the variable is unset or empty.
if [[ "${BASH_VERSINFO:-0}" -lt 4 ]]; then
# the message is written to stderr and the script exits with status 1
printf "bash version 4 or higher is required\n" >&2
exit 1
fi
# :command.master_script
# :command.version_command
# Print the script version followed by a newline.
# Globals: version (read)
# Outputs: the value of $version on stdout
version_command() {
  # printf instead of echo: a value that looks like an echo option
  # (for example "-n" or "-e") is printed literally instead of being consumed.
  printf '%s\n' "$version"
}
# :command.usage
# Print the top-level help for orcli.
# Globals: long_usage (read) - when non-empty, options, environment
#          variables, examples and the project URL are appended.
# Outputs: help text on stdout
orcli_usage() {
  # banner, synopsis and command overview are identical in both modes
  printf '%s\n' \
    "orcli - OpenRefine command-line interface written in Bash" \
    "" \
    "Usage:" \
    " orcli COMMAND" \
    " orcli [COMMAND] --help | -h" \
    " orcli --version | -v" \
    "" \
    "Commands:"
  # name/description pairs: printf re-applies the format to every pair
  printf ' %s %s\n' \
    "completions" "Generate bash completions" \
    "delete " "delete OpenRefine project" \
    "import " "commands to create OpenRefine projects from files or URLs" \
    "list " "list projects on OpenRefine server" \
    "info " "show OpenRefine project's metadata" \
    "search " "apply regex to each column and print matches in flattened tsv format" \
    "sort " "commands to sort OpenRefine projects" \
    "test " "run functional tests on tmp OpenRefine workspace" \
    "transform " "apply undo/redo JSON file(s) to an OpenRefine project" \
    "export " "commands to export data from OpenRefine projects to files" \
    "run " "run tmp OpenRefine workspace and execute shell script(s)"
  printf '\n'

  # everything below belongs to the long help only
  [[ -n $long_usage ]] || return 0

  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    "" \
    " --version, -v" \
    " Show version number" \
    "" \
    "Environment Variables:" \
    " OPENREFINE_URL" \
    " URL to OpenRefine server" \
    " Default: http://localhost:3333" \
    "" \
    "Examples:" \
    ' orcli import csv "https://git.io/fj5hF" --projectName "duplicates"' \
    " orcli list" \
    ' orcli info "duplicates"' \
    ' orcli transform "duplicates" "https://git.io/fj5ju"' \
    ' orcli search "duplicates" "^Ben"' \
    ' orcli sort columns "duplicates"' \
    ' orcli export tsv "duplicates"' \
    ' orcli export tsv "duplicates" --output "duplicates.tsv"' \
    ' orcli delete "duplicates"' \
    " orcli run --interactive" \
    " orcli run << EOF" \
    ' orcli import csv "https://git.io/fj5hF" --projectName "duplicates"' \
    ' orcli transform "duplicates" "https://git.io/fj5ju"' \
    ' orcli export tsv "duplicates"' \
    " EOF" \
    "" \
    "https://github.com/opencultureconsulting/orcli" \
    ""
}
# :command.usage
# Print help for "orcli completions".
# Globals: long_usage (read) - selects the long header and adds the options
# Outputs: help text on stdout
orcli_completions_usage() {
  # the header is the only part whose wording depends on the mode
  if [[ -z $long_usage ]]; then
    printf '%s\n' "orcli completions - Generate bash completions" ""
  else
    printf '%s\n' \
      "orcli completions" \
      "" \
      " Generate bash completions" \
      " Usage: source <(orcli completions)" \
      ""
  fi

  printf '%s\n' \
    "Usage:" \
    " orcli completions" \
    " orcli completions --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    ""
}
# :command.usage
# Print help for "orcli delete".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_delete_usage() {
  printf '%s\n' \
    "orcli delete - delete OpenRefine project" \
    "" \
    "Usage:" \
    " orcli delete PROJECT [OPTIONS]" \
    " orcli delete --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # the last example is single-quoted on purpose: $(...) and ${...} must be
  # shown literally, not expanded
  printf '%s\n' \
    "Options:" \
    " --force, -f" \
    " delete all projects with the same name" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    "Examples:" \
    ' orcli delete "duplicates"' \
    ' orcli delete "duplicates" --force' \
    " orcli delete 1234567890123" \
    ' for p in $(orcli list); do orcli delete ${p:0:13}; done' \
    ""
}
# :command.usage
# Print help for the "orcli import" command group.
# Globals: long_usage (read) - when non-empty, the options are appended
# Outputs: help text on stdout
orcli_import_usage() {
  printf '%s\n' \
    "orcli import - commands to create OpenRefine projects from files or URLs" \
    "" \
    "Usage:" \
    " orcli import COMMAND" \
    " orcli import [COMMAND] --help | -h" \
    "" \
    "Commands:"
  # sub-command/description pairs: printf re-applies the format to every pair
  printf ' %s %s\n' \
    "csv " "import character-separated values (CSV)" \
    "tsv " "import tab-separated values (TSV)" \
    "json " "import JSON" \
    "jsonl" "import JSON Lines / newline-delimited JSON"
  printf '\n'

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    ""
}
# :command.usage
# Print help for "orcli import csv".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_import_csv_usage() {
  printf '%s\n' \
    "orcli import csv - import character-separated values (CSV)" \
    "" \
    "Usage:" \
    " orcli import csv [FILE...] [OPTIONS]" \
    " orcli import csv --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # one argument per output line; "" produces the blank separator lines
  printf '%s\n' \
    "Options:" \
    " --separator SEPARATOR" \
    " character(s) that separates columns" \
    " Default: ," \
    "" \
    " --blankCellsAsStrings" \
    " store blank cells as empty strings instead of nulls" \
    "" \
    " --columnNames COLUMNNAMES" \
    " set column names (comma separated)" \
    " hint: add --ignoreLines 1 to overwrite existing header row" \
    "" \
    " --encoding ENCODING" \
    " set character encoding" \
    "" \
    " --guessCellValueTypes" \
    " attempt to parse cell text into numbers" \
    "" \
    " --headerLines HEADERLINES" \
    " parse x line(s) as column headers" \
    " Default: 1" \
    "" \
    " --ignoreLines IGNORELINES" \
    " ignore first x line(s) at beginning of file" \
    " Default: -1" \
    "" \
    " --ignoreQuoteCharacter" \
    " do not use any quote character to enclose cells containing column separators" \
    "" \
    " --includeFileSources" \
    " add column with file source" \
    "" \
    " --includeArchiveFileName" \
    " add column with archive file name" \
    "" \
    " --limit LIMIT" \
    " load at most x row(s) of data" \
    " Default: -1" \
    "" \
    " --quoteCharacter QUOTECHARACTER" \
    " quote character to enclose cells containing column separators" \
    ' Default: "' \
    "" \
    " --skipBlankRows" \
    " do not store blank rows" \
    "" \
    " --skipDataLines SKIPDATALINES" \
    " discard initial x row(s) of data" \
    " Default: 0" \
    "" \
    " --trimStrings" \
    " trim leading & trailing whitespace from strings" \
    "" \
    " --projectName PROJECTNAME" \
    " set a name for the OpenRefine project" \
    "" \
    " --projectTags PROJECTTAGS" \
    " set project tags (comma separated)" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " FILE..." \
    " Path to one or more files or URLs. When FILE is -, read standard input." \
    " Default: -" \
    "" \
    "Examples:" \
    ' orcli import csv "file"' \
    ' orcli import csv "file1" "file2"' \
    ' head -n 100 "file" | orcli import csv' \
    ' orcli import csv "https://git.io/fj5hF"'
  # multi-line example kept as a printf format string: each "\\\\" prints a
  # single backslash and each "\n" starts a new output line
  printf " orcli import csv \"file\" \\\\\n --separator \";\" \\\\\n --columnNames \"foo,bar,baz\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
  printf '\n'
}
# :command.usage
# Print help for "orcli import tsv".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
# NOTE: the file header says this script is generated by bashly; the example
# fix below presumably also needs to go into the generator's source config.
orcli_import_tsv_usage() {
  printf '%s\n' \
    "orcli import tsv - import tab-separated values (TSV)" \
    "" \
    "Usage:" \
    " orcli import tsv [FILE...] [OPTIONS]" \
    " orcli import tsv --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # one argument per output line; "" produces the blank separator lines
  printf '%s\n' \
    "Options:" \
    " --blankCellsAsStrings" \
    " store blank cells as empty strings instead of nulls" \
    "" \
    " --columnNames COLUMNNAMES" \
    " set column names (comma separated)" \
    " hint: add --ignoreLines 1 to overwrite existing header row" \
    "" \
    " --encoding ENCODING" \
    " set character encoding" \
    "" \
    " --guessCellValueTypes" \
    " attempt to parse cell text into numbers" \
    "" \
    " --headerLines HEADERLINES" \
    " parse x line(s) as column headers" \
    " Default: 1" \
    "" \
    " --ignoreLines IGNORELINES" \
    " ignore first x line(s) at beginning of file" \
    " Default: -1" \
    "" \
    " --ignoreQuoteCharacter" \
    " do not use any quote character to enclose cells containing column separators" \
    "" \
    " --includeFileSources" \
    " add column with file source" \
    "" \
    " --includeArchiveFileName" \
    " add column with archive file name" \
    "" \
    " --limit LIMIT" \
    " load at most x row(s) of data" \
    " Default: -1" \
    "" \
    " --quoteCharacter QUOTECHARACTER" \
    " quote character to enclose cells containing column separators" \
    ' Default: "' \
    "" \
    " --skipBlankRows" \
    " do not store blank rows" \
    "" \
    " --skipDataLines SKIPDATALINES" \
    " discard initial x row(s) of data" \
    " Default: 0" \
    "" \
    " --trimStrings" \
    " trim leading & trailing whitespace from strings" \
    "" \
    " --projectName PROJECTNAME" \
    " set a name for the OpenRefine project" \
    "" \
    " --projectTags PROJECTTAGS" \
    " set project tags (comma separated)" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " FILE..." \
    " Path to one or more files or URLs. When FILE is -, read standard input." \
    " Default: -" \
    "" \
    "Examples:" \
    ' orcli import tsv "file"' \
    ' orcli import tsv "file1" "file2"' \
    ' head -n 100 "file" | orcli import tsv' \
    ' orcli import tsv "https://example.com/file.tsv"'
  # Multi-line example (each "\\\\" prints one backslash, each "\n" a line
  # break). It deliberately carries no --separator: that flag is not among
  # the options listed above for this command.
  printf " orcli import tsv \"file\" \\\\\n --columnNames \"foo,bar,baz\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
  printf '\n'
}
# :command.usage
# Print help for "orcli import json".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_import_json_usage() {
  printf '%s\n' \
    "orcli import json - import JSON" \
    "" \
    "Usage:" \
    " orcli import json [FILE...] [OPTIONS]" \
    " orcli import json --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # one argument per output line; "" produces the blank separator lines
  printf '%s\n' \
    "Options:" \
    " --recordPath JSON" \
    " specify record path elements in JSON array" \
    ' Default: [ "_" , "_" ]' \
    "" \
    " --rename" \
    " rename columns after import to remove record path fragments" \
    "" \
    " --guessCellValueTypes" \
    " attempt to parse cell text into numbers" \
    "" \
    " --includeFileSources" \
    " add column with file source" \
    "" \
    " --includeArchiveFileName" \
    " add column with archive file name" \
    "" \
    " --limit LIMIT" \
    " load at most x row(s) of data" \
    " Default: -1" \
    "" \
    " --storeEmptyStrings" \
    " preserve empty strings" \
    "" \
    " --trimStrings" \
    " trim leading & trailing whitespace from strings" \
    "" \
    " --projectName PROJECTNAME" \
    " set a name for the OpenRefine project" \
    "" \
    " --projectTags PROJECTTAGS" \
    " set project tags (comma separated)" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " FILE..." \
    " Path to one or more files or URLs. When FILE is -, read standard input." \
    " Default: -" \
    "" \
    "Examples:" \
    ' orcli import json "file"' \
    ' orcli import json "file1" "file2"' \
    ' orcli import json "https://example.com/file.json"'
  # multi-line example kept as a printf format string: each "\\\\" prints a
  # single backslash and each "\n" starts a new output line
  printf " orcli import json \"file\" \\\\\n --recordPath '[ \"_\", \"rows\", \"_\" ]' \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
  printf '\n'
}
# :command.usage
# Print help for "orcli import jsonl".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_import_jsonl_usage() {
  printf '%s\n' \
    "orcli import jsonl - import JSON Lines / newline-delimited JSON" \
    "" \
    "Usage:" \
    " orcli import jsonl [FILE...] [OPTIONS]" \
    " orcli import jsonl --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # one argument per output line; "" produces the blank separator lines
  printf '%s\n' \
    "Options:" \
    " --rename" \
    " rename columns after import to remove record path fragments" \
    "" \
    " --guessCellValueTypes" \
    " attempt to parse cell text into numbers" \
    "" \
    " --includeFileSources" \
    " add column with file source" \
    "" \
    " --includeArchiveFileName" \
    " add column with archive file name" \
    "" \
    " --limit LIMIT" \
    " load at most x row(s) of data" \
    " Default: -1" \
    "" \
    " --storeEmptyStrings" \
    " preserve empty strings" \
    "" \
    " --trimStrings" \
    " trim leading & trailing whitespace from strings" \
    "" \
    " --projectName PROJECTNAME" \
    " set a name for the OpenRefine project" \
    "" \
    " --projectTags PROJECTTAGS" \
    " set project tags (comma separated)" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " FILE..." \
    " Path to one or more files or URLs. When FILE is -, read standard input." \
    " Default: -" \
    "" \
    "Examples:" \
    ' orcli import jsonl "file"' \
    ' orcli import jsonl "file1" "file2"' \
    ' orcli import jsonl "https://example.com/file.json"' \
    ' orcli import jsonl --rename <(orcli export jsonl "duplicates")'
  # multi-line example kept as a printf format string: each "\\\\" prints a
  # single backslash and each "\n" starts a new output line
  printf " orcli import jsonl \"file\" \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
  printf '\n'
}
# :command.usage
# Print help for "orcli list".
# Globals: long_usage (read) - when non-empty, the options are appended
# Outputs: help text on stdout
orcli_list_usage() {
  printf '%s\n' \
    "orcli list - list projects on OpenRefine server" \
    "" \
    "Usage:" \
    " orcli list" \
    " orcli list --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    ""
}
# :command.usage
# Print help for "orcli info".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_info_usage() {
  printf '%s\n' \
    "orcli info - show OpenRefine project's metadata" \
    "" \
    "Usage:" \
    " orcli info PROJECT" \
    " orcli info --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    "Examples:" \
    ' orcli info "duplicates"' \
    " orcli info 1234567890123" \
    ' orcli info "duplicates" | jq -r .columns[]' \
    ""
}
# :command.usage
# Print help for "orcli search".
# Globals: long_usage (read) - selects the long header and appends options,
#          arguments and examples.
# Outputs: help text on stdout
# NOTE: the file header says this script is generated by bashly; the escape
# fix below presumably also needs to go into the generator's source config.
orcli_search_usage() {
  # the header is the only part whose wording depends on the mode
  if [[ -z $long_usage ]]; then
    printf '%s\n' \
      "orcli search - apply regex to each column and print matches in flattened tsv format" \
      ""
  else
    printf '%s\n' \
      "orcli search" \
      "" \
      " apply regex to each column and print matches in flattened tsv format" \
      " output: index column value" \
      " note that any exporter supports search by using --facets (see examples)" \
      ""
  fi

  printf '%s\n' \
    "Usage:" \
    " orcli search PROJECT [REGEX] [OPTIONS]" \
    " orcli search --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  printf '%s\n' \
    "Options:" \
    " --index COLUMN" \
    " print column values instead of row.index in the first column of the output" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    " REGEX" \
    " search term (regular expression, case-sensitive)" \
    "" \
    "Examples:" \
    ' orcli search "duplicates" "^Ben"' \
    ' orcli search 1234567890123 "^Ben"' \
    ' orcli search "duplicates" "^F" --index "email"'
  # "\\\\t" reaches printf as "\\t" and is printed as the two characters \t.
  # A bare "\t" in the format would be printed as a literal TAB, so the
  # example would no longer show $'\t' as something to type.
  printf " orcli search \"duplicates\" | column -t -s \$'\\\\t'\n"
  # the facet examples stay printf format strings because of their nested escapes
  printf " orcli export tsv \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter(row.columnNames,cn,cells[cn].value.find(/^Ben/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n"
  printf " orcli export tsv \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter([\\\\\"gender\\\\\",\\\\\"purchase\\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n"
  printf '\n'
}
# :command.usage
# Print help for the "orcli sort" command group.
# Globals: long_usage (read) - when non-empty, the options are appended
# Outputs: help text on stdout
orcli_sort_usage() {
  printf '%s\n' \
    "orcli sort - commands to sort OpenRefine projects" \
    "" \
    "Usage:" \
    " orcli sort COMMAND" \
    " orcli sort [COMMAND] --help | -h" \
    "" \
    "Commands:"
  # sub-command name and description share one line
  printf ' %s %s\n' "columns" "re-order columns alphabetically"
  printf '\n'

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    ""
}
# :command.usage
# Print help for "orcli sort columns".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_sort_columns_usage() {
  printf '%s\n' \
    "orcli sort columns - re-order columns alphabetically" \
    "" \
    "Usage:" \
    " orcli sort columns PROJECT [OPTIONS]" \
    " orcli sort columns --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --first COLUMN (repeatable)" \
    " set key column(s)" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    "Examples:" \
    ' orcli sort columns "duplicates"' \
    ' orcli sort columns "duplicates" --first name' \
    ""
}
# :command.usage
# Print help for "orcli test".
# Globals: long_usage (read) - when non-empty, the options are appended
# Outputs: help text on stdout
orcli_test_usage() {
  printf '%s\n' \
    "orcli test - run functional tests on tmp OpenRefine workspace" \
    "" \
    "Usage:" \
    " orcli test" \
    " orcli test --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    ""
}
# :command.usage
# Print help for "orcli transform".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_transform_usage() {
  printf '%s\n' \
    "orcli transform - apply undo/redo JSON file(s) to an OpenRefine project" \
    "" \
    "Usage:" \
    " orcli transform PROJECT [FILE...] [OPTIONS]" \
    " orcli transform --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    " FILE..." \
    " Path to one or more files or URLs. When FILE is -, read standard input." \
    " Default: -" \
    "" \
    "Examples:" \
    ' orcli transform "duplicates" "history.json"' \
    ' cat "history.json" | orcli transform "duplicates"' \
    ' orcli transform "duplicates" "https://git.io/fj5ju"' \
    ' orcli transform 1234567890123 "history.json"' \
    ""
}
# :command.usage
# Print help for the "orcli export" command group.
# Globals: long_usage (read) - when non-empty, the options are appended
# Outputs: help text on stdout
orcli_export_usage() {
  printf '%s\n' \
    "orcli export - commands to export data from OpenRefine projects to files" \
    "" \
    "Usage:" \
    " orcli export COMMAND" \
    " orcli export [COMMAND] --help | -h" \
    "" \
    "Commands:"
  # sub-command/description pairs: printf re-applies the format to every pair
  printf ' %s %s\n' \
    "jsonl " "export JSON Lines / newline-delimited JSON" \
    "csv " "export comma-separated values (CSV)" \
    "tsv " "export tab-separated values (TSV)" \
    "template" "export to any text format by providing your own GREL template"
  printf '\n'

  [[ -n $long_usage ]] || return 0
  printf '%s\n' \
    "Options:" \
    " --help, -h" \
    " Show this help" \
    ""
}
# :command.usage
# Print help for "orcli export jsonl".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_export_jsonl_usage() {
  printf '%s\n' \
    "orcli export jsonl - export JSON Lines / newline-delimited JSON" \
    "" \
    "Usage:" \
    " orcli export jsonl PROJECT [OPTIONS]" \
    " orcli export jsonl --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # one argument per output line; "" produces the blank separator lines
  printf '%s\n' \
    "Options:" \
    " --mode MODE" \
    " specify if project contains multi-row records" \
    " Allowed: rows, records" \
    " Default: rows" \
    "" \
    " --separator SEPARATOR" \
    " character(s) that separates multiple values in one cell (row mode only)" \
    "" \
    " --facets FACETS" \
    " filter result set by providing an OpenRefine facets config in json" \
    " Default: []" \
    "" \
    " --output FILE" \
    " Write to file instead of stdout" \
    "" \
    " --encoding ENCODING" \
    " set character encoding" \
    " Default: UTF-8" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    "Examples:" \
    ' orcli export jsonl "duplicates"' \
    ' orcli export jsonl "duplicates" --output "duplicates.jsonl"' \
    " orcli export jsonl \"duplicates\" --separator ' '" \
    ' orcli export jsonl "duplicates" --mode records'
  # the facet examples stay printf format strings because of their nested escapes
  printf " orcli export jsonl \"duplicates\" --facets '[ { \"type\": \"text\", \"columnName\":\n \"name\", \"mode\": \"regex\", \"caseSensitive\": false, \"invert\": false, \"query\":\n \"^Ben\" } ]'\n"
  printf " orcli export jsonl \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter([\\\\\"gender\\\\\",\\\\\"purchase\\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n"
  printf '\n'
}
# :command.usage
# Print help for "orcli export csv".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
# NOTE: the file header says this script is generated by bashly; the example
# fix below presumably also needs to go into the generator's source config.
orcli_export_csv_usage() {
  printf '%s\n' \
    "orcli export csv - export comma-separated values (CSV)" \
    "" \
    "Usage:" \
    " orcli export csv PROJECT [OPTIONS]" \
    " orcli export csv --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # One argument per output line; "" produces the blank separator lines.
  # The --output example names a .csv file (it used to say "duplicates.tsv",
  # which does not match the format this command exports).
  printf '%s\n' \
    "Options:" \
    " --separator SEPARATOR" \
    " character(s) that separates columns" \
    " Default: ," \
    "" \
    " --select COLUMNS" \
    " filter result set to one or more columns (comma separated)" \
    ' example: --select "foo,bar,baz"' \
    "" \
    " --mode MODE" \
    " specify if project contains multi-row records" \
    " Allowed: rows, records" \
    " Default: rows" \
    "" \
    " --facets FACETS" \
    " filter result set by providing an OpenRefine facets config in json" \
    " Default: []" \
    "" \
    " --output FILE" \
    " Write to file instead of stdout" \
    "" \
    " --encoding ENCODING" \
    " set character encoding" \
    " Default: UTF-8" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    "Examples:" \
    ' orcli export csv "duplicates"' \
    ' orcli export csv "duplicates" --output "duplicates.csv"' \
    ' orcli export csv "duplicates" --separator ";"' \
    ' orcli export csv "duplicates" --encoding "ISO-8859-1"' \
    ' orcli export csv "duplicates" --select "name,email,purchase"'
  # the facet examples stay printf format strings because of their nested escapes
  printf " orcli export csv \"duplicates\" --facets '[ { \"type\": \"text\", \"columnName\":\n \"name\", \"mode\": \"regex\", \"caseSensitive\": false, \"invert\": false, \"query\":\n \"^Ben\" } ]'\n"
  printf " orcli export csv \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter([\\\\\"gender\\\\\",\\\\\"purchase\\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n"
  printf '\n'
}
# :command.usage
# Print help for "orcli export tsv".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_export_tsv_usage() {
  printf '%s\n' \
    "orcli export tsv - export tab-separated values (TSV)" \
    "" \
    "Usage:" \
    " orcli export tsv PROJECT [OPTIONS]" \
    " orcli export tsv --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # one argument per output line; "" produces the blank separator lines
  printf '%s\n' \
    "Options:" \
    " --select COLUMNS" \
    " filter result set to one or more columns (comma separated)" \
    ' example: --select "foo,bar,baz"' \
    "" \
    " --mode MODE" \
    " specify if project contains multi-row records" \
    " Allowed: rows, records" \
    " Default: rows" \
    "" \
    " --facets FACETS" \
    " filter result set by providing an OpenRefine facets config in json" \
    " Default: []" \
    "" \
    " --output FILE" \
    " Write to file instead of stdout" \
    "" \
    " --encoding ENCODING" \
    " set character encoding" \
    " Default: UTF-8" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    "Examples:" \
    ' orcli export tsv "duplicates"' \
    ' orcli export tsv "duplicates" --output "duplicates.tsv"' \
    ' orcli export tsv "duplicates" --encoding "ISO-8859-1"' \
    ' orcli export tsv "duplicates" --select "name,email,purchase"'
  # the facet examples stay printf format strings because of their nested escapes
  printf " orcli export tsv \"duplicates\" --facets '[ { \"type\": \"text\", \"columnName\":\n \"name\", \"mode\": \"regex\", \"caseSensitive\": false, \"invert\": false, \"query\":\n \"^Ben\" } ]'\n"
  printf " orcli export tsv \"duplicates\" --facets '[{ \"type\": \"list\", \"expression\":\n \"grel:filter([\\\\\"gender\\\\\",\\\\\"purchase\\\\\"],cn,cells[cn].value.find(/^F/).length()>0).length()>0\",\n \"columnName\": \"\", \"selection\": [{\"v\": {\"v\": true}}] }]'\n"
  printf '\n'
}
# :command.usage
# Print help for "orcli export template".
# Globals: long_usage (read) - when non-empty, options, arguments and
#          examples are appended to the synopsis.
# Outputs: help text on stdout
orcli_export_template_usage() {
  printf '%s\n' \
    "orcli export template - export to any text format by providing your own GREL template" \
    "" \
    "Usage:" \
    " orcli export template PROJECT [FILE] [OPTIONS]" \
    " orcli export template --help | -h" \
    ""

  [[ -n $long_usage ]] || return 0

  # one argument per output line; "" produces the blank separator lines
  printf '%s\n' \
    "Options:" \
    " --separator SEPARATOR" \
    " insert character(s) between rows/records" \
    "" \
    " --prefix PREFIX" \
    " insert character(s) at the beginning of the file" \
    "" \
    " --suffix SUFFIX" \
    " insert character(s) at the end of the file" \
    "" \
    " --mode MODE" \
    " specify if project contains multi-row records" \
    " Allowed: rows, records" \
    " Default: rows" \
    "" \
    " --facets FACETS" \
    " filter result set by providing an OpenRefine facets config in json" \
    " Default: []" \
    "" \
    " --output FILE" \
    " Write to file instead of stdout" \
    "" \
    " --encoding ENCODING" \
    " set character encoding" \
    " Default: UTF-8" \
    "" \
    " --quiet, -q" \
    " suppress log output, print errors only" \
    "" \
    " --help, -h" \
    " Show this help" \
    "" \
    "Arguments:" \
    " PROJECT" \
    " project name or id" \
    "" \
    " FILE" \
    " Path to row/record template file or URL. When FILE is -, read standard" \
    " input." \
    " Default: -" \
    "" \
    "Examples:" \
    ' orcli export template "duplicates" "template.txt"' \
    ' cat "template.txt" | orcli export template "duplicates"' \
    ' orcli export template "duplicates" "https://example.com/template.txt"' \
    ' orcli export template "duplicates" "template.txt" --output "duplicates.tsv"'
  # the pipeline example stays a printf format string because of its nested escapes
  printf " orcli export template \"duplicates\" \\\\\n <<< '{ \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" :\n {{jsonize(cells[\"purchase\"].value)}} }' \\\\\n --prefix '{ \"events\" : [' \\\\\n --separator , \\\\\n --mode records \\\\\n --suffix ]}$'\\\n' \\\\\n --facets '[ { \"type\": \"text\", \"columnName\": \"name\", \"mode\": \"regex\",\n \"caseSensitive\": false, \"invert\": false, \"query\": \"^Ben\" } ]' \\\\\n | jq\n"
  printf '\n'
}
# :command.usage
orcli_run_usage() {
  # Print help for "orcli run" on stdout.
  # The title and the usage synopsis are always printed; options, arguments
  # and examples follow only when the global long_usage is non-empty.
  # (The title line is the same for short and long usage.)
  printf "orcli run - run tmp OpenRefine workspace and execute shell script(s)\n"
  echo
  printf "%s\n" "Usage:"
  printf " orcli run [FILE...] [OPTIONS]\n"
  printf " orcli run --help | -h\n"
  echo
  # short usage ends here
  if [[ -z $long_usage ]]; then
    return 0
  fi
  # :command.long_usage
  printf "%s\n" "Options:"
  # :command.usage_flags
  printf " %s\n" "--memory RAM"
  printf " maximum RAM for OpenRefine java heap space\n"
  printf " Default: 2048M\n"
  echo
  printf " %s\n" "--port PORT"
  printf " PORT on which OpenRefine should listen\n"
  printf " Default: 3333\n"
  echo
  printf " %s\n" "--interactive"
  printf " do not exit on error and keep bash shell open\n"
  echo
  printf " %s\n" "--quiet, -q"
  printf " suppress log output, print errors only\n"
  echo
  # :command.usage_fixed_flags
  printf " %s\n" "--help, -h"
  printf " Show this help\n"
  echo
  # :command.usage_args
  printf "%s\n" "Arguments:"
  printf " %s\n" "FILE..."
  printf " Path to one or more files or URLs. When FILE is -, read standard input.\n"
  printf " Default: -\n"
  echo
  # :command.usage_examples
  printf "%s\n" "Examples:"
  printf " orcli run --interactive\n"
  printf " orcli run << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\"\n orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n orcli export tsv \"duplicates\"\n EOF\n"
  printf " orcli run --memory \"2000M\" --port \"3334\" << EOF\n orcli import csv \"https://git.io/fj5hF\" --projectName \"duplicates\" &\n orcli import csv \"https://git.io/fj5hF\" --projectName \"copy\" &\n wait\n echo \"finished import\"\n orcli export csv \"duplicates\" --output duplicates.csv &\n orcli export tsv \"duplicates\" --output duplicates.tsv &\n wait\n wc duplicates*\n EOF\n"
  printf " orcli run --interactive \"file1.sh\" \"file2.sh\" - << EOF\n echo \"finished in \$SECONDS seconds\"\n EOF\n"
  echo
}
# :command.normalize_input
normalize_input() {
  # Normalize raw command line tokens and append them to the global array
  # "input":
  #   --flag=value / -f=value  -> two elements (flag, value)
  #   -abc                     -> one element per bundled short flag
  #   anything else            -> passed through unchanged
  local arg flags
  for arg in "$@"; do
    if [[ $arg =~ ^(--[a-zA-Z0-9_\-]+)=(.+)$ ]] || [[ $arg =~ ^(-[a-zA-Z0-9])=(.+)$ ]]; then
      # BASH_REMATCH belongs to whichever of the two patterns matched
      input+=("${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}")
    elif [[ $arg =~ ^-([a-zA-Z0-9][a-zA-Z0-9]+)$ ]]; then
      flags="${BASH_REMATCH[1]}"
      for ((i = 0; i < ${#flags}; i++)); do
        input+=("-${flags:i:1}")
      done
    else
      input+=("$arg")
    fi
  done
}
# :command.inspect_args
inspect_args() {
  # Debug helper: dump the global collections args, other_args and deps on
  # stdout. Keys of args and deps are listed in sorted order; the other_args
  # and deps sections are omitted when empty.
  if ((${#args[@]} == 0)); then
    echo args: none
  else
    mapfile -t sorted_keys < <(printf '%s\n' "${!args[@]}" | sort)
    echo args:
    for k in "${sorted_keys[@]}"; do
      printf '%s\n' "- \${args[$k]} = ${args[$k]}"
    done
  fi
  if ((${#other_args[@]} > 0)); then
    echo
    echo other_args:
    printf '%s\n' "- \${other_args[*]} = ${other_args[*]}"
    for i in "${!other_args[@]}"; do
      printf '%s\n' "- \${other_args[$i]} = ${other_args[$i]}"
    done
  fi
  if ((${#deps[@]} > 0)); then
    mapfile -t sorted_keys < <(printf '%s\n' "${!deps[@]}" | sort)
    echo
    echo deps:
    for k in "${sorted_keys[@]}"; do
      printf '%s\n' "- \${deps[$k]} = ${deps[$k]}"
    done
  fi
}
# :command.user_lib
# src/lib/get_csrf.sh
# get CSRF token (introduced in OpenRefine 3.3)
# shellcheck shell=bash
get_csrf() {
  # Print the query string "?csrf_token=<token>" for the server at
  # OPENREFINE_URL. If the token endpoint is unavailable but get-version
  # answers, nothing is printed; if neither answers, abort via error().
  local response
  if response="$(curl -fs "${OPENREFINE_URL}/command/core/get-csrf-token")"; then
    if [[ "${response}" != '{"token":"'* ]]; then
      error "getting CSRF token failed!"
    fi
    # the token is the 4th field when splitting the response on double quotes
    echo "?csrf_token=$(cut -d '"' -f 4 <<<"$response")"
    return
  fi
  # no token endpoint: only fail if the server is not reachable at all
  if ! response="$(curl -fs "${OPENREFINE_URL}/command/core/get-version")"; then
    error "no OpenRefine reachable/running at ${OPENREFINE_URL}"
  fi
}
# src/lib/get_id.sh
# get project id (derived from project name if needed)
# shellcheck shell=bash
function get_id() {
  # Resolve a project name or project id to exactly one project id.
  # Arguments: $1 - project name or project id
  # Outputs:   the project id on stdout
  # Aborts via error() when the server is unreachable, when no project
  # matches, or when more than one project matches.
  local response line
  local -a matches=()
  if ! response="$(curl -fs --get "${OPENREFINE_URL}/command/core/get-all-project-metadata")"; then
    error "no OpenRefine reachable/running at ${OPENREFINE_URL}"
  fi
  # jq prints one "id:name" line per project. Compare id and name literally:
  # feeding $1 to grep as a regex made names such as "a.b" also match "axb"
  # and broke on names containing regex metacharacters.
  while IFS= read -r line; do
    if [[ "${line%%:*}" == "$1" || "${line#*:}" == "$1" ]]; then
      matches+=("$line")
    fi
  done < <(jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"' <<<"$response")
  if ((${#matches[@]} == 0)); then
    error "project $1 not found"
  fi
  if ((${#matches[@]} > 1)); then
    error "multiple projects found" "$(printf '%s\n' "${matches[@]}")"
  fi
  echo "${matches[0]%%:*}"
}
function get_ids() {
  # Resolve a project name or project id to all matching project ids.
  # Arguments: $1 - project name or project id
  # Outputs:   one project id per line on stdout
  # Aborts via error() when the server is unreachable or nothing matches.
  local response line
  local -a matches=()
  if ! response="$(curl -fs --get "${OPENREFINE_URL}/command/core/get-all-project-metadata")"; then
    error "no OpenRefine reachable/running at ${OPENREFINE_URL}"
  fi
  # jq prints one "id:name" line per project. Compare id and name literally:
  # feeding $1 to grep as a regex made names such as "a.b" also match "axb"
  # and broke on names containing regex metacharacters.
  while IFS= read -r line; do
    if [[ "${line%%:*}" == "$1" || "${line#*:}" == "$1" ]]; then
      matches+=("$line")
    fi
  done < <(jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"' <<<"$response")
  if ((${#matches[@]} == 0)); then
    error "project $1 not found"
  fi
  # strip ":name" from every match, leaving the ids
  printf '%s\n' "${matches[@]%%:*}"
}
# src/lib/init_import.sh
# common import tasks to support multiple files and URLs
# shellcheck shell=bash disable=SC2154
init_import() {
  # Prepare the input of an import command.
  # Reads the quoted, space delimited list in args[file], creates a temporary
  # directory (global tmpdir, removed by a trap), materializes URLs and
  # /dev/fd pipes as files inside it and sets the global "file" to the single
  # input or to a zip archive bundling several inputs.
  local files=()
  local target
  # args[file] holds shell-quoted words; eval turns them back into an array
  eval "files=(${args[file]})"
  tmpdir="$(mktemp -d)"
  trap 'rm -rf "$tmpdir"' 0 2 3 15
  # fetch remote inputs (http:// or https://) into the tmp directory
  for i in "${!files[@]}"; do
    case "${files[$i]}" in
      "http://"* | "https://"*)
        target="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
        if ! curl -fs --location "${files[$i]}" >"$target"; then
          error "download of ${files[$i]} failed!"
        fi
        files[i]="$target"
        ;;
    esac
  done
  # copy the content of pipes (/dev/fd*) into the tmp directory
  for i in "${!files[@]}"; do
    case "${files[$i]}" in
      "/dev/fd"*)
        target="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
        if ! cat "${files[$i]}" >"$target"; then
          error "reading of ${files[$i]} failed!"
        fi
        files[i]="$target"
        ;;
    esac
  done
  # a single input is used as is, several inputs are bundled in a zip archive
  if [[ ${#files[@]} -le 1 ]]; then
    file="${files[0]}"
  else
    file="$tmpdir/Untitled.zip"
    if ! zip --quiet --must-match "$file" "${files[@]}"; then
      error "creating zip archive with ${files[*]} failed!"
    fi
  fi
}
# src/lib/interactive.sh
# shellcheck shell=bash
interactive() {
  # Print bash commands on stdout that set a prompt, load the orcli
  # completions and show a banner. The heredoc delimiter is quoted, so the
  # text is emitted literally without any expansion.
  cat <<'INTERACTIVE_RC'
PS1="(orcli) [\u@\h \W]\$ "
source <(orcli completions)
echo '================================================================'
echo 'Interactive Bash shell with OpenRefine running in the background'
echo 'Use the "orcli" command and tab completion to control OpenRefine'
echo 'Type "history -a FILE" to write out your session history'
echo 'Type "exit" or CTRL-D to destroy temporary OpenRefine workspace'
echo '================================================================'
INTERACTIVE_RC
}
# src/lib/logging.sh
# print messages to STDERR
# shellcheck shell=bash
error() {
  # Print a timestamped error on stderr and terminate with exit status 1.
  # Arguments: $1 - headline, remaining arguments - detail lines
  # If $OPENREFINE_TMPDIR/openrefine.log exists, its last 50 lines are
  # appended to the message.
  {
    printf '%s\n' "[$(date +'%Y-%m-%dT%H:%M:%S')] ERROR: $1"
    shift
    for msg in "$@"; do
      printf '%s\n' " $msg"
    done
    if [[ -f "$OPENREFINE_TMPDIR/openrefine.log" ]]; then
      printf '%s\n' "last 50 lines of OpenRefine's server log:"
      printf '%s\n' "-----------------------------------------"
      tail -50 "$OPENREFINE_TMPDIR/openrefine.log"
      printf '%s\n' "-----------------------------------------"
    fi
  } >&2
  exit 1
}
log() {
  # Print a timestamped message on stderr.
  # Arguments: $1 - headline, remaining arguments - detail lines
  # Prints nothing when args[--quiet] or ORCLI_QUIET is non-empty.
  if [[ ${args[--quiet]} || $ORCLI_QUIET ]]; then
    return 0
  fi
  printf '%s\n' "[$(date +'%Y-%m-%dT%H:%M:%S')] $1" >&2
  shift
  for msg in "$@"; do
    printf '%s\n' " $msg" >&2
  done
}
# src/lib/post_export.sh
# post to export-rows endpoint
# shellcheck shell=bash disable=SC2154
post_export() {
  # POST an export request to OpenRefine's export-rows endpoint.
  # Arguments: "key=value" pairs, each sent url-encoded as form data
  # Reads args[--mode], args[--facets], args[--output] and args[project].
  # The export goes to stdout unless args[--output] names a file.
  local curloptions
  for d in "$@"; do
    curloptions+=("--data-urlencode" "$d")
  done
  # the engine config carries the facets filter and the row/record mode
  mode="row-based"
  if [[ ${args[--mode]} == "records" ]]; then
    mode="record-based"
  fi
  curloptions+=("--data-urlencode" "engine={\"facets\":${args[--facets]},\"mode\":\"${mode}\"}")
  # write to a file on request and create its parent directory first
  if [[ ${args[--output]} ]]; then
    mkdir -p "$(dirname "${args[--output]}")" \
      || error "unable to create parent directory for ${args[--output]}"
    curloptions+=("--output" "${args[--output]}")
  fi
  if curl -fs "${curloptions[@]}" "${OPENREFINE_URL}/command/core/export-rows"; then
    if [[ ${args[--output]} ]]; then
      log "exported ${args[project]}" "file: ${args[--output]}" "lines: $(wc -l <"${args[--output]}")"
    fi
  else
    error "exporting ${args[project]} failed!"
  fi
}
# src/lib/post_import.sh
# post to create-project endpoint and validate
# shellcheck shell=bash disable=SC2154
function post_import() {
  # POST a file (or stdin) to OpenRefine's create-project-from-upload endpoint
  # and validate the result.
  # Arguments: "key=value" pairs, each sent as a literal form field
  # Reads the global "file" ("-" means stdin) and args[file],
  # args[--projectName] and args[--rename].
  # Aborts via error() when the upload fails, when no project id comes back,
  # or when the new project contains 0 rows.
  local curloptions projectid projectname rows
  for d in "$@"; do
    curloptions+=("--form-string")
    curloptions+=("$d")
  done
  # basic post data
  if [[ ${file} == "-" ]]; then
    curloptions+=("--form" "project-file=@-")
  else
    if ! path=$(readlink -e "${file}"); then
      error "cannot open ${file} (no such file)!"
    fi
    curloptions+=("--form" "project-file=@${path}")
  fi
  if [[ ${args[--projectName]} ]]; then
    curloptions+=("--form-string" "project-name=${args[--projectName]}")
  else
    # derive a project name from the file name (dots become spaces)
    if [[ ${file} == "-" ]]; then
      name="Untitled"
    else
      name="$(basename "${path}" | tr '.' ' ')"
    fi
    curloptions+=("--form-string" "project-name=${name}")
  fi
  # post
  if ! redirect_url="$(curl -fs --write-out "%{redirect_url}\n" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/create-project-from-upload$(get_csrf)")"; then
    error "importing ${args[file]} failed!"
  fi
  # validate: the project id is expected after "=" in the redirect URL
  projectid=$(cut -d '=' -f 2 <<<"$redirect_url")
  if [[ ${#projectid} != 13 ]]; then
    error "importing ${args[file]} failed!"
  fi
  # Parse the JSON responses with jq. Splitting on "," and ":" and grepping
  # for "name"/"total" truncated names containing those characters and picked
  # up every other key that merely contains the word, which in turn could
  # hide a "0 rows" result.
  projectname=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-project-metadata" | jq -r '.name')
  rows=$(curl -fs --get --data project="$projectid" --data limit=0 "${OPENREFINE_URL}/command/core/get-rows" | jq -r '.total')
  if [[ "$rows" = "0" ]]; then
    error "import of ${args[file]} contains 0 rows!"
  else
    log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}"
  fi
  # json / jsonl --rename: strip the leading "_ - " from every column name
  if [[ ${args[--rename]} ]]; then
    csrf="$(get_csrf)"
    readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')
    for c in "${columns[@]}"; do
      if ! curl -fs -o /dev/null --data project="$projectid" --data "oldColumnName=${c}" --data "newColumnName=${c##_ - }" "${OPENREFINE_URL}/command/core/rename-column${csrf}"; then
        error "renaming columns in ${projectname} failed!"
      fi
    done
    log "renamed columns in ${projectname}"
  fi
}
# src/lib/send_completions.sh
send_completions() {
# Print the bash completion script for orcli on stdout.
# Every echo emits one line of that script. The lines use $'...' quoting, so
# \' produces a literal single quote while $ and " are passed through as is.
# The emitted script defines _orcli_completions_filter and _orcli_completions
# and registers the latter for the orcli command with "complete -F".
# In the emitted case statement the '--mode' arms precede the general
# subcommand arms, and the catch-all *) arm comes last.
echo $'# orcli completion -*- shell-script -*-'
echo $''
echo $'# This bash completions script was generated by'
echo $'# completely (https://github.com/dannyben/completely)'
echo $'# Modifying it manually is not recommended'
echo $''
echo $'_orcli_completions_filter() {'
echo $' local words="$1"'
echo $' local cur=${COMP_WORDS[COMP_CWORD]}'
echo $' local result=()'
echo $''
echo $' if [[ "${cur:0:1}" == "-" ]]; then'
echo $' echo "$words"'
echo $' '
echo $' else'
echo $' for word in $words; do'
echo $' [[ "${word:0:1}" != "-" ]] && result+=("$word")'
echo $' done'
echo $''
echo $' echo "${result[*]}"'
echo $''
echo $' fi'
echo $'}'
echo $''
echo $'_orcli_completions() {'
echo $' local cur=${COMP_WORDS[COMP_CWORD]}'
echo $' local compwords=("${COMP_WORDS[@]:1:$COMP_CWORD-1}")'
echo $' local compline="${compwords[*]}"'
echo $''
echo $' case "$compline" in'
echo $' \'export template\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export jsonl\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*\'--mode\')'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "rows records")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export template\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --prefix --quiet --separator --suffix -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'sort columns\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--first --help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --separator -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'completions\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import json\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --separator --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export csv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select --separator -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --mode --output --quiet --select -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'transform\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --quiet -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'delete\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--force --help --quiet -f -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv json jsonl tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'search\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --index -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv jsonl template tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'list\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'info\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'sort\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h columns")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'test\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'run\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --interactive --memory --port --quiet -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' *)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run search sort test transform")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' esac'
echo $'} &&'
echo $'complete -F _orcli_completions orcli'
echo $''
echo $'# ex: filetype=sh'
}
# :command.command_functions
# :command.function
orcli_completions_command() {
  # src/completions_command.sh
  # Entry point of "orcli completions": write the bash completion script to
  # stdout. To enable completion in the current shell, evaluate the output:
  #
  #   $ eval "$(orcli completions)"
  #
  send_completions
}
# :command.function
orcli_delete_command() {
  # src/delete_command.sh
  # shellcheck shell=bash disable=SC2154
  # Delete the project given in args[project]. With args[--force] every
  # project found by get_ids is deleted, otherwise get_id must resolve to
  # exactly one project.
  if [[ ${args[--force]} ]]; then
    projectids="$(get_ids "${args[project]}")"
  else
    projectids="$(get_id "${args[project]}")"
  fi
  # intentionally unquoted: split the list of ids into words
  for projectid in ${projectids}; do
    # request a fresh csrf token for every delete request
    if response="$(curl -fs --data "project=${projectid}" "${OPENREFINE_URL}/command/core/delete-project$(get_csrf)")"; then
      response_code="$(jq -r '.code' <<<"$response")"
      case "$response_code" in
        "ok") log "deleted ${args[project]} (${projectid})" ;;
        *) error "deleting ${args[project]} failed!" ;;
      esac
    else
      error "deleting ${args[project]} failed!"
    fi
  done
}
# :command.function
orcli_import_csv_command() {
  # src/import_csv_command.sh
  # shellcheck shell=bash disable=SC2154
  # Create an OpenRefine project from separator-delimited input.
  # Translates the command line flags in args[] into the JSON "options"
  # object of the import request and hands the post data to post_import.
  # call init_import function to eval args and to set basic post data
  init_import
  # check if stdin is present if selected
  if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
    orcli_import_csv_usage
    exit 1
  fi
  # assemble specific post data (some options require json format)
  data+=("format=text/line-based/*sv")
  options='{ '
  options+="\"separator\": \"${args[--separator]}\""
  if [[ ${args[--encoding]} ]]; then
    options+=', '
    options+="\"encoding\": \"${args[--encoding]}\""
  fi
  if [[ ${args[--blankCellsAsStrings]} ]]; then
    options+=', '
    options+='"storeBlankCellsAsNulls": false'
  fi
  if [[ ${args[--columnNames]} ]]; then
    # comma separated list -> quoted, comma separated JSON array elements
    IFS=',' read -ra columnNames <<<"${args[--columnNames]}"
    options+=', '
    options+="\"columnNames\": [ $(printf ',"%s"' "${columnNames[@]}" | cut -c2-) ]"
  fi
  if [[ ${args[--guessCellValueTypes]} ]]; then
    options+=', '
    options+='"guessCellValueTypes": true'
  fi
  if [[ ${args[--headerLines]} ]]; then
    options+=', '
    options+="\"headerLines\": ${args[--headerLines]}"
  fi
  if [[ ${args[--ignoreLines]} ]]; then
    options+=', '
    options+="\"ignoreLines\": ${args[--ignoreLines]}"
  fi
  if [[ ${args[--ignoreQuoteCharacter]} ]]; then
    options+=', '
    options+='"processQuotes": false'
  fi
  if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    # the key must be quoted like all others, otherwise the JSON is invalid
    options+='"includeFileSources": true'
  fi
  if [[ ${args[--includeArchiveFileName]} ]]; then
    options+=', '
    options+='"includeArchiveFileName": true'
  fi
  if [[ ${args[--limit]} ]]; then
    options+=', '
    options+="\"limit\": ${args[--limit]}"
  fi
  if [[ ${args[--quoteCharacter]} ]]; then
    options+=', '
    options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
  fi
  if [[ ${args[--skipBlankRows]} ]]; then
    options+=', '
    options+='"storeBlankRows": false'
  fi
  if [[ ${args[--skipDataLines]} ]]; then
    options+=', '
    options+="\"skipDataLines\": ${args[--skipDataLines]}"
  fi
  if [[ ${args[--projectName]} ]]; then
    options+=', '
    options+="\"projectName\": \"${args[--projectName]}\""
  fi
  if [[ ${args[--projectTags]} ]]; then
    IFS=',' read -ra projectTags <<<"${args[--projectTags]}"
    options+=', '
    options+="\"projectTags\": [ $(printf ',"%s"' "${projectTags[@]}" | cut -c2-) ]"
  fi
  if [[ ${args[--trimStrings]} ]]; then
    options+=', '
    options+='"trimStrings": true'
  fi
  options+=' }'
  data+=("options=${options}")
  # call post_import function to post data and validate results
  post_import "${data[@]}"
}
# :command.function
orcli_import_tsv_command() {
  # src/import_tsv_command.sh
  # shellcheck shell=bash disable=SC2154
  # Create an OpenRefine project from tab-separated input.
  # Translates the command line flags in args[] into the JSON "options"
  # object of the import request and hands the post data to post_import.
  # call init_import function to eval args and to set basic post data
  init_import
  # check if stdin is present if selected
  if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
    orcli_import_tsv_usage
    exit 1
  fi
  # assemble specific post data (some options require json format)
  data+=("format=text/line-based/*sv")
  options='{ '
  # the separator is fixed: a JSON-escaped tab
  options+="\"separator\": \"\\t\""
  if [[ ${args[--encoding]} ]]; then
    options+=', '
    options+="\"encoding\": \"${args[--encoding]}\""
  fi
  if [[ ${args[--blankCellsAsStrings]} ]]; then
    options+=', '
    options+='"storeBlankCellsAsNulls": false'
  fi
  if [[ ${args[--columnNames]} ]]; then
    # comma separated list -> quoted, comma separated JSON array elements
    IFS=',' read -ra columnNames <<<"${args[--columnNames]}"
    options+=', '
    options+="\"columnNames\": [ $(printf ',"%s"' "${columnNames[@]}" | cut -c2-) ]"
  fi
  if [[ ${args[--guessCellValueTypes]} ]]; then
    options+=', '
    options+='"guessCellValueTypes": true'
  fi
  if [[ ${args[--headerLines]} ]]; then
    options+=', '
    options+="\"headerLines\": ${args[--headerLines]}"
  fi
  if [[ ${args[--ignoreLines]} ]]; then
    options+=', '
    options+="\"ignoreLines\": ${args[--ignoreLines]}"
  fi
  if [[ ${args[--ignoreQuoteCharacter]} ]]; then
    options+=', '
    options+='"processQuotes": false'
  fi
  if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    # the key must be quoted like all others, otherwise the JSON is invalid
    options+='"includeFileSources": true'
  fi
  if [[ ${args[--includeArchiveFileName]} ]]; then
    options+=', '
    options+='"includeArchiveFileName": true'
  fi
  if [[ ${args[--limit]} ]]; then
    options+=', '
    options+="\"limit\": ${args[--limit]}"
  fi
  if [[ ${args[--quoteCharacter]} ]]; then
    options+=', '
    options+="\"quoteCharacter\": \"${args[--quoteCharacter]}\""
  fi
  if [[ ${args[--skipBlankRows]} ]]; then
    options+=', '
    options+='"storeBlankRows": false'
  fi
  if [[ ${args[--skipDataLines]} ]]; then
    options+=', '
    options+="\"skipDataLines\": ${args[--skipDataLines]}"
  fi
  if [[ ${args[--projectName]} ]]; then
    options+=', '
    options+="\"projectName\": \"${args[--projectName]}\""
  fi
  if [[ ${args[--projectTags]} ]]; then
    IFS=',' read -ra projectTags <<<"${args[--projectTags]}"
    options+=', '
    options+="\"projectTags\": [ $(printf ',"%s"' "${projectTags[@]}" | cut -c2-) ]"
  fi
  if [[ ${args[--trimStrings]} ]]; then
    options+=', '
    options+='"trimStrings": true'
  fi
  options+=' }'
  data+=("options=${options}")
  # call post_import function to post data and validate results
  post_import "${data[@]}"
}
# :command.function
orcli_import_json_command() {
  # src/import_json_command.sh
  # shellcheck shell=bash disable=SC2154
  # Create an OpenRefine project from JSON input.
  # Translates the command line flags in args[] into the JSON "options"
  # object of the import request and hands the post data to post_import.
  # call init_import function to eval args and to set basic post data
  init_import
  # check if stdin is present if selected
  if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
    orcli_import_json_usage
    exit 1
  fi
  # assemble specific post data (some options require json format)
  data+=("format=text/json")
  options='{ '
  # args[--recordPath] is inserted verbatim, so it must already be JSON
  options+="\"recordPath\": ${args[--recordPath]}"
  if [[ ${args[--guessCellValueTypes]} ]]; then
    options+=', '
    options+='"guessCellValueTypes": true'
  fi
  if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    # the key must be quoted like all others, otherwise the JSON is invalid
    options+='"includeFileSources": true'
  fi
  if [[ ${args[--includeArchiveFileName]} ]]; then
    options+=', '
    options+='"includeArchiveFileName": true'
  fi
  if [[ ${args[--limit]} ]]; then
    options+=', '
    options+="\"limit\": ${args[--limit]}"
  fi
  if [[ ${args[--storeEmptyStrings]} ]]; then
    options+=', '
    options+='"storeEmptyStrings": true'
  fi
  if [[ ${args[--projectName]} ]]; then
    options+=', '
    options+="\"projectName\": \"${args[--projectName]}\""
  fi
  if [[ ${args[--projectTags]} ]]; then
    # comma separated list -> quoted, comma separated JSON array elements
    IFS=',' read -ra projectTags <<<"${args[--projectTags]}"
    options+=', '
    options+="\"projectTags\": [ $(printf ',"%s"' "${projectTags[@]}" | cut -c2-) ]"
  fi
  if [[ ${args[--trimStrings]} ]]; then
    options+=', '
    options+='"trimStrings": true'
  fi
  options+=' }'
  data+=("options=${options}")
  # call post_import function to post data and validate results
  post_import "${data[@]}"
}
# :command.function
orcli_import_jsonl_command() {
  # src/import_jsonl_command.sh
  # shellcheck shell=bash disable=SC2154
  # Create an OpenRefine project from JSON Lines input.
  # Translates the command line flags in args[] into the JSON "options"
  # object of the import request and hands the post data to post_import.
  # call init_import function to eval args and to set basic post data
  init_import
  # check if stdin is present if selected
  if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
    orcli_import_jsonl_usage
    exit 1
  fi
  # assemble specific post data (some options require json format)
  data+=("format=text/json")
  options='{ '
  # the record path is fixed for this format
  options+="\"recordPath\": [\"_\"]"
  if [[ ${args[--guessCellValueTypes]} ]]; then
    options+=', '
    options+='"guessCellValueTypes": true'
  fi
  if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    # the key must be quoted like all others, otherwise the JSON is invalid
    options+='"includeFileSources": true'
  fi
  if [[ ${args[--includeArchiveFileName]} ]]; then
    options+=', '
    options+='"includeArchiveFileName": true'
  fi
  if [[ ${args[--limit]} ]]; then
    options+=', '
    options+="\"limit\": ${args[--limit]}"
  fi
  if [[ ${args[--storeEmptyStrings]} ]]; then
    options+=', '
    options+='"storeEmptyStrings": true'
  fi
  if [[ ${args[--projectName]} ]]; then
    options+=', '
    options+="\"projectName\": \"${args[--projectName]}\""
  fi
  if [[ ${args[--projectTags]} ]]; then
    # comma separated list -> quoted, comma separated JSON array elements
    IFS=',' read -ra projectTags <<<"${args[--projectTags]}"
    options+=', '
    options+="\"projectTags\": [ $(printf ',"%s"' "${projectTags[@]}" | cut -c2-) ]"
  fi
  if [[ ${args[--trimStrings]} ]]; then
    options+=', '
    options+='"trimStrings": true'
  fi
  options+=' }'
  data+=("options=${options}")
  # call post_import function to post data and validate results
  post_import "${data[@]}"
}
# :command.function
orcli_list_command() {
  # src/list_command.sh
  # shellcheck shell=bash
  # Print one "id:name" line per project on the OpenRefine server, or log a
  # note when the server holds no projects. Aborts via error() when the
  # server cannot be reached.
  if response="$(curl -fs --get "${OPENREFINE_URL}/command/core/get-all-project-metadata")"; then
    case "${response}" in
      '{"projects":{}}')
        log "${OPENREFINE_URL} does not contain any projects yet."
        ;;
      *)
        # reshape the metadata json into a plain list
        jq -r '.projects | keys[] as $k | "\($k):\(.[$k] | .name)"' <<<"$response"
        ;;
    esac
  else
    error "no OpenRefine reachable/running at ${OPENREFINE_URL}"
  fi
}
# :command.function
orcli_info_command() {
  # src/info_command.sh
  # shellcheck shell=bash disable=SC2154
  # Print the metadata of the project in args[project] as one JSON object:
  # the project id first, then the server's metadata, then the column names.
  projectid="$(get_id "${args[project]}")"
  if response="$(curl -fs --get --data "project=${projectid}" "${OPENREFINE_URL}/command/core/get-project-metadata")"; then
    # column names come from a second endpoint (get-models)
    columns="$(
      curl -fs --get --data "project=${projectid}" "${OPENREFINE_URL}/command/core/get-models" \
        | jq '[ .columnModel | .columns[] | .name ]'
    )"
    jq "{ id: ${projectid} } + . + {columns: $columns }" <<<"$response"
  else
    error "reading metadata of ${args[project]} failed!"
  fi
}
# :command.function
orcli_search_command() {
  # src/search_command.sh
  # shellcheck shell=bash disable=SC2154
  # Search every column of the project in args[project] for args[regex] and
  # export the matches through a GREL template, one tab separated line per
  # matching cell: row number (or the value of the args[--index] column),
  # column name and cell value.
  local index_expr
  projectid="$(get_id "${args[project]}")"
  # facets config: keep only rows in which at least one column matches
  args['--facets']='[ { "type": "list", "expression": "grel:filter(row.columnNames,cn,cells[cn].value.find(/'"${args[regex]}"'/).length()>0).length()>0", "columnName": "", "selection": [ { "v": { "v": true } } ] } ]'
  # first output field: value of the index column or the 1-based row number
  if [[ ${args[--index]} ]]; then
    index_expr='cells["'"${args[--index]}"'"].value'
  else
    index_expr='(row.index + 1)'
  fi
  # template: loop over the matching columns of a row
  template='{{'
  template+='forEach(filter(row.columnNames, cn, cells[cn].value.find(/'"${args[regex]}"'/).length()>0), cn,'
  template+="${index_expr}"
  template+='+ "\t" + cn + "\t" +'
  # wrap values in double quotes (doubling embedded quotes) where needed
  template+='forNonBlank(cells[cn].value, v, if(v.contains(" "), if(v.contains('\''"'\''), '\''"'\'' + v.replace('\''"'\'','\''""'\'') + '\''"'\'', '\''"'\'' + v + '\''"'\''), v),"")'
  template+='+ "\n")'
  template+='}}'
  # assemble specific post data
  data+=("project=${projectid}" "format=template" "template=${template}")
  # call post_export function to post data and validate results
  post_export "${data[@]}"
}
# :command.function
orcli_sort_columns_command() {
  # src/sort_columns_command.sh
  # shellcheck shell=bash
  # Re-order the columns of the project in args[project]: the columns listed
  # in args[--first] come first in the given order, all remaining columns
  # follow sorted by name.
  # catch args, convert the space delimited string to an array
  first=()
  eval "first=(${args[--first]})"
  # convert to a comma-separated list of quoted elements; printf runs its
  # format once even without arguments, so an empty list has to be handled
  # explicitly or a phantom column named "" would be put in front
  columns=""
  if ((${#first[@]})); then
    columns=$(printf ',"%s"' "${first[@]}" | cut -c2-)
  fi
  # get project id
  projectid="$(get_id "${args[project]}")"
  csrf="$(get_csrf)"
  # $columns + (all column names without those in $columns, sorted)
  if ! sorted=$(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq --argjson columns "[ ${columns} ]" '($columns) + ([ .[].name ] | del (.[] | select (. | IN( $columns[] ))) | sort)'); then
    error "getting columns in ${args[project]} failed!"
  fi
  if ! curl -fs -o /dev/null --data project="$projectid" --data "columnNames=${sorted}" "${OPENREFINE_URL}/command/core/reorder-columns${csrf}"; then
    error "sorting columns in ${args[project]} failed!"
  fi
  log "sorted columns in ${args[project]}"
}
# :command.function
orcli_test_command() {
  # src/test_command.sh
  # shellcheck shell=bash disable=SC2154
  # Run the functional tests (tests/*.sh) against an OpenRefine instance
  # that is started with a temporary workspace. The "refine" startup script
  # is expected next to this script. Test files are downloaded from GitHub
  # when tests/help.sh is not found in the current directory.
  # Aborts via error() when a prerequisite is missing or any test fails.
  # locate orcli and OpenRefine
  scriptpath=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
  if [[ -x "${scriptpath}/refine" ]]; then
    openrefine="${scriptpath}/refine"
  else
    error "OpenRefine's startup script (refine) not found!" "Did you put orcli in your OpenRefine app dir?"
  fi
  # check if OpenRefine is already running
  if curl -fs "${OPENREFINE_URL}" &>/dev/null; then
    error "OpenRefine is already running on port 3333." "Please stop the other process."
  fi
  # create tmp directory
  OPENREFINE_TMPDIR="$(mktemp -d)"
  trap '{ rm -rf "$OPENREFINE_TMPDIR"; }' 0 2 3 15
  # download the test files if needed
  if ! [[ -f "tests/help.sh" ]]; then
    cd "$OPENREFINE_TMPDIR" || error "cannot change to ${OPENREFINE_TMPDIR}!"
    if ! curl -fs -L -o orcli.zip https://github.com/opencultureconsulting/orcli/archive/refs/heads/main.zip; then
      error "downloading test files failed!" "Please download the tests dir manually from GitHub."
    fi
    unzip -q -j orcli.zip "*/tests/*.sh" -d "tests/"
    unzip -q -j orcli.zip "*/tests/data/*" -d "tests/data/"
  fi
  # start OpenRefine with tmp workspace
  # (quoted, so that an install path containing spaces is not split)
  "$openrefine" -d "$OPENREFINE_TMPDIR" -x refine.headless=true -v warn &>"$OPENREFINE_TMPDIR/openrefine.log" &
  OPENREFINE_PID="$!"
  # update trap to kill OpenRefine on error or exit
  trap '{ rm -rf "$OPENREFINE_TMPDIR"; rm -rf /tmp/jetty-127_0_0_1-3333*; kill -9 "$OPENREFINE_PID"; }' 0 2 3 15
  # wait until OpenRefine is running (timeout 20s)
  for i in {1..20}; do
    sleep 1
    if curl -fs "${OPENREFINE_URL}/command/core/get-version" &>/dev/null; then
      log "started OpenRefine with tmp workspace ${OPENREFINE_TMPDIR}"
      break
    fi
    if [[ $i == 20 ]]; then
      error "starting OpenRefine server failed!"
    fi
  done
  # execute tests in subshell
  export OPENREFINE_TMPDIR OPENREFINE_URL OPENREFINE_PID
  cd "tests" || error "cannot change to tests directory!"
  files=(*.sh)
  results=()
  for i in "${!files[@]}"; do
    set +e # do not exit on failed tests
    # each test file runs in its own bash with "orcli" aliased to this script
    bash -e <(
      echo "shopt -s expand_aliases"
      echo "alias orcli=${scriptpath}/orcli"
      awk 1 "${files[$i]}"
    ) &>"$OPENREFINE_TMPDIR/test.log"
    results+=("$?")
    set -e
    # any non-zero digit in the exit code marks the test as failed
    if [[ "${results[$i]}" =~ [1-9] ]]; then
      cat "$OPENREFINE_TMPDIR/test.log"
      log "FAILED ${files[$i]} with exit code ${results[$i]}!"
    else
      log "PASSED ${files[$i]}"
    fi
  done
  # print overall result
  if [[ "${results[*]}" =~ [1-9] ]]; then
    error "failed tests!"
  else
    log "all tests passed!"
  fi
}
# :command.function
orcli_transform_command() {
  # src/transform_command.sh
  # shellcheck shell=bash disable=SC2154 disable=SC2155
  #
  # Apply the operations found in one or more undo/redo JSON files (local
  # paths, http(s) URLs or stdin) to the project named by args[project].
  # Each operation is posted on its own to the matching command endpoint.
  local download

  # stdin was requested as the only input: show usage if nothing is piped in
  if [[ ${args[file]} == '-' || ${args[file]} == '"-"' ]]; then
    if ! read -u 0 -t 0; then
      orcli_transform_usage
      exit 1
    fi
  fi

  # args[file] is a space delimited string; turn it into an array
  files=()
  eval "files=(${args[file]})"

  projectid="$(get_id "${args[project]}")"

  # tmp directory for downloads (kept global, the trap reads it at exit)
  tmpdir="$(mktemp -d)"
  trap 'rm -rf "$tmpdir"' 0 2 3 15

  # fetch remote files and point the entry to the downloaded copy
  for i in "${!files[@]}"; do
    case "${files[$i]}" in
      "http://"* | "https://"*)
        download="${tmpdir}/${files[$i]//[^A-Za-z0-9._-]/_}"
        if ! curl -fs --location "${files[$i]}" >"$download"; then
          error "download of ${files[$i]} failed!"
        fi
        files[i]="$download"
        ;;
    esac
  done

  # every entry must be either available stdin or an existing file
  for i in "${!files[@]}"; do
    case "${files[$i]}" in
      '-' | '"-"')
        if ! read -u 0 -t 0; then
          orcli_transform_usage
          exit 1
        fi
        ;;
      *)
        if ! [[ -f "${files[$i]}" ]]; then
          error "cannot open ${files[$i]} (no such file)!"
        fi
        ;;
    esac
  done

  # jq program that renders one operation as a bash associative array literal
  filter='[to_entries[]|"["+(.key|@sh)+"]="+(.value|tostring|@sh)]|"("+join(" ")+")"'

  for i in "${!files[@]}"; do
    # one compact JSON line per operation
    if ! json="$(jq -c '.[]' "${files[$i]}")"; then
      error "parsing ${files[$i]} failed!"
    else
      mapfile -t jsonlines <<<"$json"
    fi
    for line in "${jsonlines[@]}"; do
      declare -A array=$(jq --join-output "${filter}" <<< "$line")
      if [[ -z ${array[op]} ]]; then
        error "parsing ${files[$i]} failed!"
      fi
      # translate the operation name into its command endpoint
      # https://github.com/OpenRefine/OpenRefine/blob/master/main/webapp/modules/core/MOD-INF/controller.js
      com="${array[op]#core/}"
      case "$com" in
        multivalued-cell-join) com="join-multi-value-cells" ;;
        multivalued-cell-split) com="split-multi-value-cells" ;;
        column-addition) com="add-column" ;;
        column-addition-by-fetching-urls) com="add-column-by-fetching-urls" ;;
        column-removal) com="remove-column" ;;
        column-rename) com="rename-column" ;;
        column-move) com="move-column" ;;
        column-split) com="split-column" ;;
        column-reorder) com="reorder-columns" ;;
        recon) com="reconcile" ;;
        extend-reconciled-data) com="extend-data" ;;
        row-star | row-flag) com="annotate-rows" ;;
        row-removal) com="remove-rows" ;;
        row-reorder) com="reorder-rows" ;;
      esac
      unset "array[op]"
      # the endpoint expects "engine" instead of "engineConfig"
      array[engine]="${array[engineConfig]}"
      unset "array[engineConfig]"
      # the description is not sent
      unset "array[description]"
      # expressions are sent without line breaks
      array[expression]="${array[expression]//$'\n'/}"
      # one --data-urlencode option per remaining key
      mapfile -t curloptions < <(
        for K in "${!array[@]}"; do
          printf '%s\n' "--data-urlencode" "$K=${array[$K]}"
        done
      )
      # fetch a csrf token and post the operation to its endpoint
      if ! response="$(curl -fs --data "project=${projectid}" "${curloptions[@]}" "${OPENREFINE_URL}/command/core/${com}$(get_csrf)")"; then
        error "transforming ${args[project]} with ${com} from ${files[$i]} failed!"
      else
        response_code="$(jq -r '.code' <<<"$response")"
        if [[ $response_code != "ok" ]]; then
          error "transforming ${args[project]} with ${com} from ${files[$i]} failed!" "Response: $(jq -r '.message' <<<"$response")"
        else
          log "transformed ${args[project]} with ${com}" "Response: $(jq -r '.historyEntry.description' <<<"$response")"
        fi
      fi
      unset array
    done
  done
}
# :command.function
orcli_export_jsonl_command() {
  # src/export_jsonl_command.sh
  # shellcheck shell=bash disable=SC2154 disable=SC2155
  #
  # Export a project as JSON lines by building a template and handing the
  # post data to post_export. With --separator or --mode records the columns
  # holding multiple values are determined first and tagged in the template.
  local has_separator="" records_mode=""
  if [[ -n ${args[--separator]} ]]; then
    has_separator=1
  fi
  if [[ ${args[--mode]} == "records" ]]; then
    records_mode=1
  fi

  projectid="$(get_id "${args[project]}")"

  # find the columns that contain multiple values
  if [[ -n $has_separator || -n $records_mode ]]; then
    if [[ -n $records_mode ]]; then
      # records mode takes precedence over the separator facet
      engine='{"facets":[{"type":"list","columnName":"","expression":"grel:filter(row.columnNames,cn,row.record.cells[cn].value.length()>1)","selection":[]}],"mode":"row-based"}'
    else
      engine='{"facets":[{"type":"list","columnName":"","expression":"grel:filter(row.columnNames,cn,cells[cn].value.contains(\"'"${args[--separator]}"'\"))","selection":[]}],"mode":"row-based"}'
    fi
    readarray -t columns_mv < <(curl -fs --data project="$projectid" --data "engine=${engine}" "${OPENREFINE_URL}/command/core/compute-facets" | jq -r '.facets[].choices[].v.v')
    readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')
    # quote every column name; multi-valued ones get the marker character
    # that the template below looks for
    readarray -t columns_mix < <(
      for name in "${columns[@]}"; do
        marker=""
        for candidate in "${columns_mv[@]}"; do
          if [[ "$name" == "$candidate" ]]; then
            marker="⊌"
            break
          fi
        done
        printf '"%s%s"\n' "$name" "$marker"
      done
    )
    multivalued="[$(IFS=,; printf '%s' "${columns_mix[*]}")]"
  fi

  # build the template
  template='{{'
  if [[ -n $records_mode ]]; then
    template+='if(row.index - row.record.fromRowIndex == 0,'
  fi
  template+='"%7B".unescape("url") + " " +forEach('
  if [[ -n $has_separator || -n $records_mode ]]; then
    template+="${multivalued}"
    template+=', cn, forNonBlank(cells[cn.chomp("⊌")].value, v, if(cn.endsWith("⊌"), "\"" + cn.chomp("⊌") + "\": " +'
    if [[ -n $has_separator ]]; then
      template+="v.split(\"${args[--separator]}\").jsonize()"
    fi
    if [[ -n $records_mode ]]; then
      template+='row.record.cells[cn.chomp("⊌")].value.jsonize()'
    fi
    template+=', "\"" + cn + "\": " + v.jsonize())'
  else
    template+='row.columnNames, cn, forNonBlank(cells[cn].value, v, "\"" + cn + "\": " + v.jsonize()'
  fi
  template+=', null)).join(", ")+ " " + "%7D".unescape("url") + "\n"'
  if [[ -n $records_mode ]]; then
    template+=', "")'
  fi
  template+='}}'

  # post data for the export; post_export posts it and validates the result
  data+=("project=${projectid}" "format=template" "template=${template}")
  post_export "${data[@]}"
}
# :command.function
orcli_export_csv_command() {
  # src/export_csv_command.sh
  # shellcheck shell=bash
  #
  # Export a project as CSV. The separator (default ","), the optional
  # encoding and the optional column selection are passed as a JSON string
  # in the "options" field of the post data.
  projectid="$(get_id "${args[project]}")"
  separator="${args[--separator]:-,}"

  # build the options JSON piece by piece
  options="{ \"separator\": \"${separator}\""
  if [[ -n ${args[--encoding]} ]]; then
    options+=", \"encoding\": \"${args[--encoding]}\""
  fi
  if [[ -n ${args[--select]} ]]; then
    # --select is a comma separated list of column names
    IFS=',' read -ra columns <<<"${args[--select]}"
    options+=", \"columns\": [{\"name\":\"${columns[0]}\"}"
    for cn in "${columns[@]:1}"; do
      options+=", {\"name\":\"${cn}\"}"
    done
    options+="]"
  fi
  options+=' }'

  # post data for the export; post_export posts it and validates the result
  data+=("project=${projectid}" "format=csv" "options=${options}")
  post_export "${data[@]}"
}
# :command.function
orcli_export_tsv_command() {
  # src/export_tsv_command.sh
  # shellcheck shell=bash
  #
  # Export a project as TSV. The fixed separator (the two characters \t,
  # which form a JSON escape inside the options string), the optional
  # encoding and the optional column selection are passed as a JSON string
  # in the "options" field of the post data.
  projectid="$(get_id "${args[project]}")"
  separator='\t'

  # build the options JSON piece by piece
  options="{ \"separator\": \"${separator}\""
  if [[ -n ${args[--encoding]} ]]; then
    options+=", \"encoding\": \"${args[--encoding]}\""
  fi
  if [[ -n ${args[--select]} ]]; then
    # --select is a comma separated list of column names
    IFS=',' read -ra columns <<<"${args[--select]}"
    options+=", \"columns\": [{\"name\":\"${columns[0]}\"}"
    for cn in "${columns[@]:1}"; do
      options+=", {\"name\":\"${cn}\"}"
    done
    options+="]"
  fi
  options+=' }'

  # post data for the export; post_export posts it and validates the result
  data+=("project=${projectid}" "format=tsv" "options=${options}")
  post_export "${data[@]}"
}
# :command.function
orcli_export_template_command() {
  # src/export_template_command.sh
  # shellcheck shell=bash disable=SC2154 disable=SC2155
  #
  # Export a project through a user supplied template that is read from a
  # local file, an http(s) URL or stdin. Optional prefix, suffix and
  # separator values are forwarded as additional post data.
  local download opt key value

  projectid="$(get_id "${args[project]}")"

  # tmp directory for a downloaded template (kept global, the trap reads it)
  tmpdir="$(mktemp -d)"
  trap 'rm -rf "$tmpdir"' 0 2 3 15

  # a remote template is downloaded and args[file] redirected to the copy
  case "${args[file]}" in
    "http://"* | "https://"*)
      download="${tmpdir}/${args[file]//[^A-Za-z0-9._-]/_}"
      if ! curl -fs --location "${args[file]}" >"$download"; then
        error "download of ${args[file]} failed!"
      fi
      args[file]="$download"
      ;;
  esac

  # the template must be either available stdin or an existing file
  case "${args[file]}" in
    '-' | '"-"')
      if ! read -u 0 -t 0; then
        orcli_export_template_usage
        exit 1
      fi
      ;;
    *)
      if ! [[ -f "${args[file]}" ]]; then
        error "cannot open ${args[file]} (no such file)!"
      fi
      ;;
  esac

  # command substitution drops the trailing newline(s) of the template
  template=$(cat "${args[file]}")

  # post data for the export
  data+=("project=${projectid}" "format=template" "template=${template}")
  for opt in prefix suffix separator; do
    key="--${opt}"
    value="${args[$key]}"
    if [[ -n $value ]]; then
      data+=("${opt}=${value}")
    fi
  done

  # post_export posts the data and validates the result
  post_export "${data[@]}"
}
# :command.function
orcli_run_command() {
  # src/run_command.sh
  # shellcheck shell=bash disable=SC2154 source=/dev/null
  #
  # Start OpenRefine on args[--port] with a temporary workspace and then
  #   1. open an interactive shell if "-" is selected but stdin is empty,
  #   2. with --interactive: run the scripts inside an interactive shell, or
  #   3. otherwise: run each script with bash -e and print resource stats.
  # Globals written: files, OPENREFINE_URL, scriptpath, openrefine,
  #                  OPENREFINE_TMPDIR, OPENREFINE_PID, ORCLI_QUIET (none are
  #                  local because the exit trap and the exported environment
  #                  rely on them).
  # Relies on the helpers error, log and interactive defined elsewhere in
  # this file; presumably error terminates the script - verify there.

  # catch args, convert the space delimited string to an array
  # NOTE(review): eval on args[file]; this presumably relies on the argument
  # parser having shell-quoted each value - confirm before changing.
  files=()
  eval "files=(${args[file]})"
  # check existence of files and stdin
  for i in "${!files[@]}"; do
    if [[ "${files[$i]}" == '-' ]] || [[ "${files[$i]}" == '"-"' ]]; then
      # exit if stdin is selected but not present
      if ! [[ ${args[--interactive]} ]]; then
        if ! read -u 0 -t 0; then
          orcli_run_usage
          exit 1
        fi
      fi
    else
      # exit if file does not exist
      if ! [[ -f "${files[$i]}" ]]; then
        error "cannot open ${files[$i]} (no such file)!"
      fi
    fi
  done
  # assume that quiet flag shall suppress log output generally in batch mode
  if [[ ${args[--quiet]} ]]; then
    export ORCLI_QUIET=1
  fi
  # update OPENREFINE_URL env
  OPENREFINE_URL="http://localhost:${args[--port]}"
  # locate orcli and OpenRefine
  scriptpath=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
  if [[ -x "${scriptpath}/refine" ]]; then
    openrefine="${scriptpath}/refine"
  else
    error "OpenRefine's startup script (refine) not found!" "Did you put orcli in your OpenRefine app dir?"
  fi
  # check if OpenRefine is already running
  if curl -fs "${OPENREFINE_URL}" &>/dev/null; then
    error "OpenRefine is already running on port ${args[--port]}." "Hint: Stop the other process or use another port."
  fi
  # create tmp directory
  OPENREFINE_TMPDIR="$(mktemp -d)"
  trap '{ rm -rf "$OPENREFINE_TMPDIR"; }' 0 2 3 15
  # start OpenRefine with tmp workspace and autosave period 1440
  # (24 hours if the unit is minutes as in OpenRefine's refine.ini - TODO confirm);
  # the command is quoted so that an installation path containing whitespace works
  REFINE_AUTOSAVE_PERIOD=1440 "$openrefine" -d "$OPENREFINE_TMPDIR" -m "${args[--memory]}" -p "${args[--port]}" -x refine.headless=true -v warn &>"$OPENREFINE_TMPDIR/openrefine.log" &
  OPENREFINE_PID="$!"
  # update trap to kill OpenRefine on error or exit
  trap '{ rm -rf "$OPENREFINE_TMPDIR"; rm -rf /tmp/jetty-127_0_0_1-${OPENREFINE_URL##*:}*; kill -9 "$OPENREFINE_PID"; }' 0 2 3 15
  # wait until OpenRefine is running (timeout 20s)
  for i in {1..20}; do
    sleep 1
    if curl -fs "${OPENREFINE_URL}/command/core/get-version" &>/dev/null; then
      log "started OpenRefine with tmp workspace ${OPENREFINE_TMPDIR}"
      break
    fi
    if [[ $i == 20 ]]; then
      error "starting OpenRefine server failed!"
    fi
  done
  # execute script(s) in subshell
  # The generated scripts define an orcli alias. An alias value is tokenised
  # again when it is expanded, so the path is shell-quoted twice: once for
  # the alias value itself and once for the "alias" command line.
  export OPENREFINE_TMPDIR OPENREFINE_URL OPENREFINE_PID
  if [[ ${args[file]} == '-' || ${args[file]} == '"-"' ]]; then
    if ! read -u 0 -t 0; then
      # case 1: interactive mode if stdin is selected but not present
      bash --rcfile <(
        # a missing ~/.bashrc is not an error
        if [[ -r ~/.bashrc ]]; then
          cat ~/.bashrc
        fi
        printf 'alias orcli=%q\n' "$(printf '%q' "${scriptpath}/orcli")"
        interactive
      ) -i </dev/tty
      exit
    fi
  fi
  if [[ ${args[--interactive]} ]]; then
    # case 2: execute scripts and keep shell running
    bash --rcfile <(
      # a missing ~/.bashrc is not an error
      if [[ -r ~/.bashrc ]]; then
        cat ~/.bashrc
      fi
      printf 'alias orcli=%q\n' "$(printf '%q' "${scriptpath}/orcli")"
      for i in "${!files[@]}"; do
        log "executing script ${files[$i]}..."
        awk 1 "${files[$i]}"
      done
      interactive
    ) -i </dev/tty
  else
    # case 3: just execute scripts
    for i in "${!files[@]}"; do
      log "executing script ${files[$i]}..."
      bash -e <(
        echo "shopt -s expand_aliases"
        printf 'alias orcli=%q\n' "$(printf '%q' "${scriptpath}/orcli")"
        awk 1 "${files[$i]}"
      )
    done
    # print stats
    log "used $(($(ps --no-headers -o rss -p "$OPENREFINE_PID") / 1024)) MB RAM and $(ps --no-headers -o cputime -p "$OPENREFINE_PID") CPU time"
  fi
}
# :command.parse_requirements
parse_requirements() {
  # Top-level argument parsing: handles --version/--help, sets the default
  # OPENREFINE_URL, records the curl and jq dependencies and hands the
  # remaining arguments to the parser of the selected command.

  # :command.fixed_flags_filter
  case "${1:-}" in
    --version | -v)
      version_command
      exit
      ;;
    --help | -h)
      long_usage=yes
      orcli_usage
      exit
      ;;
  esac

  # :command.environment_variables_filter
  # :command.environment_variables_default
  export OPENREFINE_URL="${OPENREFINE_URL:-http://localhost:3333}"

  # :command.dependencies_filter
  if ! command -v curl >/dev/null 2>&1; then
    printf "missing dependency: curl\n" >&2
    printf "%s\n" "https://curl.se" >&2
    exit 1
  fi
  deps['curl']="$(command -v curl | head -n1)"
  if ! command -v jq >/dev/null 2>&1; then
    printf "missing dependency: jq\n" >&2
    printf "%s\n" "https://github.com/stedolan/jq" >&2
    exit 1
  fi
  deps['jq']="$(command -v jq | head -n1)"

  # :command.command_filter
  action=${1:-}
  case $action in
    -*) ;;
    completions | delete | import | list | info | search | sort | test | transform | export | run)
      # every known command has a parser named orcli_<command>_parse_requirements
      shift
      "orcli_${action}_parse_requirements" "$@"
      shift $#
      ;;
    # :command.command_fallback
    "")
      orcli_usage >&2
      exit 1
      ;;
    *)
      printf "invalid command: %s\n" "$action" >&2
      exit 1
      ;;
  esac

  # :command.parse_requirements_while
  # anything still left at this point is rejected
  if [[ $# -gt 0 ]]; then
    key="$1"
    case "$key" in
      -?*)
        printf "invalid option: %s\n" "$key" >&2
        ;;
      *)
        # :command.parse_requirements_case
        # :command.parse_requirements_case_simple
        printf "invalid argument: %s\n" "$key" >&2
        ;;
    esac
    exit 1
  fi
}
# :command.parse_requirements
orcli_completions_parse_requirements() {
  # Argument parsing for "completions": only --help/-h is accepted as the
  # first argument, every other option or argument is rejected.

  # :command.fixed_flags_filter
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_completions_usage
      exit
      ;;
  esac

  # :command.command_filter
  action="completions"

  # :command.parse_requirements_while
  if [[ $# -gt 0 ]]; then
    key="$1"
    if [[ $key == -?* ]]; then
      printf "invalid option: %s\n" "$key" >&2
    else
      # :command.parse_requirements_case
      # :command.parse_requirements_case_simple
      printf "invalid argument: %s\n" "$key" >&2
    fi
    exit 1
  fi
}
# :command.parse_requirements
orcli_delete_parse_requirements() {
  # Argument parsing for "delete": accepts the flags --force/-f and
  # --quiet/-q plus exactly one positional PROJECT argument, and stores
  # them in the args array.

  # :command.fixed_flags_filter
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_delete_usage
      exit
      ;;
  esac

  # :command.command_filter
  action="delete"

  # :command.parse_requirements_while
  for key in "$@"; do
    case "$key" in
      # :flag.case
      --force | -f)
        # :flag.case_no_arg
        args['--force']=1
        ;;
      # :flag.case
      --quiet | -q)
        # :flag.case_no_arg
        args['--quiet']=1
        ;;
      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;
      *)
        # :command.parse_requirements_case
        # :command.parse_requirements_case_simple
        # only the first positional argument is taken as the project
        if [[ -n ${args['project']+x} ]]; then
          printf "invalid argument: %s\n" "$key" >&2
          exit 1
        fi
        args['project']=$key
        ;;
    esac
  done

  # :command.required_args_filter
  if [[ -z ${args['project']+x} ]]; then
    printf "missing required argument: PROJECT\nusage: orcli delete PROJECT [OPTIONS]\n" >&2
    exit 1
  fi
}
# :command.parse_requirements
orcli_import_parse_requirements() {
# :command.fixed_flags_filter
while [[ $# -gt 0 ]]; do
case "${1:-}" in
--help | -h)
long_usage=yes
orcli_import_usage
exit
;;