first draft

This commit is contained in:
Felix Lohmeier 2023-10-27 21:12:16 +00:00
parent b465fdf1fd
commit 216991d212
6 changed files with 228 additions and 2 deletions

View File

@ -11,6 +11,7 @@
- [info](info.md)
- [list](list.md)
- [run](run.md)
- [search](search.md)
- [test](test.md)
- [transform](transform.md)
@ -30,6 +31,7 @@ Commands:
import commands to create OpenRefine projects from files or URLs
list list projects on OpenRefine server
info show OpenRefine project's metadata
search apply regex to each column and print matches in flattened tsv format
test run functional tests on tmp OpenRefine workspace
transform apply undo/redo JSON file(s) to an OpenRefine project
export commands to export data from OpenRefine projects to files
@ -52,6 +54,7 @@ Examples:
orcli list
orcli info "duplicates"
orcli transform "duplicates" "https://git.io/fj5ju"
orcli search "duplicates" "^Ben"
orcli export tsv "duplicates"
orcli export tsv "duplicates" --output "duplicates.tsv"
orcli delete "duplicates"

28
help/search.md Normal file
View File

@ -0,0 +1,28 @@
# orcli search
```
orcli search - apply regex to each column and print matches in flattened tsv format
Usage:
orcli search PROJECT [REGEX]
orcli search --help | -h
Options:
--help, -h
Show this help
Arguments:
PROJECT
project name or id
REGEX
search
Examples:
orcli search "duplicates" "^Ben"
orcli search 1234567890123 "^Ben"
orcli search "duplicates" "^Ben" | column -t -s $'\t'
```
code: [src/search_command.sh](../src/search_command.sh)

155
orcli
View File

@ -39,6 +39,7 @@ orcli_usage() {
printf " %s commands to create OpenRefine projects from files or URLs\n" "import "
printf " %s list projects on OpenRefine server\n" "list "
printf " %s show OpenRefine project's metadata\n" "info "
printf " %s apply regex to each column and print matches in flattened tsv format\n" "search "
printf " %s run functional tests on tmp OpenRefine workspace\n" "test "
printf " %s apply undo/redo JSON file(s) to an OpenRefine project\n" "transform "
printf " %s commands to export data from OpenRefine projects to files\n" "export "
@ -72,6 +73,7 @@ orcli_usage() {
printf " orcli list\n"
printf " orcli info \"duplicates\"\n"
printf " orcli transform \"duplicates\" \"https://git.io/fj5ju\"\n"
printf " orcli search \"duplicates\" \"^Ben\"\n"
printf " orcli export tsv \"duplicates\"\n"
printf " orcli export tsv \"duplicates\" --output \"duplicates.tsv\"\n"
printf " orcli delete \"duplicates\"\n"
@ -563,6 +565,55 @@ orcli_info_usage() {
fi
}
# :command.usage
orcli_search_usage() {
  # Print the help text for `orcli search` to stdout.
  #
  # Globals:
  #   long_usage (read) - when non-empty, the Options, Arguments and
  #                       Examples sections are printed as well.
  # Outputs:
  #   The usage text on stdout.
  #
  # NOTE(review): the `# :command.*` markers suggest this function is
  # generated; if so, the same correction presumably has to be made in the
  # generator's input as well - confirm.

  # The headline is the same for the short and the long form.
  printf "orcli search - apply regex to each column and print matches in flattened tsv format\n"
  echo

  printf "%s\n" "Usage:"
  printf " orcli search PROJECT [REGEX]\n"
  printf " orcli search --help | -h\n"
  echo

  # :command.long_usage
  if [[ -n $long_usage ]]; then
    printf "%s\n" "Options:"

    # :command.usage_fixed_flags
    printf " %s\n" "--help, -h"
    printf " Show this help\n"
    echo

    # :command.usage_args
    printf "%s\n" "Arguments:"

    # :argument.usage
    printf " %s\n" "PROJECT"
    printf " project name or id\n"
    echo

    # :argument.usage
    printf " %s\n" "REGEX"
    printf " search\n"
    echo

    # :command.usage_examples
    printf "%s\n" "Examples:"
    printf " orcli search \"duplicates\" \"^Ben\"\n"
    printf " orcli search 1234567890123 \"^Ben\"\n"
    # The backslash is doubled for printf so the help shows the two
    # characters `\t` instead of an invisible TAB inside the quotes.
    printf " orcli search \"duplicates\" \"^Ben\" | column -t -s \$'\\\\t'\n"
    echo
  fi
}
# :command.usage
orcli_test_usage() {
if [[ -n $long_usage ]]; then
@ -1242,6 +1293,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'search\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h template tsv")" -- "$cur" )'
echo $' ;;'
@ -1263,7 +1318,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' *)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run test transform")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run search test transform")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' esac'
@ -1531,6 +1586,36 @@ orcli_info_command() {
}
# :command.function
orcli_search_command() {
  # src/search_command.sh
  # shellcheck shell=bash disable=SC2154
  #
  # Export every cell of a project that matches the REGEX argument through
  # a template export: one line per matching cell, holding the row number
  # (fromRowIndex + 1), the column name and the cell value, tab-separated.
  #
  # Globals:
  #   args (read: project, regex; written: facets)
  #   projectid, template (written)
  #   data (appended to)
  # Calls:
  #   get_id, post_export (defined elsewhere in the file)

  # get project id
  projectid="$(get_id "${args[project]}")"

  # set facets config
  # The positional argument is stored under the key `regex` (there is no
  # `--regex` flag); reading `--regex` here left the facet regex empty.
  # NOTE(review): nothing in this block reads args[facets]; confirm that the
  # consumer (presumably post_export) looks up exactly this key.
  # NOTE(review): the regex is inserted into a JSON string unescaped, so a
  # pattern containing `"` or `\` may need escaping - confirm.
  args[facets]="[ { \"type\": \"list\", \"expression\": \"grel:filter(row.columnNames,cn,cells[cn].value.find(/${args[regex]}/).length()>0).length()>0\", \"columnName\": \"\", \"selection\": [ { \"v\": { \"v\": true } } ] } ]"

  # set template
  template='{{'
  # iterate over the columns of the row whose value matches the regex
  template+='forEach(filter(row.columnNames, cn, cells[cn].value.find(/'
  template+="${args[regex]}"
  template+='/).length()>0), cn, '
  # row number and column name, each followed by a tab
  template+='(row.record.fromRowIndex + 1) + "\t" + cn + "\t" + '
  # cell value; wrapped in double quotes (with embedded quotes doubled) when
  # the contains() check matches, empty string for blank cells
  template+='forNonBlank(cells[cn].value, v, if(v.contains(" "), if(v.contains('\''"'\''), '\''"'\'' + v.replace('\''"'\'','\''""'\'') + '\''"'\'', '\''"'\'' + v + '\''"'\''), v),"")'
  template+='+ "\n")'
  template+='}}'

  # assemble specific post data
  data+=("project=${projectid}")
  data+=("format=template")
  data+=("template=${template}")

  # call post_export function to post data and validate results
  post_export "${data[@]}"
}
# :command.function
orcli_test_command() {
# src/test_command.sh
@ -1999,6 +2084,13 @@ parse_requirements() {
shift $#
;;
search)
action="search"
shift
orcli_search_parse_requirements "$@"
shift $#
;;
test)
action="test"
shift
@ -2900,6 +2992,66 @@ orcli_info_parse_requirements() {
}
# :command.parse_requirements
orcli_search_parse_requirements() {
  # Parse the command line of `orcli search` into the `args` array.
  #
  # Arguments: the words following `search` on the command line.
  # Globals:   long_usage, action, key, args (written)
  # Outputs:   error messages on stderr
  # Exits:     after printing help for a leading --help/-h; with status 1 on
  #            an option-like word, a third positional argument, or a
  #            missing PROJECT.

  # :command.fixed_flags_filter
  # Only the first word is inspected here: a help flag prints the long usage
  # and exits, anything else is left for the argument loop.
  case "${1:-}" in
    --help | -h)
      long_usage=yes
      orcli_search_usage
      exit
      ;;
  esac

  # :command.command_filter
  action="search"

  # :command.parse_requirements_while
  for key in "$@"; do
    # any word that looks like an option is rejected - search has no flags
    if [[ $key == -?* ]]; then
      printf "invalid option: %s\n" "$key" >&2
      exit 1
    fi

    # :command.parse_requirements_case
    # fill the positional slots in order: PROJECT first, then REGEX
    if [[ -z ${args['project']+x} ]]; then
      args['project']=$key
      continue
    fi
    if [[ -z ${args['regex']+x} ]]; then
      args['regex']=$key
      continue
    fi

    printf "invalid argument: %s\n" "$key" >&2
    exit 1
  done

  # :command.required_args_filter
  if [[ -n ${args['project']+x} ]]; then
    return
  fi
  printf "missing required argument: PROJECT\nusage: orcli search PROJECT [REGEX]\n" >&2
  exit 1
}
# :command.parse_requirements
orcli_test_parse_requirements() {
# :command.fixed_flags_filter
@ -3514,6 +3666,7 @@ run() {
"import tsv") orcli_import_tsv_command ;;
"list") orcli_list_command ;;
"info") orcli_info_command ;;
"search") orcli_search_command ;;
"test") orcli_test_command ;;
"transform") orcli_transform_command ;;
"export") orcli_export_command ;;

View File

@ -17,6 +17,7 @@ examples:
- orcli list
- orcli info "duplicates"
- orcli transform "duplicates" "https://git.io/fj5ju"
- orcli search "duplicates" "^Ben"
- orcli export tsv "duplicates"
- orcli export tsv "duplicates" --output "duplicates.tsv"
- orcli delete "duplicates"
@ -206,6 +207,18 @@ commands:
- orcli info 1234567890123
- orcli info "duplicates" | jq -r .columns[]
- name: search
help: apply regex to each column and print matches in flattened tsv format
args:
- *project
- &regex
name: regex
help: search
examples:
- orcli search "duplicates" "^Ben"
- orcli search 1234567890123 "^Ben"
- orcli search "duplicates" "^Ben" | column -t -s \$'\t'
- name: test
help: run functional tests on tmp OpenRefine workspace

View File

@ -66,6 +66,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'search\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h template tsv")" -- "$cur" )'
echo $' ;;'
@ -87,7 +91,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' *)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run test transform")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help --version -h -v completions delete export import info list run search test transform")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' esac'

25
src/search_command.sh Normal file
View File

@ -0,0 +1,25 @@
# shellcheck shell=bash disable=SC2154
#
# Body of the `orcli search` command: export every cell of a project that
# matches the REGEX argument through a template export - one line per
# matching cell, holding the row number (fromRowIndex + 1), the column name
# and the cell value, tab-separated.
# Reads args[project] and args[regex]; relies on get_id and post_export,
# which are defined outside this file.

# get project id
projectid="$(get_id "${args[project]}")"

# set facets config
# The positional argument is stored under the key `regex` (there is no
# `--regex` flag); reading `--regex` here left the facet regex empty.
# NOTE(review): nothing in this file reads args[facets]; confirm that the
# consumer (presumably post_export) looks up exactly this key.
# NOTE(review): the regex is inserted into a JSON string unescaped, so a
# pattern containing `"` or `\` may need escaping - confirm.
args[facets]="[ { \"type\": \"list\", \"expression\": \"grel:filter(row.columnNames,cn,cells[cn].value.find(/${args[regex]}/).length()>0).length()>0\", \"columnName\": \"\", \"selection\": [ { \"v\": { \"v\": true } } ] } ]"

# set template
template='{{'
# iterate over the columns of the row whose value matches the regex
template+='forEach(filter(row.columnNames, cn, cells[cn].value.find(/'
template+="${args[regex]}"
template+='/).length()>0), cn, '
# row number and column name, each followed by a tab
template+='(row.record.fromRowIndex + 1) + "\t" + cn + "\t" + '
# cell value; wrapped in double quotes (with embedded quotes doubled) when
# the contains() check matches, empty string for blank cells
template+='forNonBlank(cells[cn].value, v, if(v.contains(" "), if(v.contains('\''"'\''), '\''"'\'' + v.replace('\''"'\'','\''""'\'') + '\''"'\'', '\''"'\'' + v + '\''"'\''), v),"")'
template+='+ "\n")'
template+='}}'

# assemble specific post data
data+=("project=${projectid}")
data+=("format=template")
data+=("template=${template}")

# call post_export function to post data and validate results
post_export "${data[@]}"