import txt first draft

This commit is contained in:
felixlohmeier 2023-05-14 10:39:36 +00:00
parent 6c47e108f7
commit 1d093f5937
4 changed files with 558 additions and 7 deletions

449
orcli
View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# This script was generated by bashly 1.0.3 (https://bashly.dannyb.co)
# This script was generated by bashly 1.0.4 (https://bashly.dannyb.co)
# Modifying it manually is not recommended
# :wrapper.bash3_bouncer
@ -194,6 +194,7 @@ orcli_import_usage() {
printf "%s\n" "Commands:"
printf " %s import character-separated values (CSV)\n" "csv"
printf " %s import tab-separated values (TSV)\n" "tsv"
printf " %s import text files (txt)\n" "txt"
echo
# :command.long_usage
@ -346,7 +347,7 @@ orcli_import_csv_usage() {
printf " orcli import csv \"file1\" \"file2\"\n"
printf " head -n 100 \"file\" | orcli import csv\n"
printf " orcli import csv \"https://git.io/fj5hF\"\n"
printf " orcli import csv \"file\" \\\\\n --separator \";\" \\\\\n --columnNames \"foo,bar,baz\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n --projectTags \"test,urgent\"\n"
printf " orcli import csv \"file\" \\\\\n --separator \";\" \\\\\n --columnNames \"foo,bar,baz\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
echo
fi
@ -484,7 +485,130 @@ orcli_import_tsv_usage() {
printf " orcli import tsv \"file1\" \"file2\"\n"
printf " head -n 100 \"file\" | orcli import tsv\n"
printf " orcli import tsv \"https://example.com/file.tsv\"\n"
printf " orcli import tsv \"file\" \\\\\n --separator \";\" \\\\\n --columnNames \"foo,bar,baz\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\"\n --projectTags \"test,urgent\"\n"
printf " orcli import tsv \"file\" \\\\\n --separator \";\" \\\\\n --columnNames \"foo,bar,baz\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
echo
fi
}
# :command.usage
orcli_import_txt_usage() {
if [[ -n $long_usage ]]; then
printf "orcli import txt - import text files (txt)\n"
echo
else
printf "orcli import txt - import text files (txt)\n"
echo
fi
printf "%s\n" "Usage:"
printf " orcli import txt [FILE...] [OPTIONS]\n"
printf " orcli import txt --help | -h\n"
echo
# :command.long_usage
if [[ -n $long_usage ]]; then
printf "%s\n" "Options:"
# :command.usage_flags
# :flag.usage
printf " %s\n" "--blankCellsAsStrings"
printf " store blank cells as empty strings instead of nulls\n"
echo
# :flag.usage
printf " %s\n" "--columnWidths COLUMNWIDTHS"
printf " set columnWidths (numbers separated by comma)\n"
printf " Default: \n"
echo
# :flag.usage
printf " %s\n" "--encoding ENCODING"
printf " set character encoding\n"
echo
# :flag.usage
printf " %s\n" "--guessCellValueTypes"
printf " attempt to parse cell text into numbers\n"
echo
# :flag.usage
printf " %s\n" "--headerLines HEADERLINES"
printf " parse x line(s) as column headers\n"
printf " Default: 1\n"
echo
# :flag.usage
printf " %s\n" "--ignoreLines IGNORELINES"
printf " ignore first x line(s) at beginning of file\n"
printf " Default: -1\n"
echo
# :flag.usage
printf " %s\n" "--includeFileSources"
printf " add column with file source\n"
echo
# :flag.usage
printf " %s\n" "--includeArchiveFileName"
printf " add column with archive file name\n"
echo
# :flag.usage
printf " %s\n" "--limit LIMIT"
printf " load at most x row(s) of data\n"
printf " Default: -1\n"
echo
# :flag.usage
printf " %s\n" "--projectName PROJECTNAME"
printf " set a name for the OpenRefine project\n"
echo
# :flag.usage
printf " %s\n" "--projectTags PROJECTTAGS"
printf " set project tags (comma separated)\n"
echo
# :flag.usage
printf " %s\n" "--quiet, -q"
printf " suppress log output, print errors only\n"
echo
# :flag.usage
printf " %s\n" "--skipBlankRows"
printf " do not store blank rows\n"
echo
# :flag.usage
printf " %s\n" "--skipDataLines SKIPDATALINES"
printf " discard initial x row(s) of data\n"
printf " Default: 0\n"
echo
# :command.usage_fixed_flags
printf " %s\n" "--help, -h"
printf " Show this help\n"
echo
# :command.usage_args
printf "%s\n" "Arguments:"
# :argument.usage
printf " %s\n" "FILE..."
printf " Path to one or more files or URLs. When FILE is -, read standard input.\n"
printf " Default: -\n"
echo
# :command.usage_examples
printf "%s\n" "Examples:"
printf " orcli import txt \"file\"\n"
printf " orcli import txt \"file1\" \"file2\"\n"
printf " head -n 100 \"file\" | orcli import txt\n"
printf " orcli import txt \"https://example.com/file.txt\"\n"
printf " orcli import txt \"file\" \\\\\n --columnWidths \"7,5\" \\\\\n --ignoreLines 1 \\\\\n --encoding \"ISO-8859-1\" \\\\\n --limit 100 \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
echo
fi
@ -1220,6 +1344,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import txt\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnWidths --encoding --guessCellValueTypes --headerLines --help --ignoreLines --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --skipBlankRows --skipDataLines -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --output --quiet -h -q")" -- "$cur" )'
echo $' ;;'
@ -1233,7 +1361,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv txt")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export\'*)'
@ -1491,6 +1619,81 @@ orcli_import_tsv_command() {
}
# :command.function
orcli_import_txt_command() {
  # src/import_txt_command.sh
  # shellcheck shell=bash disable=SC2154
  #
  # Handler for `orcli import txt`.
  #
  # Assembles the importer-specific POST fields -- the fixed-width "format"
  # identifier and a JSON "options" object built from the parsed flags -- and
  # passes them to post_import.
  #
  # Globals:
  #   args         (read)    associative array of parsed flags
  #   file         (read)    '-' means "read standard input"
  #   data         (written) POST fields; presumably initialised by
  #                          init_import -- TODO confirm
  #   options      (written) JSON text sent as the "options" field
  #   projectTags  (written) scratch array holding the split --projectTags
  # Exit status:
  #   exits 1 (after printing the usage text) when standard input was
  #   selected but no data is available on it.
  #
  # NOTE(review): string values (--encoding, --projectName, tags) are embedded
  # into the JSON without escaping; a value containing `"` or `\` would yield
  # malformed JSON.

  # call init_import function to eval args and to set basic post data
  init_import

  # check if stdin is present if selected
  # (`read -t 0` only tests for available input, it does not consume any)
  if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
    orcli_import_txt_usage
    exit 1
  fi

  # assemble specific post data (some options require json format)
  data+=("format=text/line-based/fixed-width")

  # columnWidths is always emitted, so every following member can safely be
  # prefixed with ', '.
  options='{ '
  options+="\"columnWidths\": [ ${args[--columnWidths]} ]"
  if [[ ${args[--blankCellsAsStrings]} ]]; then
    options+=', '
    options+='"storeBlankCellsAsNulls": false'
  fi
  if [[ ${args[--encoding]} ]]; then
    options+=', '
    options+="\"encoding\": \"${args[--encoding]}\""
  fi
  if [[ ${args[--guessCellValueTypes]} ]]; then
    options+=', '
    options+='"guessCellValueTypes": true'
  fi
  if [[ ${args[--headerLines]} ]]; then
    options+=', '
    options+="\"headerLines\": ${args[--headerLines]}"
  fi
  if [[ ${args[--ignoreLines]} ]]; then
    options+=', '
    options+="\"ignoreLines\": ${args[--ignoreLines]}"
  fi
  if [[ ${args[--includeFileSources]} ]]; then
    options+=', '
    # the key must be double-quoted like every other member; an unquoted key
    # is not valid JSON
    options+='"includeFileSources": true'
  fi
  if [[ ${args[--includeArchiveFileName]} ]]; then
    options+=', '
    options+='"includeArchiveFileName": true'
  fi
  if [[ ${args[--limit]} ]]; then
    options+=', '
    options+="\"limit\": ${args[--limit]}"
  fi
  if [[ ${args[--projectName]} ]]; then
    options+=', '
    options+="\"projectName\": \"${args[--projectName]}\""
  fi
  if [[ ${args[--skipBlankRows]} ]]; then
    options+=', '
    options+='"storeBlankRows": false'
  fi
  if [[ ${args[--skipDataLines]} ]]; then
    options+=', '
    options+="\"skipDataLines\": ${args[--skipDataLines]}"
  fi
  if [[ ${args[--projectTags]} ]]; then
    # split the comma separated list, then re-join it as quoted JSON strings;
    # printf emits a leading comma for every element, cut drops the first one
    IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
    options+=', '
    options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
  fi
  options+=' }'
  data+=("options=${options}")

  # call post_import function to post data and validate results
  post_import "${data[@]}"
}
# :command.function
orcli_list_command() {
# src/list_command.sh
@ -2215,6 +2418,13 @@ orcli_import_parse_requirements() {
shift $#
;;
txt)
action="txt"
shift
orcli_import_txt_parse_requirements "$@"
shift $#
;;
# :command.command_fallback
"")
orcli_import_usage >&2
@ -2799,6 +3009,236 @@ orcli_import_tsv_parse_requirements() {
}
# :command.parse_requirements
orcli_import_txt_parse_requirements() {
  # Parse the command line of `orcli import txt` into the global `args`
  # associative array.
  #
  # Arguments: the words following `import txt`
  # Globals:   args (written), action (written), key (written),
  #            long_usage (written when help is requested)
  # Exits:     0 after printing help; 1 on an unknown option, a flag that is
  #            missing its value, or a conflicting flag combination.

  # :command.fixed_flags_filter
  # Only the very first word is inspected here: a leading --help / -h prints
  # the long usage text and ends the program.
  if [[ $# -gt 0 ]]; then
    case "${1:-}" in
      --help | -h)
        long_usage=yes
        orcli_import_txt_usage
        exit
        ;;
    esac
  fi

  # :command.command_filter
  action="import txt"

  # :command.parse_requirements_while
  local value_name
  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      # switches that take no value
      --blankCellsAsStrings | --guessCellValueTypes | --includeFileSources | \
        --includeArchiveFileName | --skipBlankRows)
        args["$key"]=1
        shift
        ;;

      # --quiet has a short alias, so it is stored under its long name
      --quiet | -q)
        args['--quiet']=1
        shift
        ;;

      # flags that consume the following word as their value
      --columnWidths | --encoding | --headerLines | --ignoreLines | \
        --limit | --projectName | --projectTags | --skipDataLines)
        # :flag.conflicts
        if [[ "$key" == "--headerLines" && -n "${args['--columnNames']:-}" ]]; then
          printf "conflicting options: %s cannot be used with %s\n" "$key" "--columnNames" >&2
          exit 1
        fi
        # the value may be empty, but it has to be present
        if [[ -z ${2+x} ]]; then
          value_name="${key#--}"
          printf "%s\n" "$key requires an argument: $key ${value_name^^}" >&2
          exit 1
        fi
        args["$key"]="$2"
        shift 2
        ;;

      -?*)
        printf "invalid option: %s\n" "$key" >&2
        exit 1
        ;;

      *)
        # :command.parse_requirements_case_repeatable
        # every positional word is collected, wrapped in double quotes and
        # separated by single spaces
        if [[ -z ${args['file']+x} ]]; then
          args['file']="\"$1\""
        else
          args['file']="${args[file]} \"$1\""
        fi
        shift
        ;;
    esac
  done

  # :command.default_assignments
  # Each entry is NAME=DEFAULT; the default is applied when the entry is
  # unset or empty.
  local spec opt_name opt_default
  for spec in 'file=-' '--columnWidths=' '--headerLines=1' \
    '--ignoreLines=-1' '--limit=-1' '--skipDataLines=0'; do
    opt_name="${spec%%=*}"
    opt_default="${spec#*=}"
    if [[ -z ${args["$opt_name"]:-} ]]; then
      args["$opt_name"]="$opt_default"
    fi
  done
}
# :command.parse_requirements
orcli_list_parse_requirements() {
# :command.fixed_flags_filter
@ -3511,6 +3951,7 @@ run() {
"import") orcli_import_command ;;
"import csv") orcli_import_csv_command ;;
"import tsv") orcli_import_tsv_command ;;
"import txt") orcli_import_txt_command ;;
"list") orcli_list_command ;;
"info") orcli_info_command ;;
"test") orcli_test_command ;;

View File

@ -153,7 +153,7 @@ commands:
--encoding "ISO-8859-1" \\\\
--limit 100 \\\\
--trimStrings \\\\
--projectName "duplicates"
--projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: tsv
@ -191,7 +191,44 @@ commands:
--encoding "ISO-8859-1" \\\\
--limit 100 \\\\
--trimStrings \\\\
--projectName "duplicates"
--projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: txt
help: import text files (txt)
args:
- *file
flags:
- *blankCellsAsStrings
- &columnWidths
long: --columnWidths
help: set columnWidths (numbers separated by comma)
arg: columnWidths
default: ""
- *encoding_import
- *guessCellValueTypes
- *headerLines
- *ignoreLines
- *includeFileSources
- *includeArchiveFileName
- *limit
- *projectName
- *projectTags
- *quiet
- *skipBlankRows
- *skipDataLines
examples:
- orcli import txt "file"
- orcli import txt "file1" "file2"
- head -n 100 "file" | orcli import txt
- orcli import txt "https://example.com/file.txt"
- |-
orcli import txt "file" \\\\
--columnWidths "7,5" \\\\
--ignoreLines 1 \\\\
--encoding "ISO-8859-1" \\\\
--limit 100 \\\\
--projectName "duplicates" \\\\
--projectTags "test,urgent"
- name: list

69
src/import_txt_command.sh Normal file
View File

@ -0,0 +1,69 @@
# shellcheck shell=bash disable=SC2154
#
# Body of the `orcli import txt` command.
#
# Assembles the importer-specific POST fields -- the fixed-width "format"
# identifier and a JSON "options" object built from the parsed flags in the
# `args` array -- and passes them to post_import.
#
# Reads:  args (parsed flags), file ('-' means "read standard input")
# Writes: data (POST fields; presumably initialised by init_import -- TODO
#         confirm), options, projectTags
# Exits 1 (after printing the usage text) when standard input was selected
# but no data is available on it.
#
# NOTE(review): string values (--encoding, --projectName, tags) are embedded
# into the JSON without escaping; a value containing `"` or `\` would yield
# malformed JSON.

# call init_import function to eval args and to set basic post data
init_import

# check if stdin is present if selected
# (`read -t 0` only tests for available input, it does not consume any)
if [[ ${file} == '-' ]] && ! read -u 0 -t 0; then
  orcli_import_txt_usage
  exit 1
fi

# assemble specific post data (some options require json format)
data+=("format=text/line-based/fixed-width")

# columnWidths is always emitted, so every following member can safely be
# prefixed with ', '.
options='{ '
options+="\"columnWidths\": [ ${args[--columnWidths]} ]"
if [[ ${args[--blankCellsAsStrings]} ]]; then
  options+=', '
  options+='"storeBlankCellsAsNulls": false'
fi
if [[ ${args[--encoding]} ]]; then
  options+=', '
  options+="\"encoding\": \"${args[--encoding]}\""
fi
if [[ ${args[--guessCellValueTypes]} ]]; then
  options+=', '
  options+='"guessCellValueTypes": true'
fi
if [[ ${args[--headerLines]} ]]; then
  options+=', '
  options+="\"headerLines\": ${args[--headerLines]}"
fi
if [[ ${args[--ignoreLines]} ]]; then
  options+=', '
  options+="\"ignoreLines\": ${args[--ignoreLines]}"
fi
if [[ ${args[--includeFileSources]} ]]; then
  options+=', '
  # the key must be double-quoted like every other member; an unquoted key
  # is not valid JSON
  options+='"includeFileSources": true'
fi
if [[ ${args[--includeArchiveFileName]} ]]; then
  options+=', '
  options+='"includeArchiveFileName": true'
fi
if [[ ${args[--limit]} ]]; then
  options+=', '
  options+="\"limit\": ${args[--limit]}"
fi
if [[ ${args[--projectName]} ]]; then
  options+=', '
  options+="\"projectName\": \"${args[--projectName]}\""
fi
if [[ ${args[--skipBlankRows]} ]]; then
  options+=', '
  options+='"storeBlankRows": false'
fi
if [[ ${args[--skipDataLines]} ]]; then
  options+=', '
  options+="\"skipDataLines\": ${args[--skipDataLines]}"
fi
if [[ ${args[--projectTags]} ]]; then
  # split the comma separated list, then re-join it as quoted JSON strings;
  # printf emits a leading comma for every element, cut drops the first one
  IFS=',' read -ra projectTags <<< "${args[--projectTags]}"
  options+=', '
  options+="\"projectTags\": [ $(printf ',"'%s'"' "${projectTags[@]}" | cut -c2-) ]"
fi
options+=' }'
data+=("options=${options}")

# call post_import function to post data and validate results
post_import "${data[@]}"

View File

@ -50,6 +50,10 @@ send_completions() {
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnNames --encoding --guessCellValueTypes --headerLines --help --ignoreLines --ignoreQuoteCharacter --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --quoteCharacter --skipBlankRows --skipDataLines --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import txt\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--blankCellsAsStrings --columnWidths --encoding --guessCellValueTypes --headerLines --help --ignoreLines --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --skipBlankRows --skipDataLines -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export tsv\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--encoding --facets --help --output --quiet -h -q")" -- "$cur" )'
echo $' ;;'
@ -63,7 +67,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--help -h csv tsv txt")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'export\'*)'