import json/jsonl: --rename

This commit is contained in:
Felix Lohmeier 2023-10-28 21:37:02 +00:00
parent 090cdbd1d3
commit 4653af8e18
7 changed files with 93 additions and 12 deletions

View File

@ -12,6 +12,9 @@ Options:
specify record path elements in JSON array
Default: [ "_" , "_" ]
--rename
rename columns after import to remove record path fragments
--guessCellValueTypes
attempt to parse cell text into numbers
@ -54,7 +57,8 @@ Examples:
orcli import json "https://example.com/file.json"
orcli import json "file" \
--recordPath '[ "_", "rows", "_" ]' \
--limit 100 \
--rename \
--storeEmptyStrings \
--trimStrings \
--projectName "duplicates" \
--projectTags "test,urgent"

View File

@ -8,6 +8,9 @@ Usage:
orcli import jsonl --help | -h
Options:
--rename
rename columns after import to remove record path fragments
--guessCellValueTypes
attempt to parse cell text into numbers
@ -49,8 +52,8 @@ Examples:
orcli import jsonl "file1" "file2"
orcli import jsonl "https://example.com/file.json"
orcli import jsonl "file" \
--recordPath '[ "_" ]' \
--limit 100 \
--rename \
--storeEmptyStrings \
--trimStrings \
--projectName "duplicates" \
--projectTags "test,urgent"

45
orcli
View File

@ -522,6 +522,11 @@ orcli_import_json_usage() {
printf " Default: [ \"_\" , \"_\" ]\n"
echo
# :flag.usage
printf " %s\n" "--rename"
printf " rename columns after import to remove record path fragments\n"
echo
# :flag.usage
printf " %s\n" "--guessCellValueTypes"
printf " attempt to parse cell text into numbers\n"
@ -587,7 +592,7 @@ orcli_import_json_usage() {
printf " orcli import json \"file\"\n"
printf " orcli import json \"file1\" \"file2\"\n"
printf " orcli import json \"https://example.com/file.json\"\n"
printf " orcli import json \"file\" \\\\\n --recordPath '[ \"_\", \"rows\", \"_\" ]' \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
printf " orcli import json \"file\" \\\\\n --recordPath '[ \"_\", \"rows\", \"_\" ]' \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
echo
fi
@ -615,6 +620,11 @@ orcli_import_jsonl_usage() {
printf "%s\n" "Options:"
# :command.usage_flags
# :flag.usage
printf " %s\n" "--rename"
printf " rename columns after import to remove record path fragments\n"
echo
# :flag.usage
printf " %s\n" "--guessCellValueTypes"
printf " attempt to parse cell text into numbers\n"
@ -680,7 +690,7 @@ orcli_import_jsonl_usage() {
printf " orcli import jsonl \"file\"\n"
printf " orcli import jsonl \"file1\" \"file2\"\n"
printf " orcli import jsonl \"https://example.com/file.json\"\n"
printf " orcli import jsonl \"file\" \\\\\n --recordPath '[ \"_\" ]' \\\\\n --limit 100 \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
printf " orcli import jsonl \"file\" \\\\\n --rename \\\\\n --storeEmptyStrings \\\\\n --trimStrings \\\\\n --projectName \"duplicates\" \\\\\n --projectTags \"test,urgent\"\n"
echo
fi
@ -1430,6 +1440,17 @@ function post_import() {
else
log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}"
fi
# json / jsonl --rename
if [[ ${args[--rename]} ]]; then
csrf="$(get_csrf)"
readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')
for c in "${columns[@]}"; do
if ! curl -fs -o /dev/null --data project="$projectid" --data "oldColumnName=${c}" --data "newColumnName=${c##_ - }" "${OPENREFINE_URL}/command/core/rename-column${csrf}"; then
error "renaming columns in ${projectname} failed!"
fi
done
log "renamed columns in ${projectname}"
fi
}
# src/lib/send_completions.sh
@ -1473,7 +1494,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'completions\'*)'
@ -1481,7 +1502,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import json\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import csv\'*)'
@ -3289,6 +3310,14 @@ orcli_import_json_parse_requirements() {
fi
;;
# :flag.case
--rename)
# :flag.case_no_arg
args['--rename']=1
shift
;;
# :flag.case
--guessCellValueTypes)
@ -3436,6 +3465,14 @@ orcli_import_jsonl_parse_requirements() {
while [[ $# -gt 0 ]]; do
key="$1"
case "$key" in
# :flag.case
--rename)
# :flag.case_no_arg
args['--rename']=1
shift
;;
# :flag.case
--guessCellValueTypes)

View File

@ -205,6 +205,9 @@ commands:
help: specify record path elements in JSON array
arg: json
default: "[ \\\"_\\\" , \\\"_\\\" ]"
- &rename
long: --rename
help: rename columns after import to remove record path fragments
- *guessCellValueTypes
- *includeFileSources
- *includeArchiveFileName
@ -223,7 +226,8 @@ commands:
- |-
orcli import json "file" \\\\
--recordPath '[ "_", "rows", "_" ]' \\\\
--limit 100 \\\\
--rename \\\\
--storeEmptyStrings \\\\
--trimStrings \\\\
--projectName "duplicates" \\\\
--projectTags "test,urgent"
@ -233,6 +237,7 @@ commands:
args:
- *file
flags:
- *rename
- *guessCellValueTypes
- *includeFileSources
- *includeArchiveFileName
@ -248,8 +253,8 @@ commands:
- orcli import jsonl "https://example.com/file.json"
- |-
orcli import jsonl "file" \\\\
--recordPath '[ "_" ]' \\\\
--limit 100 \\\\
--rename \\\\
--storeEmptyStrings \\\\
--trimStrings \\\\
--projectName "duplicates" \\\\
--projectTags "test,urgent"

View File

@ -42,4 +42,15 @@ function post_import() {
else
log "imported ${args[file]}" "${redirect_url}" "name: ${projectname}" "rows: ${rows}"
fi
# json / jsonl --rename
if [[ ${args[--rename]} ]]; then
csrf="$(get_csrf)"
readarray -t columns < <(curl -fs --get --data project="$projectid" "${OPENREFINE_URL}/command/core/get-columns-info" | jq -r '.[].name')
for c in "${columns[@]}"; do
if ! curl -fs -o /dev/null --data project="$projectid" --data "oldColumnName=${c}" --data "newColumnName=${c##_ - }" "${OPENREFINE_URL}/command/core/rename-column${csrf}"; then
error "renaming columns in ${projectname} failed!"
fi
done
log "renamed columns in ${projectname}"
fi
}

View File

@ -39,7 +39,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import jsonl\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'completions\'*)'
@ -47,7 +47,7 @@ send_completions() {
echo $' ;;'
echo $''
echo $' \'import json\'*)'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' while read -r; do COMPREPLY+=( "$REPLY" ); done < <( compgen -W "$(_orcli_completions_filter "--guessCellValueTypes --help --includeArchiveFileName --includeFileSources --limit --projectName --projectTags --quiet --recordPath --rename --storeEmptyStrings --trimStrings -h -q")" -- "$cur" )'
echo $' ;;'
echo $''
echo $' \'import csv\'*)'

View File

@ -0,0 +1,21 @@
#!/bin/bash
# Test case: import a JSON file with the --rename flag, export the resulting
# project as TSV and compare it with the expected fixture.
# Fixture paths (data/...) are relative, so they resolve against the directory
# the script is started from.
# The script's exit status is that of the final diff (last command executed).
t="import-json-rename"
# create tmp directory
tmpdir="$(mktemp -d)"
# remove the tmp directory on shell exit (0) and on SIGINT (2), SIGQUIT (3),
# SIGTERM (15)
trap '{ rm -rf "${tmpdir}"; }' 0 2 3 15
# input
cp data/example.json "${tmpdir}/${t}.json"
# assertion
cp data/example.tsv "${tmpdir}/${t}.assert"
# action
# work inside the tmp directory so the files below can be referenced by name
cd "${tmpdir}" || exit 1
# the project is named after the test so the export below can look it up by name
orcli import json "${t}.json" --projectName "${t}" --rename
orcli export tsv "${t}" > "${t}.output"
# test
# unified diff of expected vs. actual; any difference gives a non-zero status
diff -u "${t}.assert" "${t}.output"