commit 09986428d1 (parent 634d61d7a6): split up code

@@ -0,0 +1,28 @@ (new file)
## How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts

tested on Fedora 32 with bash 5.0.17 and curl 7.69.1

### Quick start

1. Clone this gist

```
git clone https://gist.github.com/d76bd27fbc4b8ab6d683822cdf61f81d.git bash-refine
```

2. Execute all supplied examples for a quick demo

```
cd bash-refine
./templates.sh
```

### Build your own workflow

3. Copy the minimal pre-structured script to a new file

```
cp minimal.sh myworkflow.sh
```

4. Use the templates in `templates.sh` to develop your workflow
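
A workflow script then just sources `bash-refine.sh` and chains the snippets.
A minimal sketch (the function names are those defined in `bash-refine.sh`;
`input/example1.csv` stands in for your own data):

```
#!/bin/bash
cd "${BASH_SOURCE%/*}/" || exit 1
source bash-refine.sh
init          # check requirements, set trap, create workspace, tee to logfile
refine_start  # launch the OpenRefine server
projects["mydata"]="input/example1.csv"
# <-- insert import/transform/export snippets from templates.sh here -->
refine_stop   # shut the server down again
```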

bash-refine.sh
@@ -1,26 +1,17 @@
 #!/bin/bash
-# bash-refine.sh, Felix Lohmeier, v1.0.0, 2020-07-09
-# How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts
+# bash-refine v1.1.0: bash-refine.sh, Felix Lohmeier, 2020-07-10
 # https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
-# tested on Fedora 32 with OpenRefine 3.3, bash 5.0.17, curl 7.69.1 and jq 1.4
 # license: MIT License https://choosealicense.com/licenses/mit/

 # TODO: support for macOS
-# TODO: example for setting metadata
-# TODO: example for engine config (facets)

-# make script executable from another directory
-cd "$(dirname "${0}")" || exit 1
-
 # ================================== CONFIG ================================== #

-port="3333"
-endpoint="http://localhost:${port}"
+endpoint="http://localhost:3333"
 memory="1400M" # increase to available RAM
 date="$(date +%Y%m%d_%H%M%S)"
 workspace="output/${date}"
 logfile="${workspace}/${date}.log"

 csrf=true # set to false for OpenRefine < 3.3
 jq="jq" # path to executable
 openrefine="openrefine/refine" # path to executable
@@ -78,7 +69,7 @@ function refine_start() {
 echo "start OpenRefine server..."
 local dir
 dir="$(readlink -f "${workspace}")"
-${openrefine} -v warn -m "${memory}" -p "${port}" -d "${dir}" &
+${openrefine} -v warn -m "${memory}" -p "${endpoint##*:}" -d "${dir}" &
 pid_server=${!}
 timeout 30s bash -c "until curl -s \"${endpoint}\" \
 | cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
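
(Note on the change above: with the `port` variable gone, the port is now
derived from `endpoint` via bash parameter expansion — `##*:` strips the
longest prefix up to the last colon. The idiom in isolation, with the default
config:)

```
endpoint="http://localhost:3333"
echo "${endpoint##*:}"   # -> 3333 (everything after the last ":")
```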
@@ -207,7 +198,7 @@ function checkpoint_stats {
 # calculate and print run time for each step
 for i in "${!keys[@]}"; do
 diffsec=$(( values[$((i + 1))] - values[i] ))
-printf "%36s %s %s %s\n" "${keys[$i]}" "($((i + 1)))" \
+printf "%35s %s %s %s\n" "${keys[$i]}" "($((i + 1)))" \
 "$(date -d @"${values[$i]}")" \
 "($(date -d @${diffsec} -u +%H:%M:%S))"
 done
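
(`checkpoint_stats` keeps epoch seconds per checkpoint, subtracts adjacent
values, and formats the difference as a clock time by treating the number of
seconds as a UTC timestamp — valid for runs under 24 hours. The trick in
isolation, assuming GNU date:)

```
diffsec=3725
date -d @${diffsec} -u +%H:%M:%S   # -> 01:02:05
```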
@@ -223,552 +214,10 @@ function count_output {
 }

 function init() {
+# check requirements and download software if necessary
+requirements
 # set trap, create directories and tee to log file
 trap 'error "script interrupted!"' HUP INT QUIT TERM
 mkdir -p "${workspace}"
 exec &> >(tee -a "${logfile}")
 }

-# ======================= TEMPLATES FOR YOUR WORKFLOW ======================== #
-# To increase readability, you may prefer to split up the code:
-# - move all code below to a separate script (e.g. one for each workflow)
-# - add the following lines at the beginning of the new file(s)
-# #!/bin/bash
-# . bash-refine.sh

[... the remaining ~530 deleted lines — the startup, mockup test data, import,
transform, export, utilities and finish templates that used to live inline
here — move essentially unchanged into the new files minimal.sh and
templates.sh, shown in full below ...]
minimal.sh
@@ -0,0 +1,40 @@ (new file)

#!/bin/bash
# bash-refine v1.1.0: minimal.sh, Felix Lohmeier, 2020-07-10
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
# license: MIT License https://choosealicense.com/licenses/mit/

# =============================== ENVIRONMENT ================================ #

cd "${BASH_SOURCE%/*}/" || exit 1
source bash-refine.sh
init

# ================================= STARTUP ================================== #

checkpoint "Startup"; echo
refine_start; echo

# ================================== IMPORT ================================== #

checkpoint "Import"; echo

# <-- insert snippet from templates.sh here -->

# ================================ TRANSFORM ================================= #

checkpoint "Transform"; echo

# <-- insert snippet from templates.sh here -->

# ================================== EXPORT ================================== #

checkpoint "Export"; echo

# <-- insert snippet from templates.sh here -->

# ================================== FINISH ================================== #

checkpoint "Finish"; echo
refine_stop; echo
checkpoint_stats; echo
count_output
templates.sh
@@ -0,0 +1,546 @@ (new file)

#!/bin/bash
# bash-refine v1.1.0: templates.sh, Felix Lohmeier, 2020-07-10
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
# license: MIT License https://choosealicense.com/licenses/mit/

# TODO: example for setting metadata
# TODO: example for engine config (facets)

# ======================= TEMPLATES FOR YOUR WORKFLOW ======================== #

# The following code shows several options for import, transform and export
# use the templates to write your own scripts or execute this file for a demo

# =============================== ENVIRONMENT ================================ #

# make script executable from another directory
cd "${BASH_SOURCE%/*}/" || exit 1

# source the main script
source bash-refine.sh

### override default config?
#endpoint="http://localhost:3333"
#memory="1400M" # increase to available RAM
#date="$(date +%Y%m%d_%H%M%S)"
#workspace="output/${date}"
#logfile="${workspace}/${date}.log"
#csrf=true # set to false for OpenRefine < 3.3
#jq="jq" # path to executable
#openrefine="openrefine/refine" # path to executable

# check requirements, set trap, create workspace and tee to logfile
init

# ================================= STARTUP ================================== #

checkpoint "Startup"; echo

# start OpenRefine server
refine_start; echo

# ============================= MOCKUP TEST DATA ============================= #

mkdir -p input

cat << "DATA" > "input/example1.csv"
a,b,c
1,2,3
0,0,0
$,\,'
DATA

cat << "DATA" > "input/example2.tsv"
a b c
' \ $
0 0 0
3 2 1
DATA

cat << "DATA" > "input/example-operations-history.json"
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "apply-from-file",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": "grel:value.replace('2','TEST')",
    "onError": "set-to-blank"
  }
]
DATA
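
# aside: the "DATA" delimiters above are quoted, so bash expands nothing inside
# the heredocs (the literal $, \ and ' in the test data survive untouched); an
# unquoted delimiter expands variables and command substitutions. A tiny demo:
#   $ x=1; cat << "EOF"
#   ${x}
#   EOF
#   ${x}   <- quoted delimiter: kept literal
#   $ x=1; cat << EOF
#   ${x}
#   EOF
#   1      <- unquoted delimiter: expanded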

# ================================== IMPORT ================================== #

checkpoint "Import"; echo

# declare input
projects["from heredoc"]=""
projects["csv file example"]="input/example1.csv"
projects["tsv file example"]="input/example2.tsv"
projects["another csv example"]="input/example1.csv"
projects["yet another csv example"]="input/example1.csv"

# --------------------------- IMPORT FROM HEREDOC ---------------------------- #

# quoted heredoc ("DATA") will not be expanded by bash (no escaping needed)
# project id will be stored in ${projects[from heredoc]}
p="from heredoc"
f="" # optional filename, will be stored in OpenRefine project metadata
echo "import heredoc..."
if curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@-$(if [[ -n $f ]]; then echo ";filename=${f}"; fi)" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{
    "encoding": "UTF-8",
    "separator": " "
  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workspace}/${p}.id" \
  << "DATA"
a b c
1 2 3
0 0 0
$ \ '
DATA
then
  log "imported heredoc as ${p}"
else
  error "import of ${p} failed!"
fi
refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
echo
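
# aside: refine_csrf (defined in bash-refine.sh) appends the csrf_token query
# parameter that OpenRefine 3.3+ requires for POST requests (and nothing when
# csrf=false). Conceptually it boils down to a sketch like this —
# get-csrf-token is the real endpoint; the '.token' field name here is an
# assumption about the response shape, see bash-refine.sh for the actual code:
#   token="$(curl -fs "${endpoint}/command/core/get-csrf-token" | "${jq}" -r '.token')"
#   echo "?csrf_token=${token}"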

# ---------------------------- IMPORT FROM FILE ------------------------------ #

# project id will be stored in ${projects[tsv file example]}
p="tsv file example"
echo "import file ${projects[$p]} ..."
if curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@${projects[$p]}" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{
    "encoding": "UTF-8",
    "separator": "\t"
  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workspace}/${p}.id"
then
  log "imported ${projects[$p]} as ${p}"
else
  error "import of ${projects[$p]} failed!"
fi
refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
echo

# -------------------- IMPORT MULTIPLE FILES (PARALLEL) ---------------------- #

# project ids will be stored in ${projects[another csv example]} etc.
ps=( "csv file example" "another csv example" "yet another csv example" )
echo "import files" \
  "$(for p in "${ps[@]}"; do printf "%s" "${projects[$p]} "; done)..."
for p in "${ps[@]}"; do
  (if curl -fs --write-out "%{redirect_url}\n" \
    --form project-file="@${projects[$p]}" \
    --form project-name="${p}" \
    --form format="line-based" \
    --form options='{
      "encoding": "UTF-8",
      "separator": ","
    }' \
    "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
    > "${workspace}/${p}.id"
  then
    log "imported ${projects[$p]} as ${p}"
  else
    error "import of ${projects[$p]} failed!"
  fi) &
  monitor "${p}"
done
monitoring
for p in "${ps[@]}"; do
  refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
done
echo
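
# aside: each import above runs in a backgrounded subshell — (...) & — and
# monitor/monitoring (from bash-refine.sh) presumably track those jobs and
# wait for all of them. The bare-bones bash pattern behind this looks like:
#   (long_running_task) &
#   pids+=("$!")        # $! holds the PID of the last background job
#   wait "${pids[@]}"   # block until every job has finished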

# ================================ TRANSFORM ================================= #

checkpoint "Transform"; echo

# ------------------------ APPLY OPERATIONS FROM FILE ------------------------ #

p="csv file example"
f="input/example-operations-history.json"
echo "apply ${f} to ${p}..."
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode operations@"${f}" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# ---------------------- APPLY OPERATIONS FROM HEREDOC ----------------------- #

# quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
p="csv file example"
echo "add column apply-from-heredoc to ${p}..."
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "apply-from-heredoc",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": "grel:value.replace('2','TEST')",
    "onError": "set-to-blank"
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# ---------------- APPLY OPERATIONS FROM HEREDOC AND VARIABLES --------------- #

# unquoted heredocs with variable and multi-line expression (requires jq)
# \ must be used to quote the characters \, $, and `.
p="csv file example"
replace='TEST'
column="apply with variables"
echo "add column ${column} to ${p}..."
read -r -d '' expression << EXPRESSION
grel:value.replace(
'2',
'${replace}'
)
EXPRESSION
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << JSON
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "${column}",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": $(echo "${expression}" | ${jq} -s -R '.'),
    "onError": "set-to-blank"
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo
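
# aside: the $(echo "${expression}" | ${jq} -s -R '.') above turns the
# multi-line GREL expression into a single JSON string: -R reads raw text,
# -s slurps the whole input into one string, and '.' prints it JSON-encoded.
# For example:
#   $ printf 'line1\nline2\n' | jq -s -R '.'
#   "line1\nline2\n"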

# ------ APPLY OPERATIONS FROM HEREDOC TO MULTIPLE PROJECTS (PARALLEL) ------ #

# quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
ps=( "another csv example" "yet another csv example" )
echo "add column apply-from-heredoc to" "${ps[@]}" "..."
for p in "${ps[@]}"; do
  (if curl -fs \
    --data project="${projects[$p]}" \
    --data-urlencode "operations@-" \
    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
    << "JSON"
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "apply-from-heredoc",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": "grel:value.replace('2','TEST')",
    "onError": "set-to-blank"
  }
]
JSON
  then
    log "transformed ${p} (${projects[$p]})"
  else
    error "transform ${p} (${projects[$p]}) failed!"
  fi) &
  monitor "${p}"
done
monitoring
echo

# ------------- APPLY MULTIPLE OPERATIONS GENERATED FROM HEREDOC ------------- #

# unquoted heredoc (JSON) with variables and multiplied (requires jq)
# \ must be used to quote the characters \, $, and `.
p="csv file example"
columns=( "apply-from-file" "apply-from-heredoc" )
echo "delete columns" "${columns[@]}" "in ${p}..."
for column in "${columns[@]}"; do
  cat << JSON >> "${workspace}/${p}.tmp"
[
  {
    "op": "core/column-removal",
    "columnName": "${column}"
  }
]
JSON
done
if "${jq}" -s add "${workspace}/${p}.tmp" | curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode operations@- \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
then
  log "transformed ${p} (${projects[$p]})"
  rm "${workspace}/${p}.tmp"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo
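
# aside: the loop above writes one single-element JSON array per column into
# the .tmp file; jq -s add then slurps them into an array of arrays and
# concatenates them into one flat operations list. For example:
#   $ printf '[{"a":1}]\n[{"b":2}]\n' | jq -s add
#   [{"a":1},{"b":2}]   (pretty-printed by jq)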

# ================================== EXPORT ================================== #

checkpoint "Export"; echo

# ----------------------------- EXPORT TO STDOUT ----------------------------- #

p="csv file example"
format="tsv"
echo "export ${p} in ${format} format..."
if curl -fs \
  --data project="${projects[$p]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows"
then
  log "exported ${p} (${projects[$p]})"
else
  error "export of ${p} (${projects[$p]}) failed!"
fi
echo

# ------------------------------ EXPORT TO FILE ------------------------------ #

p="csv file example"
format="csv"
echo "export ${p} to ${format} file..."
if curl -fs \
  --data project="${projects[$p]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
  > "${workspace}/${p}.${format}"
then
  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
else
  error "export of ${p} (${projects[$p]}) failed!"
fi
echo

# ------------------------- TEMPLATING EXPORT TO FILE ------------------------ #

p="csv file example"
format="json"
echo "export ${p} to ${format} file using template..."
IFS= read -r -d '' template << "TEMPLATE"
{
"a": {{cells['a'].value.jsonize()}},
"b": {{cells['b'].value.jsonize()}},
"c": {{cells['c'].value.jsonize()}}
}
TEMPLATE
if echo "${template}" | head -c -2 | curl -fs \
  --data project="${projects[$p]}" \
  --data format="template" \
  --data prefix="[
" \
  --data suffix="
]" \
  --data separator=",
" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  --data-urlencode template@- \
  "${endpoint}/command/core/export-rows" \
  > "${workspace}/${p}.${format}"
then
  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
else
  error "export of ${p} (${projects[$p]}) failed!"
fi
echo
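
# aside: read keeps the heredoc's trailing newline and echo adds one more, so
# head -c -2 trims exactly those two bytes before the template reaches curl;
# combined with the prefix "[", separator "," and suffix "]" (each carrying a
# newline) the export becomes one JSON array with an object per row, roughly:
#   [
#   { "a": ..., "b": ..., "c": ... },
#   { "a": ..., "b": ..., "c": ... }
#   ]
# (the placeholder values depend on the transform steps applied above)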

# ------------------- EXPORT TO MULTIPLE FILES (PARALLEL) -------------------- #

ps=( "another csv example" "yet another csv example" )
format="tsv"
echo "export" "${ps[@]}" "to ${format} files..."
for p in "${ps[@]}"; do
  (if curl -fs \
    --data project="${projects[$p]}" \
    --data format="${format}" \
    --data engine='{"facets":[],"mode":"row-based"}' \
    "${endpoint}/command/core/export-rows" \
    > "${workspace}/${p}.${format}"
  then
    log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
  else
    error "export of ${p} (${projects[$p]}) failed!"
  fi) &
  monitor "${p}"
done
monitoring
echo

# ================================ UTILITIES ================================= #

checkpoint "Utilities"; echo

# ------------------------------ LIST PROJECTS ------------------------------- #

# get all project metadata and reshape json to print a list (requires jq)
echo "list projects..."
if curl -fs --get \
  "${endpoint}/command/core/get-all-project-metadata" \
  | "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"'
then
  : #log "printed list of projects"
else
  error "getting list of projects failed!"
fi
echo
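
# aside: the jq filter flattens the {id: metadata} map into one "id: name"
# line per project, e.g. (the project ids here are made up):
#   1589216362711: from heredoc
#   1589216363112: csv file example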

# ------------------------------- GET METADATA ------------------------------- #

# get project metadata and reshape json to include project id (requires jq)
p="csv file example"
echo "metadata for ${p}..."
if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-project-metadata" \
  | "${jq}" "{ id: ${projects[$p]} } + ."
then
  : #log "printed metadata of ${p} (${projects[$p]})"
else
  error "getting metadata of ${p} (${projects[$p]}) failed!"
fi
echo

# ------------------------------ GET ROW COUNT ------------------------------- #

# get total number of rows
p="csv file example"
echo "total number of rows in ${p}..."
if curl -fs --get \
  --data project="${projects[$p]}" \
  --data limit=0 \
  "${endpoint}/command/core/get-rows" \
  | tr "," "\n" | grep total | cut -d ":" -f 2
then
  : #log "printed row count of ${p} (${projects[$p]})"
else
  error "getting row count of ${p} (${projects[$p]}) failed!"
fi
echo
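
# aside: the tr/grep/cut chain keeps this snippet usable without jq; where jq
# is available anyway, piping the get-rows response through "${jq}" '.total'
# should yield the same number (assuming the field is named total, which is
# what the grep above relies on).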

# ------------------------------- GET COLUMNS -------------------------------- #

# get column names from project model (requires jq)
p="csv file example"
echo "column names of ${p}..."
if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-models" \
  | "${jq}" -r '.columnModel | .columns[] | .name'
then
  : #log "printed column names of ${p} (${projects[$p]})"
else
  error "getting column names of ${p} (${projects[$p]}) failed!"
fi
echo

# -------------------------- GET OPERATIONS HISTORY -------------------------- #

# get operations history and reshape json to make it applicable (requires jq)
p="csv file example"
f="${workspace}/${p}_history.json"
echo "history of operations for ${p}..."
if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-operations" \
  | "${jq}" '[ .entries[] | .operation ]' \
  > "${f}"
then
  log "saved ops history of ${p} (${projects[$p]}) to ${f}"
else
  error "getting ops history of ${p} (${projects[$p]}) failed!"
fi
echo

# ---------------------------- GET IMPORT HISTORY ---------------------------- #

# get project metadata and filter import options history (requires jq)
p="csv file example"
echo "history of import for ${p}..."
if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-project-metadata" \
  | "${jq}" ".importOptionMetadata[0]"
then
  : #log "printed import history of ${p} (${projects[$p]})"
else
  error "getting import history of ${p} (${projects[$p]}) failed!"
fi
echo

# ------------------------------ DELETE PROJECT ------------------------------ #

# delete a project (rarely needed for batch processing)
p="yet another csv example"
echo "delete project ${p}..."
if curl -fs \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/delete-project$(refine_csrf)" > /dev/null
then
  log "deleted ${p} (${projects[$p]})"
else
  error "deletion of ${p} (${projects[$p]}) failed!"
fi
echo

# ================================== FINISH ================================== #

checkpoint "Finish"; echo

# stop OpenRefine server
refine_stop; echo

# calculate run time based on checkpoints
checkpoint_stats; echo

# word count on all files in workspace
count_output