split up code

2024-01-30 22:58:35 +01:00 · 2020-07-10 18:28:58 +02:00 · 2020-07-10 18:28:58 +02:00 · 09986428d1
commit 09986428d1
parent 634d61d7a6
4 changed files with 620 additions and 557 deletions
--- a/bash-refine.md
+++ b/bash-refine.md
@ -0,0 +1,28 @@
+## How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts
+
+tested on Fedora 32 with bash 5.0.17 and curl 7.69.1
+
+### Quick start
+
+1. Clone this gist
+
+```
+git clone https://gist.github.com/d76bd27fbc4b8ab6d683822cdf61f81d.git bash-refine
+```
+
+2. Execute all supplied examples for a quick demo
+
+```
+cd bash-refine
+./templates.sh
+```
+
+### Build your own workflow
+
+3. Copy the minimal pre-structured script to a new file
+
+```
+cp minimal.sh myworkflow.sh
+```
+
+4. Use the templates in `templates.sh` to develop your workflow
--- a/bash-refine.sh
+++ b/bash-refine.sh
@ -1,26 +1,17 @@
 #!/bin/bash
-# bash-refine.sh, Felix Lohmeier, v1.0.0, 2020-07-09
-# How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts
+# bash-refine v1.1.0: bash-refine.sh, Felix Lohmeier, 2020-07-10
 # https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
-# tested on Fedora 32 with OpenRefine 3.3, bash 5.0.17, curl 7.69.1 and jq 1.4
 # license: MIT License https://choosealicense.com/licenses/mit/

 # TODO: support for macOS
-# TODO: example for setting metadata
-# TODO: example for engine config (facets)
-
-# make script executable from another directory
-cd "$(dirname "${0}")" || exit 1

 # ================================== CONFIG ================================== #

-port="3333"
-endpoint="http://localhost:${port}"
+endpoint="http://localhost:3333"
 memory="1400M" # increase to available RAM
 date="$(date +%Y%m%d_%H%M%S)"
 workspace="output/${date}"
 logfile="${workspace}/${date}.log"
-
 csrf=true # set to false for OpenRefine < 3.3
 jq="jq" # path to executable
 openrefine="openrefine/refine" # path to executable
@ -78,7 +69,7 @@ function refine_start() {
  echo "start OpenRefine server..."  
  local dir
  dir="$(readlink -f "${workspace}")"
-  ${openrefine} -v warn -m "${memory}" -p "${port}" -d "${dir}" &
+  ${openrefine} -v warn -m "${memory}" -p "${endpoint##*:}" -d "${dir}" &
  pid_server=${!}
  timeout 30s bash -c "until curl -s \"${endpoint}\" \
    | cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
@ -207,7 +198,7 @@ function checkpoint_stats {
  # calculate and print run time for each step
  for i in "${!keys[@]}"; do
    diffsec=$(( values[$((i + 1))] - values[i] ))
-    printf "%36s %s %s %s\n" "${keys[$i]}" "($((i + 1)))" \
+    printf "%35s %s %s %s\n" "${keys[$i]}" "($((i + 1)))" \
      "$(date -d @"${values[$i]}")" \
      "($(date -d @${diffsec} -u +%H:%M:%S))"
  done
@ -223,552 +214,10 @@ function count_output {
 }

 function init() {
+  # check requirements and download software if necessary
+  requirements
  # trap HUP/INT/QUIT/TERM, create workspace dir, tee stdout+stderr to log file
  trap 'error "script interrupted!"' HUP INT QUIT TERM
  mkdir -p "${workspace}"
  exec &> >(tee -a "${logfile}")
 }
-
-# ======================= TEMPLATES FOR YOUR WORKFLOW ======================== #
-
-# To increase readability, you may prefer to split up the code:
-# - move all code below to a separate script (e.g. one for each workflow)
-# - add the following lines at the beginning of the new file(s)
-#   #!/bin/bash
-#   . bash-refine.sh
-
-# ================================= STARTUP ================================== #
-
-checkpoint "Startup"
-echo
-
-# check requirements and download software if necessary
-requirements
-
-# override default config?
-#port="3333"
-#endpoint="http://localhost:${port}"
-#memory="1400M"
-#date="$(date +%Y%m%d_%H%M%S)"
-#workspace="output/${date}"
-#logfile="${workspace}/${date}.log"
-
-# set trap, create directories and tee to log file
-init
-
-# start OpenRefine server
-refine_start
-echo
-
-# ============================= MOCKUP TEST DATA ============================= #
-
-mkdir -p input
-
-cat << "DATA" > "input/example1.csv"
-a,b,c
-1,2,3
-0,0,0
-$,\,'
-DATA
-
-cat << "DATA" > "input/example2.tsv"
-a	b	c
-'	\	$
-0	0	0
-3	2	1
-DATA
-
-cat << "DATA" > "input/example-operations-history.json"
-[
-  {
-    "op": "core/column-addition",
-    "engineConfig": {
-      "mode": "row-based"
-    },
-    "newColumnName": "apply-from-file",
-    "columnInsertIndex": 2,
-    "baseColumnName": "b",
-    "expression": "grel:value.replace('2','TEST')",
-    "onError": "set-to-blank"
-  }
-]
-DATA
-
-# ================================== IMPORT ================================== #
-
-checkpoint "Import"
-echo
-
-# declare input
-projects["from heredoc"]=""
-projects["csv file example"]="input/example1.csv"
-projects["tsv file example"]="input/example2.tsv"
-projects["another csv example"]="input/example1.csv"
-projects["yet another csv example"]="input/example1.csv"
-
-# --------------------------- IMPORT FROM HEREDOC ---------------------------- #
-
-# quoted heredoc ("DATA") will not be expanded by bash (no escaping needed)
-# project id will be stored in as ${projects[csv file example]}
-p="from heredoc"
-f="" # optional filename, will be stored in OpenRefine project metadata
-echo "import heredoc..."
-if curl -fs --write-out "%{redirect_url}\n" \
-  --form project-file="@-$(if [[ -n $f ]]; then echo ";filename=${f}"; fi)" \
-  --form project-name="${p}" \
-  --form format="text/line-based/*sv" \
-  --form options='{
-                    "encoding": "UTF-8",
-                    "separator": " "
-                  }' \
-  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
-  > "${workspace}/${p}.id" \
-  << "DATA"
-a b c
-1 2 3
-0 0 0
-$ \ '
-DATA
-then
-  log "imported heredoc as ${p}"
-else
-  error "import of ${p} failed!"
-fi
-refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
-echo
-
-# ---------------------------- IMPORT FROM FILE ------------------------------ #
-
-# project id will be stored in ${projects[tsv file example]}
-p="tsv file example"
-echo "import file ${projects[$p]} ..."
-if curl -fs --write-out "%{redirect_url}\n" \
-  --form project-file="@${projects[$p]}" \
-  --form project-name="${p}" \
-  --form format="text/line-based/*sv" \
-  --form options='{
-                    "encoding": "UTF-8",
-                    "separator": "\t"
-                  }' \
-  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
-  > "${workspace}/${p}.id"
-then
-  log "imported ${projects[$p]} as ${p}"
-else
-  error "import of ${projects[$p]} failed!"
-fi
-refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
-echo
-
-# -------------------- IMPORT MULTIPLE FILES (PARALLEL) ---------------------- #
-
-# project ids will be stored in ${projects[another csv example]} etc.
-ps=( "csv file example" "another csv example" "yet another csv example" )
-echo "import files" \
-  "$(for p in "${ps[@]}"; do printf "%s" "${projects[$p]} "; done)..."
-for p in "${ps[@]}"; do
-  (if curl -fs --write-out "%{redirect_url}\n" \
-    --form project-file="@${projects[$p]}" \
-    --form project-name="${p}" \
-    --form format="line-based" \
-    --form options='{
-                    "encoding": "UTF-8",
-                    "separator": ","
-                    }' \
-    "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
-    > "${workspace}/${p}.id"
-  then
-    log "imported ${projects[$p]} as ${p}"
-  else
-    error "import of ${projects[$p]} failed!"
-  fi) &
-  monitor "${p}"
-done
-monitoring
-for p in "${ps[@]}"; do
-  refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
-done
-echo
-
-# ================================ TRANSFORM ================================= #
-
-checkpoint "Transform"
-echo
-
-# ------------------------ APPLY OPERATIONS FROM FILE ------------------------ #
-
-p="csv file example"
-f="input/example-operations-history.json"
-echo "apply ${f} to ${p}..."
-if curl -fs \
-  --data project="${projects[$p]}" \
-  --data-urlencode operations@"${f}" \
-  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
-then
-  log "transformed ${p} (${projects[$p]})"
-else
-  error "transform ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ---------------------- APPLY OPERATIONS FROM HEREDOC ----------------------- #
-
-# quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
-p="csv file example"
-echo "add column apply-from-heredoc to ${p}..."
-if curl -fs \
-  --data project="${projects[$p]}" \
-  --data-urlencode "operations@-" \
-  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
-  << "JSON"
-[
-  {
-    "op": "core/column-addition",
-    "engineConfig": {
-      "mode": "row-based"
-    },
-    "newColumnName": "apply-from-heredoc",
-    "columnInsertIndex": 2,
-    "baseColumnName": "b",
-    "expression": "grel:value.replace('2','TEST')",
-    "onError": "set-to-blank"
-  }
-]
-JSON
-then
-  log "transformed ${p} (${projects[$p]})"
-else
-  error "transform ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ---------------- APPLY OPERATIONS FROM HEREDOC AND VARIABLES --------------- #
-
-# unquoted heredocs with variable and multi-line expression (requires jq)
-# \ must be used to quote the characters \, $, and `.
-p="csv file example"
-replace='TEST'
-column="apply with variables"
-echo "add column ${column} to ${p}..."
-read -r -d '' expression << EXPRESSION
-grel:value.replace(
-  '2',
-  '${replace}'
-)
-EXPRESSION
-if curl -fs \
-  --data project="${projects[$p]}" \
-  --data-urlencode "operations@-" \
-  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
-  << JSON
-[
-  {
-    "op": "core/column-addition",
-    "engineConfig": {
-      "mode": "row-based"
-    },
-    "newColumnName": "${column}",
-    "columnInsertIndex": 2,
-    "baseColumnName": "b",
-    "expression": $(echo "${expression}" | ${jq} -s -R '.'),
-    "onError": "set-to-blank"
-  }
-]
-JSON
-then
-  log "transformed ${p} (${projects[$p]})"
-else
-  error "transform ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ------ APPLY OPERATIONS FROM HEREDOC TO MULTIPLE PROJECTS (PARALLEL)  ------ #
-
-# quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
-ps=( "another csv example" "yet another csv example" )
-echo "add column apply-from-heredoc to" "${ps[@]}" "..."
-for p in "${ps[@]}"; do
-  (if curl -fs \
-    --data project="${projects[$p]}" \
-    --data-urlencode "operations@-" \
-    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
-    << "JSON"
-  [
-    {
-      "op": "core/column-addition",
-      "engineConfig": {
-        "mode": "row-based"
-      },
-      "newColumnName": "apply-from-heredoc",
-      "columnInsertIndex": 2,
-      "baseColumnName": "b",
-      "expression": "grel:value.replace('2','TEST')",
-      "onError": "set-to-blank"
-    }
-  ]
-JSON
-  then
-    log "transformed ${p} (${projects[$p]})"
-  else
-    error "transform ${p} (${projects[$p]}) failed!"
-  fi) &
-  monitor "${p}"
-done
-monitoring
-echo
-
-# ------------- APPLY MULTIPLE OPERATIONS GENERATED FROM HEREDOC ------------- #
-
-# unquoted heredoc (JSON) with variables and multiplied (requires jq)
-# \ must be used to quote the characters \, $, and `.
-p="csv file example"
-columns=( "apply-from-file" "apply-from-heredoc" )
-echo "delete columns" "${columns[@]}" "in ${p}..."
-for column in "${columns[@]}"; do
-  cat << JSON >> "${workspace}/${p}.tmp"
-[
-  {
-    "op": "core/column-removal",
-    "columnName": "${column}"
-  }
-]
-JSON
-done
-if "${jq}" -s add "${workspace}/${p}.tmp" | curl -fs \
-  --data project="${projects[$p]}" \
-  --data-urlencode operations@- \
-  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
-then
-  log "transformed ${p} (${projects[$p]})"
-  rm "${workspace}/${p}.tmp"
-else
-  error "transform ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ================================== EXPORT ================================== #
-
-checkpoint "Export"
-echo
-
-# ----------------------------- EXPORT TO STDOUT ----------------------------- #
-
-p="csv file example"
-format="tsv"
-echo "export ${p} in ${format} format..."
-if curl -fs \
-  --data project="${projects[$p]}" \
-  --data format="tsv" \
-  --data engine='{"facets":[],"mode":"row-based"}' \
-  "${endpoint}/command/core/export-rows"
-then
-  log "exported ${p} (${projects[$p]})"
-else
-  error "export of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ------------------------------ EXPORT TO FILE ------------------------------ #
-
-p="csv file example"
-format="csv"
-echo "export ${p} to ${format} file..."
-if curl -fs \
-  --data project="${projects[$p]}" \
-  --data format="${format}" \
-  --data engine='{"facets":[],"mode":"row-based"}' \
-  "${endpoint}/command/core/export-rows" \
-  > "${workspace}/${p}.${format}"
-then
-  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
-else
-  error "export of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ------------------------- TEMPLATING EXPORT TO FILE ------------------------ #
-
-p="csv file example"
-format="json"
-echo "export ${p} to ${format} file using template..."
-IFS= read -r -d '' template << "TEMPLATE"
-  {
-    "a": {{cells['a'].value.jsonize()}},
-    "b": {{cells['b'].value.jsonize()}},
-    "c": {{cells['c'].value.jsonize()}}
-  }
-TEMPLATE
-if echo "${template}" | head -c -2 | curl -fs \
-  --data project="${projects[$p]}" \
-  --data format="template" \
-  --data prefix="[
-" \
-  --data suffix="
-]" \
-  --data separator=",
-" \
-  --data engine='{"facets":[],"mode":"row-based"}' \
-  --data-urlencode template@- \
-  "${endpoint}/command/core/export-rows" \
-  > "${workspace}/${p}.${format}"
-then
-  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
-else
-  error "export of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ------------------- EXPORT TO MULTIPLE FILES (PARALLEL) -------------------- #
-
-ps=( "another csv example" "yet another csv example" )
-format="tsv"
-echo "export" "${ps[@]}" "to ${format} files..."
-for p in "${ps[@]}"; do
-  (if curl -fs \
-    --data project="${projects[$p]}" \
-    --data format="${format}" \
-    --data engine='{"facets":[],"mode":"row-based"}' \
-    "${endpoint}/command/core/export-rows" \
-    > "${workspace}/${p}.${format}"
-  then
-    log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
-  else
-    error "export of ${p} (${projects[$p]}) failed!"
-  fi) &
-  monitor "${p}"
-done
-monitoring
-echo
-
-# ================================ UTILITIES ================================= #
-
-checkpoint "Utilities"
-echo
-
-# ------------------------------ LIST PROJECTS ------------------------------- #
-
-# get all project metadata and reshape json to print a list (requires jq)
-echo "list projects..."
-if curl -fs --get \
-  "${endpoint}/command/core/get-all-project-metadata" \
-  | "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"'
-then
-  : #log "printed list of projects"
-else
-  error "getting list of projects failed!"
-fi
-echo
-
-# ------------------------------- GET METADATA ------------------------------- #
-
-# get project metadata and reshape json to include project id (requires jq)
-p="csv file example"
-echo "metadata for ${p}..."
-if curl -fs --get \
-  --data project="${projects[$p]}" \
-  "${endpoint}/command/core/get-project-metadata" \
-  | "${jq}" "{ id: ${projects[$p]} } + ."
-then
-  : #log "printed metadata of ${p} (${projects[$p]})"
-else
-  error "getting metadata of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ------------------------------ GET ROW COUNT ------------------------------- #
-
-# get total number of rows
-p="csv file example"
-echo "total number of rows in ${p}..."
-if curl -fs --get \
-  --data project="${projects[$p]}" \
-  --data limit=0 \
-  "${endpoint}/command/core/get-rows" \
-  | tr "," "\n" | grep total | cut -d ":" -f 2
-then
-  : #log "printed row count of ${p} (${projects[$p]})"
-else
-  error "getting row count of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ------------------------------- GET COLUMNS -------------------------------- #
-
-# get column names from project model (requires jq)
-p="csv file example"
-echo "column names of ${p}..."
-if curl -fs --get \
-  --data project="${projects[$p]}" \
-  "${endpoint}/command/core/get-models" \
-  | "${jq}" -r '.columnModel | .columns[] | .name'
-then
-  : #log "printed column names of ${p} (${projects[$p]})"
-else
-  error "getting column names of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# -------------------------- GET OPERATIONS HISTORY -------------------------- #
-
-# get operations history and reshape json to make it applicable (requires jq)
-p="csv file example"
-f="${workspace}/${p}_history.json"
-echo "history of operations for ${p}..."
-if curl -fs --get \
-  --data project="${projects[$p]}" \
-  "${endpoint}/command/core/get-operations" \
-  | "${jq}" '[ .entries[] | .operation ]' \
-  > "${f}"
-then
-  log "saved ops history of ${p} (${projects[$p]}) to ${f}"
-else
-  error "getting ops history of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ---------------------------- GET IMPORT HISTORY ---------------------------- #
-
-# get project metadata and filter import options history (requires jq)
-p="csv file example"
-echo "history of import for ${p}..."
-if curl -fs --get \
-  --data project="${projects[$p]}" \
-  "${endpoint}/command/core/get-project-metadata" \
-  | "${jq}" ".importOptionMetadata[0]"
-then
-  : #log "printed import history of ${p} (${projects[$p]})"
-else
-  error "getting import history of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ------------------------------ DELETE PROJECT ------------------------------ #
-
-# delete a project (rarely needed for batch processing)
-p="yet another csv example"
-echo "delete project ${p}..."
-if curl -fs \
-  --data project="${projects[$p]}" \
-  "${endpoint}/command/core/delete-project$(refine_csrf)" > /dev/null
-then
-  log "deleted ${p} (${projects[$p]})"
-else
-  error "deletion of ${p} (${projects[$p]}) failed!"
-fi
-echo
-
-# ================================== FINISH ================================== #
-
-checkpoint "Finish"
-echo
-
-# stop OpenRefine server
-refine_stop
-echo
-
-# calculate run time based on checkpoints
-checkpoint_stats
-echo
-
-# word count on all files in workspace
-count_output
--- a/minimal.sh
+++ b/minimal.sh
@ -0,0 +1,40 @@
+#!/bin/bash
+# bash-refine v1.1.0: minimal.sh, Felix Lohmeier, 2020-07-10
+# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
+# license: MIT License https://choosealicense.com/licenses/mit/
+
+# =============================== ENVIRONMENT ================================ #
+
+cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1 # also works for `bash minimal.sh`
+source bash-refine.sh # default config and functions
+init # check requirements, set trap, create workspace and tee to logfile
+
+# ================================= STARTUP ================================== #
+
+checkpoint "Startup"; echo
+refine_start; echo
+
+# ================================== IMPORT ================================== #
+
+checkpoint "Import"; echo
+
+#                 <-- insert snippet from templates.sh here -->
+
+# ================================ TRANSFORM ================================= #
+
+checkpoint "Transform"; echo
+
+#                 <-- insert snippet from templates.sh here -->
+
+# ================================== EXPORT ================================== #
+
+checkpoint "Export"; echo
+
+#                 <-- insert snippet from templates.sh here -->
+
+# ================================== FINISH ================================== #
+
+checkpoint "Finish"; echo
+refine_stop; echo
+checkpoint_stats; echo
+count_output
--- a/templates.sh
+++ b/templates.sh
@ -0,0 +1,546 @@
+#!/bin/bash
+# bash-refine v1.1.0: templates.sh, Felix Lohmeier, 2020-07-10
+# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
+# license: MIT License https://choosealicense.com/licenses/mit/
+
+# TODO: example for setting metadata
+# TODO: example for engine config (facets)
+
+# ======================= TEMPLATES FOR YOUR WORKFLOW ======================== #
+
+# The following code shows several options for import, transform and export
+# use the templates to write your own scripts or execute this file for a demo
+
+# =============================== ENVIRONMENT ================================ #
+
+# make script executable from another directory
+cd "${BASH_SOURCE%/*}/" || exit 1
+
+# source the main script
+source bash-refine.sh
+
+### override default config?
+#endpoint="http://localhost:3333"
+#memory="1400M" # increase to available RAM
+#date="$(date +%Y%m%d_%H%M%S)"
+#workspace="output/${date}"
+#logfile="${workspace}/${date}.log"
+#csrf=true # set to false for OpenRefine < 3.3
+#jq="jq" # path to executable
+#openrefine="openrefine/refine" # path to executable
+
+# check requirements, set trap, create workspace and tee to logfile
+init
+
+# ================================= STARTUP ================================== #
+
+checkpoint "Startup"; echo
+
+# start OpenRefine server
+refine_start; echo
+
+# ============================= MOCKUP TEST DATA ============================= #
+
+mkdir -p input
+
+cat << "DATA" > "input/example1.csv"
+a,b,c
+1,2,3
+0,0,0
+$,\,'
+DATA
+
+cat << "DATA" > "input/example2.tsv"
+a	b	c
+'	\	$
+0	0	0
+3	2	1
+DATA
+
+cat << "DATA" > "input/example-operations-history.json"
+[
+  {
+    "op": "core/column-addition",
+    "engineConfig": {
+      "mode": "row-based"
+    },
+    "newColumnName": "apply-from-file",
+    "columnInsertIndex": 2,
+    "baseColumnName": "b",
+    "expression": "grel:value.replace('2','TEST')",
+    "onError": "set-to-blank"
+  }
+]
+DATA
+
+# ================================== IMPORT ================================== #
+
+checkpoint "Import"; echo
+
+# declare input
+projects["from heredoc"]=""
+projects["csv file example"]="input/example1.csv"
+projects["tsv file example"]="input/example2.tsv"
+projects["another csv example"]="input/example1.csv"
+projects["yet another csv example"]="input/example1.csv"
+
+# --------------------------- IMPORT FROM HEREDOC ---------------------------- #
+
+# quoted heredoc ("DATA") will not be expanded by bash (no escaping needed)
+# project id will be stored in ${projects[from heredoc]}
+p="from heredoc"
+f="" # optional filename, will be stored in OpenRefine project metadata
+echo "import heredoc..."
+if curl -fs --write-out "%{redirect_url}\n" \
+  --form project-file="@-$(if [[ -n $f ]]; then echo ";filename=${f}"; fi)" \
+  --form project-name="${p}" \
+  --form format="text/line-based/*sv" \
+  --form options='{
+                    "encoding": "UTF-8",
+                    "separator": " "
+                  }' \
+  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
+  > "${workspace}/${p}.id" \
+  << "DATA"
+a b c
+1 2 3
+0 0 0
+$ \ '
+DATA
+then
+  log "imported heredoc as ${p}"
+else
+  error "import of ${p} failed!"
+fi
+refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
+echo
+
+# ---------------------------- IMPORT FROM FILE ------------------------------ #
+
+# project id will be stored in ${projects[tsv file example]}
+p="tsv file example"
+echo "import file ${projects[$p]} ..."
+if curl -fs --write-out "%{redirect_url}\n" \
+  --form project-file="@${projects[$p]}" \
+  --form project-name="${p}" \
+  --form format="text/line-based/*sv" \
+  --form options='{
+                    "encoding": "UTF-8",
+                    "separator": "\t"
+                  }' \
+  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
+  > "${workspace}/${p}.id"
+then
+  log "imported ${projects[$p]} as ${p}"
+else
+  error "import of ${projects[$p]} failed!"
+fi
+refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
+echo
+
+# -------------------- IMPORT MULTIPLE FILES (PARALLEL) ---------------------- #
+
+# one background job per file; ids are stored in ${projects[<name>]} afterwards
+ps=( "csv file example" "another csv example" "yet another csv example" )
+echo "import files" \
+  "$(for p in "${ps[@]}"; do printf "%s" "${projects[$p]} "; done)..."
+for p in "${ps[@]}"; do
+  (if curl -fs --write-out "%{redirect_url}\n" \
+    --form project-file="@${projects[$p]}" \
+    --form project-name="${p}" \
+    --form format="text/line-based/*sv" \
+    --form options='{
+                    "encoding": "UTF-8",
+                    "separator": ","
+                    }' \
+    "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
+    > "${workspace}/${p}.id"
+  then
+    log "imported ${projects[$p]} as ${p}"
+  else
+    error "import of ${projects[$p]} failed!"
+  fi) &
+  monitor "${p}"
+done
+monitoring
+for p in "${ps[@]}"; do
+  refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
+done
+echo
+
+# ================================ TRANSFORM ================================= #
+
+checkpoint "Transform"; echo
+
+# ------------------------ APPLY OPERATIONS FROM FILE ------------------------ #
+
+p="csv file example"
+f="input/example-operations-history.json"
+echo "apply ${f} to ${p}..."
+if curl -fs \
+  --data project="${projects[$p]}" \
+  --data-urlencode operations@"${f}" \
+  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
+then
+  log "transformed ${p} (${projects[$p]})"
+else
+  error "transform ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ---------------------- APPLY OPERATIONS FROM HEREDOC ----------------------- #
+
+# quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
+p="csv file example"
+echo "add column apply-from-heredoc to ${p}..."
+if curl -fs \
+  --data project="${projects[$p]}" \
+  --data-urlencode "operations@-" \
+  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
+  << "JSON"
+[
+  {
+    "op": "core/column-addition",
+    "engineConfig": {
+      "mode": "row-based"
+    },
+    "newColumnName": "apply-from-heredoc",
+    "columnInsertIndex": 2,
+    "baseColumnName": "b",
+    "expression": "grel:value.replace('2','TEST')",
+    "onError": "set-to-blank"
+  }
+]
+JSON
+then
+  log "transformed ${p} (${projects[$p]})"
+else
+  error "transform ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ---------------- APPLY OPERATIONS FROM HEREDOC AND VARIABLES --------------- #
+
+# unquoted heredocs with variable and multi-line expression (requires jq)
+# \ must be used to quote the characters \, $, and `.
+p="csv file example"
+replace='TEST'
+column="apply with variables"
+echo "add column ${column} to ${p}..."
+read -r -d '' expression << EXPRESSION
+grel:value.replace(
+  '2',
+  '${replace}'
+)
+EXPRESSION
+if curl -fs \
+  --data project="${projects[$p]}" \
+  --data-urlencode "operations@-" \
+  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
+  << JSON
+[
+  {
+    "op": "core/column-addition",
+    "engineConfig": {
+      "mode": "row-based"
+    },
+    "newColumnName": "${column}",
+    "columnInsertIndex": 2,
+    "baseColumnName": "b",
+    "expression": $(echo "${expression}" | "${jq}" -s -R '.'),
+    "onError": "set-to-blank"
+  }
+]
+JSON
+then
+  log "transformed ${p} (${projects[$p]})"
+else
+  error "transform ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ------ APPLY OPERATIONS FROM HEREDOC TO MULTIPLE PROJECTS (PARALLEL)  ------ #
+
+# quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
+ps=( "another csv example" "yet another csv example" )
+echo "add column apply-from-heredoc to" "${ps[@]}" "..."
+for p in "${ps[@]}"; do
+  (if curl -fs \
+    --data project="${projects[$p]}" \
+    --data-urlencode "operations@-" \
+    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
+    << "JSON"
+  [
+    {
+      "op": "core/column-addition",
+      "engineConfig": {
+        "mode": "row-based"
+      },
+      "newColumnName": "apply-from-heredoc",
+      "columnInsertIndex": 2,
+      "baseColumnName": "b",
+      "expression": "grel:value.replace('2','TEST')",
+      "onError": "set-to-blank"
+    }
+  ]
+JSON
+  then
+    log "transformed ${p} (${projects[$p]})"
+  else
+    error "transform ${p} (${projects[$p]}) failed!"
+  fi) &
+  monitor "${p}"
+done
+monitoring
+echo
+
+# ------------- APPLY MULTIPLE OPERATIONS GENERATED FROM HEREDOC ------------- #
+
+# unquoted heredoc (JSON) with variables and multiplied (requires jq)
+# \ must be used to quote the characters \, $, and `.
+p="csv file example"
+columns=( "apply-from-file" "apply-from-heredoc" )
+echo "delete columns" "${columns[@]}" "in ${p}..."
+for column in "${columns[@]}"; do
+  cat << JSON >> "${workspace}/${p}.tmp"
+[
+  {
+    "op": "core/column-removal",
+    "columnName": "${column}"
+  }
+]
+JSON
+done
+if "${jq}" -s add "${workspace}/${p}.tmp" | curl -fs \
+  --data project="${projects[$p]}" \
+  --data-urlencode operations@- \
+  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
+then
+  log "transformed ${p} (${projects[$p]})"
+  rm "${workspace}/${p}.tmp"
+else
+  error "transform ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ================================== EXPORT ================================== #
+
+checkpoint "Export"; echo
+
+# ----------------------------- EXPORT TO STDOUT ----------------------------- #
+
+p="csv file example"
+format="tsv"
+echo "export ${p} in ${format} format..."
+if curl -fs \
+  --data project="${projects[$p]}" \
+  --data format="${format}" \
+  --data engine='{"facets":[],"mode":"row-based"}' \
+  "${endpoint}/command/core/export-rows"
+then
+  log "exported ${p} (${projects[$p]})"
+else
+  error "export of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ------------------------------ EXPORT TO FILE ------------------------------ #
+
+p="csv file example"
+format="csv"
+echo "export ${p} to ${format} file..."
+if curl -fs \
+  --data project="${projects[$p]}" \
+  --data format="${format}" \
+  --data engine='{"facets":[],"mode":"row-based"}' \
+  "${endpoint}/command/core/export-rows" \
+  > "${workspace}/${p}.${format}"
+then
+  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
+else
+  error "export of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ------------------------- TEMPLATING EXPORT TO FILE ------------------------ #
+
+p="csv file example"
+format="json"
+echo "export ${p} to ${format} file using template..."
+IFS= read -r -d '' template << "TEMPLATE"
+  {
+    "a": {{cells['a'].value.jsonize()}},
+    "b": {{cells['b'].value.jsonize()}},
+    "c": {{cells['c'].value.jsonize()}}
+  }
+TEMPLATE
+if echo "${template}" | head -c -2 | curl -fs \
+  --data project="${projects[$p]}" \
+  --data format="template" \
+  --data prefix="[
+" \
+  --data suffix="
+]" \
+  --data separator=",
+" \
+  --data engine='{"facets":[],"mode":"row-based"}' \
+  --data-urlencode template@- \
+  "${endpoint}/command/core/export-rows" \
+  > "${workspace}/${p}.${format}"
+then
+  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
+else
+  error "export of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ------------------- EXPORT TO MULTIPLE FILES (PARALLEL) -------------------- #
+
+ps=( "another csv example" "yet another csv example" )
+format="tsv"
+echo "export" "${ps[@]}" "to ${format} files..."
+for p in "${ps[@]}"; do
+  (if curl -fs \
+    --data project="${projects[$p]}" \
+    --data format="${format}" \
+    --data engine='{"facets":[],"mode":"row-based"}' \
+    "${endpoint}/command/core/export-rows" \
+    > "${workspace}/${p}.${format}"
+  then
+    log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
+  else
+    error "export of ${p} (${projects[$p]}) failed!"
+  fi) &
+  monitor "${p}"
+done
+monitoring
+echo
+
+# ================================ UTILITIES ================================= #
+
+checkpoint "Utilities"; echo
+
+# ------------------------------ LIST PROJECTS ------------------------------- #
+
+# get all project metadata and reshape json to print a list (requires jq)
+echo "list projects..."
+if curl -fs --get \
+  "${endpoint}/command/core/get-all-project-metadata" \
+  | "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"'
+then
+  : #log "printed list of projects"
+else
+  error "getting list of projects failed!"
+fi
+echo
+
+# ------------------------------- GET METADATA ------------------------------- #
+
+# get project metadata and reshape json to include project id (requires jq)
+p="csv file example"
+echo "metadata for ${p}..."
+if curl -fs --get \
+  --data project="${projects[$p]}" \
+  "${endpoint}/command/core/get-project-metadata" \
+  | "${jq}" "{ id: ${projects[$p]} } + ."
+then
+  : #log "printed metadata of ${p} (${projects[$p]})"
+else
+  error "getting metadata of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ------------------------------ GET ROW COUNT ------------------------------- #
+
+# get total number of rows
+p="csv file example"
+echo "total number of rows in ${p}..."
+if curl -fs --get \
+  --data project="${projects[$p]}" \
+  --data limit=0 \
+  "${endpoint}/command/core/get-rows" \
+  | tr "," "\n" | grep total | cut -d ":" -f 2
+then
+  : #log "printed row count of ${p} (${projects[$p]})"
+else
+  error "getting row count of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ------------------------------- GET COLUMNS -------------------------------- #
+
+# get column names from project model (requires jq)
+p="csv file example"
+echo "column names of ${p}..."
+if curl -fs --get \
+  --data project="${projects[$p]}" \
+  "${endpoint}/command/core/get-models" \
+  | "${jq}" -r '.columnModel | .columns[] | .name'
+then
+  : #log "printed column names of ${p} (${projects[$p]})"
+else
+  error "getting column names of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# -------------------------- GET OPERATIONS HISTORY -------------------------- #
+
+# get operations history and reshape json to make it applicable (requires jq)
+p="csv file example"
+f="${workspace}/${p}_history.json"
+echo "history of operations for ${p}..."
+if curl -fs --get \
+  --data project="${projects[$p]}" \
+  "${endpoint}/command/core/get-operations" \
+  | "${jq}" '[ .entries[] | .operation ]' \
+  > "${f}"
+then
+  log "saved ops history of ${p} (${projects[$p]}) to ${f}"
+else
+  error "getting ops history of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ---------------------------- GET IMPORT HISTORY ---------------------------- #
+
+# get project metadata and filter import options history (requires jq)
+p="csv file example"
+echo "history of import for ${p}..."
+if curl -fs --get \
+  --data project="${projects[$p]}" \
+  "${endpoint}/command/core/get-project-metadata" \
+  | "${jq}" ".importOptionMetadata[0]"
+then
+  : #log "printed import history of ${p} (${projects[$p]})"
+else
+  error "getting import history of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ------------------------------ DELETE PROJECT ------------------------------ #
+
+# delete a project (rarely needed for batch processing)
+p="yet another csv example"
+echo "delete project ${p}..."
+if curl -fs \
+  --data project="${projects[$p]}" \
+  "${endpoint}/command/core/delete-project$(refine_csrf)" > /dev/null
+then
+  log "deleted ${p} (${projects[$p]})"
+else
+  error "deletion of ${p} (${projects[$p]}) failed!"
+fi
+echo
+
+# ================================== FINISH ================================== #
+
+checkpoint "Finish"; echo
+
+# stop OpenRefine server
+refine_stop; echo
+
+# calculate run time based on checkpoints
+checkpoint_stats; echo
+
+# word count on all files in workspace
+count_output