split up code

2020-07-10 18:28:58 +02:00 · 2020-07-10 18:28:58 +02:00 · 09986428d1
parent 634d61d7a6
commit 09986428d1
4 changed files with 620 additions and 557 deletions
--- a/bash-refine.md
+++ b/bash-refine.md
@ -0,0 +1,28 @@
 ## How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts
 tested on Fedora 32 with bash 5.0.17 and curl 7.69.1
 ### Quick start
 1. Clone this gist
 ```
 git clone https://gist.github.com/d76bd27fbc4b8ab6d683822cdf61f81d.git bash-refine
 ```
 2. Execute all supplied examples for a quick demo
 ```
 cd bash-refine
 ./templates.sh
 ```
 ### Build your own workflow
 3. Copy the minimal pre-structured script to a new file
 ```
 cp minimal.sh myworkflow.sh
 ```
 4. Use the templates in `templates.sh` to develop your workflow
--- a/bash-refine.sh
+++ b/bash-refine.sh
@ -1,26 +1,17 @@
 #!/bin/bash
-# bash-refine.sh, Felix Lohmeier, v1.0.0, 2020-07-09
+# bash-refine v1.1.0: bash-refine.sh, Felix Lohmeier, 2020-07-10
 # How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts
 # https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
 # tested on Fedora 32 with OpenRefine 3.3, bash 5.0.17, curl 7.69.1 and jq 1.4
 # license: MIT License https://choosealicense.com/licenses/mit/
 # TODO: support for macOS
 # TODO: example for setting metadata
 # TODO: example for engine config (facets)
 # make script executable from another directory
 cd "$(dirname "${0}")" || exit 1
 # ================================== CONFIG ================================== #
-port="3333"
-endpoint="http://localhost:${port}"
+# the port passed to OpenRefine is derived from this URL via ${endpoint##*:}
+endpoint="http://localhost:3333"
 memory="1400M" # increase to available RAM
 date="$(date +%Y%m%d_%H%M%S)"
 workspace="output/${date}"
 logfile="${workspace}/${date}.log"
 csrf=true # set to false for OpenRefine < 3.3
 jq="jq" # path to executable
 openrefine="openrefine/refine" # path to executable
@ -78,7 +69,7 @@ function refine_start() {
  echo "start OpenRefine server..."  
  local dir
  dir="$(readlink -f "${workspace}")"
-  ${openrefine} -v warn -m "${memory}" -p "${port}" -d "${dir}" &
+  ${openrefine} -v warn -m "${memory}" -p "${endpoint##*:}" -d "${dir}" &
  pid_server=${!}
  timeout 30s bash -c "until curl -s \"${endpoint}\" \
    | cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
@ -207,7 +198,7 @@ function checkpoint_stats {
  # calculate and print run time for each step
  for i in "${!keys[@]}"; do
    diffsec=$(( values[$((i + 1))] - values[i] ))
-    printf "%36s %s %s %s\n" "${keys[$i]}" "($((i + 1)))" \
+    printf "%35s %s %s %s\n" "${keys[$i]}" "($((i + 1)))" \
      "$(date -d @"${values[$i]}")" \
      "($(date -d @${diffsec} -u +%H:%M:%S))"
  done
@ -223,552 +214,10 @@ function count_output {
 }
 function init() {
  # verify prerequisites first (downloads software if necessary)
  requirements
  # report an error when the script is interrupted or terminated
  trap 'error "script interrupted!"' SIGHUP SIGINT SIGQUIT SIGTERM
  # the workspace directory must exist before the log file is opened
  mkdir -p "${workspace}"
  # from now on stdout and stderr are piped through tee, which appends
  # everything to the log file and passes it on to the original stdout
  exec > >(tee -a "${logfile}") 2>&1
 }
 # ======================= TEMPLATES FOR YOUR WORKFLOW ======================== #
 # To increase readability, you may prefer to split up the code:
 # - move all code below to a separate script (e.g. one for each workflow)
 # - add the following lines at the beginning of the new file(s)
 #   #!/bin/bash
 #   . bash-refine.sh
 # ================================= STARTUP ================================== #
 checkpoint "Startup"
 echo
 # check requirements and download software if necessary
 requirements
 # override default config?
 #port="3333"
 #endpoint="http://localhost:${port}"
 #memory="1400M"
 #date="$(date +%Y%m%d_%H%M%S)"
 #workspace="output/${date}"
 #logfile="${workspace}/${date}.log"
 # set trap, create directories and tee to log file
 init
 # start OpenRefine server
 refine_start
 echo
 # ============================= MOCKUP TEST DATA ============================= #
 mkdir -p input
 cat << "DATA" > "input/example1.csv"
 a,b,c
 1,2,3
 0,0,0
 $,\,'
 DATA
 cat << "DATA" > "input/example2.tsv"
 a	b	c
 '	\	$
 0	0	0
 3	2	1
 DATA
 cat << "DATA" > "input/example-operations-history.json"
 [
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "apply-from-file",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": "grel:value.replace('2','TEST')",
    "onError": "set-to-blank"
  }
 ]
 DATA
 # ================================== IMPORT ================================== #
 checkpoint "Import"
 echo
 # declare input
 projects["from heredoc"]=""
 projects["csv file example"]="input/example1.csv"
 projects["tsv file example"]="input/example2.tsv"
 projects["another csv example"]="input/example1.csv"
 projects["yet another csv example"]="input/example1.csv"
 # --------------------------- IMPORT FROM HEREDOC ---------------------------- #
 # quoted heredoc ("DATA") will not be expanded by bash (no escaping needed)
 # project id will be stored in ${projects[from heredoc]}
 p="from heredoc"
 f="" # optional filename, will be stored in OpenRefine project metadata
 echo "import heredoc..."
 if curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@-$(if [[ -n $f ]]; then echo ";filename=${f}"; fi)" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{
                    "encoding": "UTF-8",
                    "separator": " "
                  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workspace}/${p}.id" \
  << "DATA"
 a b c
 1 2 3
 0 0 0
 $ \ '
 DATA
 then
  log "imported heredoc as ${p}"
 else
  error "import of ${p} failed!"
 fi
 refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
 echo
 # ---------------------------- IMPORT FROM FILE ------------------------------ #
 # project id will be stored in ${projects[tsv file example]}
 p="tsv file example"
 echo "import file ${projects[$p]} ..."
 if curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@${projects[$p]}" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{
                    "encoding": "UTF-8",
                    "separator": "\t"
                  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workspace}/${p}.id"
 then
  log "imported ${projects[$p]} as ${p}"
 else
  error "import of ${projects[$p]} failed!"
 fi
 refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
 echo
 # -------------------- IMPORT MULTIPLE FILES (PARALLEL) ---------------------- #
 # project ids will be stored in ${projects[another csv example]} etc.
 ps=( "csv file example" "another csv example" "yet another csv example" )
 echo "import files" \
  "$(for p in "${ps[@]}"; do printf "%s" "${projects[$p]} "; done)..."
 for p in "${ps[@]}"; do
  (if curl -fs --write-out "%{redirect_url}\n" \
    --form project-file="@${projects[$p]}" \
    --form project-name="${p}" \
    --form format="line-based" \
    --form options='{
                    "encoding": "UTF-8",
                    "separator": ","
                    }' \
    "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
    > "${workspace}/${p}.id"
  then
    log "imported ${projects[$p]} as ${p}"
  else
    error "import of ${projects[$p]} failed!"
  fi) &
  monitor "${p}"
 done
 monitoring
 for p in "${ps[@]}"; do
  refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
 done
 echo
 # ================================ TRANSFORM ================================= #
 checkpoint "Transform"
 echo
 # ------------------------ APPLY OPERATIONS FROM FILE ------------------------ #
 p="csv file example"
 f="input/example-operations-history.json"
 echo "apply ${f} to ${p}..."
 if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode operations@"${f}" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
 then
  log "transformed ${p} (${projects[$p]})"
 else
  error "transform ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ---------------------- APPLY OPERATIONS FROM HEREDOC ----------------------- #
 # quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
 p="csv file example"
 echo "add column apply-from-heredoc to ${p}..."
 if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
 [
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "apply-from-heredoc",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": "grel:value.replace('2','TEST')",
    "onError": "set-to-blank"
  }
 ]
 JSON
 then
  log "transformed ${p} (${projects[$p]})"
 else
  error "transform ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ---------------- APPLY OPERATIONS FROM HEREDOC AND VARIABLES --------------- #
 # unquoted heredocs with variable and multi-line expression (requires jq)
 # \ must be used to quote the characters \, $, and `.
 p="csv file example"
 replace='TEST'
 column="apply with variables"
 echo "add column ${column} to ${p}..."
 read -r -d '' expression << EXPRESSION
 grel:value.replace(
  '2',
  '${replace}'
 )
 EXPRESSION
 if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << JSON
 [
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "${column}",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": $(echo "${expression}" | ${jq} -s -R '.'),
    "onError": "set-to-blank"
  }
 ]
 JSON
 then
  log "transformed ${p} (${projects[$p]})"
 else
  error "transform ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------ APPLY OPERATIONS FROM HEREDOC TO MULTIPLE PROJECTS (PARALLEL)  ------ #
 # quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
 ps=( "another csv example" "yet another csv example" )
 echo "add column apply-from-heredoc to" "${ps[@]}" "..."
 for p in "${ps[@]}"; do
  (if curl -fs \
    --data project="${projects[$p]}" \
    --data-urlencode "operations@-" \
    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
    << "JSON"
  [
    {
      "op": "core/column-addition",
      "engineConfig": {
        "mode": "row-based"
      },
      "newColumnName": "apply-from-heredoc",
      "columnInsertIndex": 2,
      "baseColumnName": "b",
      "expression": "grel:value.replace('2','TEST')",
      "onError": "set-to-blank"
    }
  ]
 JSON
  then
    log "transformed ${p} (${projects[$p]})"
  else
    error "transform ${p} (${projects[$p]}) failed!"
  fi) &
  monitor "${p}"
 done
 monitoring
 echo
 # ------------- APPLY MULTIPLE OPERATIONS GENERATED FROM HEREDOC ------------- #
 # unquoted heredoc (JSON) with variables and multiplied (requires jq)
 # \ must be used to quote the characters \, $, and `.
 p="csv file example"
 columns=( "apply-from-file" "apply-from-heredoc" )
 echo "delete columns" "${columns[@]}" "in ${p}..."
 for column in "${columns[@]}"; do
  cat << JSON >> "${workspace}/${p}.tmp"
 [
  {
    "op": "core/column-removal",
    "columnName": "${column}"
  }
 ]
 JSON
 done
 if "${jq}" -s add "${workspace}/${p}.tmp" | curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode operations@- \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
 then
  log "transformed ${p} (${projects[$p]})"
  rm "${workspace}/${p}.tmp"
 else
  error "transform ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ================================== EXPORT ================================== #
 checkpoint "Export"
 echo
 # ----------------------------- EXPORT TO STDOUT ----------------------------- #
 p="csv file example"
 format="tsv"
 echo "export ${p} in ${format} format..."
 if curl -fs \
  --data project="${projects[$p]}" \
  --data format="tsv" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows"
 then
  log "exported ${p} (${projects[$p]})"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------------ EXPORT TO FILE ------------------------------ #
 p="csv file example"
 format="csv"
 echo "export ${p} to ${format} file..."
 if curl -fs \
  --data project="${projects[$p]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
  > "${workspace}/${p}.${format}"
 then
  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------- TEMPLATING EXPORT TO FILE ------------------------ #
 p="csv file example"
 format="json"
 echo "export ${p} to ${format} file using template..."
 IFS= read -r -d '' template << "TEMPLATE"
  {
    "a": {{cells['a'].value.jsonize()}},
    "b": {{cells['b'].value.jsonize()}},
    "c": {{cells['c'].value.jsonize()}}
  }
 TEMPLATE
 if echo "${template}" | head -c -2 | curl -fs \
  --data project="${projects[$p]}" \
  --data format="template" \
  --data prefix="[
 " \
  --data suffix="
 ]" \
  --data separator=",
 " \
  --data engine='{"facets":[],"mode":"row-based"}' \
  --data-urlencode template@- \
  "${endpoint}/command/core/export-rows" \
  > "${workspace}/${p}.${format}"
 then
  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------- EXPORT TO MULTIPLE FILES (PARALLEL) -------------------- #
 ps=( "another csv example" "yet another csv example" )
 format="tsv"
 echo "export" "${ps[@]}" "to ${format} files..."
 for p in "${ps[@]}"; do
  (if curl -fs \
    --data project="${projects[$p]}" \
    --data format="${format}" \
    --data engine='{"facets":[],"mode":"row-based"}' \
    "${endpoint}/command/core/export-rows" \
    > "${workspace}/${p}.${format}"
  then
    log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
  else
    error "export of ${p} (${projects[$p]}) failed!"
  fi) &
  monitor "${p}"
 done
 monitoring
 echo
 # ================================ UTILITIES ================================= #
 checkpoint "Utilities"
 echo
 # ------------------------------ LIST PROJECTS ------------------------------- #
 # get all project metadata and reshape json to print a list (requires jq)
 echo "list projects..."
 if curl -fs --get \
  "${endpoint}/command/core/get-all-project-metadata" \
  | "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"'
 then
  : #log "printed list of projects"
 else
  error "getting list of projects failed!"
 fi
 echo
 # ------------------------------- GET METADATA ------------------------------- #
 # get project metadata and reshape json to include project id (requires jq)
 p="csv file example"
 echo "metadata for ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-project-metadata" \
  | "${jq}" "{ id: ${projects[$p]} } + ."
 then
  : #log "printed metadata of ${p} (${projects[$p]})"
 else
  error "getting metadata of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------------ GET ROW COUNT ------------------------------- #
 # get total number of rows
 p="csv file example"
 echo "total number of rows in ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  --data limit=0 \
  "${endpoint}/command/core/get-rows" \
  | tr "," "\n" | grep total | cut -d ":" -f 2
 then
  : #log "printed row count of ${p} (${projects[$p]})"
 else
  error "getting row count of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------------- GET COLUMNS -------------------------------- #
 # get column names from project model (requires jq)
 p="csv file example"
 echo "column names of ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-models" \
  | "${jq}" -r '.columnModel | .columns[] | .name'
 then
  : #log "printed column names of ${p} (${projects[$p]})"
 else
  error "getting column names of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # -------------------------- GET OPERATIONS HISTORY -------------------------- #
 # get operations history and reshape json to make it applicable (requires jq)
 p="csv file example"
 f="${workspace}/${p}_history.json"
 echo "history of operations for ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-operations" \
  | "${jq}" '[ .entries[] | .operation ]' \
  > "${f}"
 then
  log "saved ops history of ${p} (${projects[$p]}) to ${f}"
 else
  error "getting ops history of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ---------------------------- GET IMPORT HISTORY ---------------------------- #
 # get project metadata and filter import options history (requires jq)
 p="csv file example"
 echo "history of import for ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-project-metadata" \
  | "${jq}" ".importOptionMetadata[0]"
 then
  : #log "printed import history of ${p} (${projects[$p]})"
 else
  error "getting import history of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------------ DELETE PROJECT ------------------------------ #
 # delete a project (rarely needed for batch processing)
 p="yet another csv example"
 echo "delete project ${p}..."
 if curl -fs \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/delete-project$(refine_csrf)" > /dev/null
 then
  log "deleted ${p} (${projects[$p]})"
 else
  error "deletion of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ================================== FINISH ================================== #
 checkpoint "Finish"
 echo
 # stop OpenRefine server
 refine_stop
 echo
 # calculate run time based on checkpoints
 checkpoint_stats
 echo
 # word count on all files in workspace
 count_output
--- a/minimal.sh
+++ b/minimal.sh
@ -0,0 +1,40 @@
 #!/bin/bash
 # bash-refine v1.1.0: minimal.sh, Felix Lohmeier, 2020-07-10
 # https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
 # license: MIT License https://choosealicense.com/licenses/mit/
 # =============================== ENVIRONMENT ================================ #
 # make script executable from another directory
 # dirname is used because "${BASH_SOURCE%/*}" is left unchanged when the
 # script is called without any slash in its path (e.g. "bash minimal.sh"),
 # which made cd fail with "minimal.sh/: Not a directory"
 cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1
 source bash-refine.sh
 init
 # ================================= STARTUP ================================== #
 checkpoint "Startup"; echo
 refine_start; echo
 # ================================== IMPORT ================================== #
 checkpoint "Import"; echo
 #                 <-- insert snippet from templates.sh here -->
 # ================================ TRANSFORM ================================= #
 checkpoint "Transform"; echo
 #                 <-- insert snippet from templates.sh here -->
 # ================================== EXPORT ================================== #
 checkpoint "Export"; echo
 #                 <-- insert snippet from templates.sh here -->
 # ================================== FINISH ================================== #
 checkpoint "Finish"; echo
 refine_stop; echo
 checkpoint_stats; echo
 count_output
--- a/templates.sh
+++ b/templates.sh
@ -0,0 +1,546 @@
 #!/bin/bash
 # bash-refine v1.1.0: templates.sh, Felix Lohmeier, 2020-07-10
 # https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
 # license: MIT License https://choosealicense.com/licenses/mit/
 # TODO: example for setting metadata
 # TODO: example for engine config (facets)
 # ======================= TEMPLATES FOR YOUR WORKFLOW ======================== #
 # The following code shows several options for import, transform and export
 # use the templates to write your own scripts or execute this file for a demo
 # =============================== ENVIRONMENT ================================ #
 # make script executable from another directory
 # dirname is used because "${BASH_SOURCE%/*}" is left unchanged when the
 # script is called without any slash in its path (e.g. "bash templates.sh"),
 # which made cd fail with "templates.sh/: Not a directory"
 cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1
 # source the main script
 source bash-refine.sh
 ### override default config?
 #endpoint="http://localhost:3333"
 #memory="1400M" # increase to available RAM
 #date="$(date +%Y%m%d_%H%M%S)"
 #workspace="output/${date}"
 #logfile="${workspace}/${date}.log"
 #csrf=true # set to false for OpenRefine < 3.3
 #jq="jq" # path to executable
 #openrefine="openrefine/refine" # path to executable
 # check requirements, set trap, create workspace and tee to logfile
 init
 # ================================= STARTUP ================================== #
 checkpoint "Startup"; echo
 # start OpenRefine server
 refine_start; echo
 # ============================= MOCKUP TEST DATA ============================= #
 mkdir -p input
 cat << "DATA" > "input/example1.csv"
 a,b,c
 1,2,3
 0,0,0
 $,\,'
 DATA
 cat << "DATA" > "input/example2.tsv"
 a	b	c
 '	\	$
 0	0	0
 3	2	1
 DATA
 cat << "DATA" > "input/example-operations-history.json"
 [
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "apply-from-file",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": "grel:value.replace('2','TEST')",
    "onError": "set-to-blank"
  }
 ]
 DATA
 # ================================== IMPORT ================================== #
 checkpoint "Import"; echo
 # declare input
 projects["from heredoc"]=""
 projects["csv file example"]="input/example1.csv"
 projects["tsv file example"]="input/example2.tsv"
 projects["another csv example"]="input/example1.csv"
 projects["yet another csv example"]="input/example1.csv"
 # --------------------------- IMPORT FROM HEREDOC ---------------------------- #
 # quoted heredoc ("DATA") will not be expanded by bash (no escaping needed)
 # project id will be stored in ${projects[from heredoc]}
 p="from heredoc"
 f="" # optional filename, will be stored in OpenRefine project metadata
 echo "import heredoc..."
 if curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@-$(if [[ -n $f ]]; then echo ";filename=${f}"; fi)" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{
                    "encoding": "UTF-8",
                    "separator": " "
                  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workspace}/${p}.id" \
  << "DATA"
 a b c
 1 2 3
 0 0 0
 $ \ '
 DATA
 then
  log "imported heredoc as ${p}"
 else
  error "import of ${p} failed!"
 fi
 refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
 echo
 # ---------------------------- IMPORT FROM FILE ------------------------------ #
 # project id will be stored in ${projects[tsv file example]}
 p="tsv file example"
 echo "import file ${projects[$p]} ..."
 if curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@${projects[$p]}" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{
                    "encoding": "UTF-8",
                    "separator": "\t"
                  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workspace}/${p}.id"
 then
  log "imported ${projects[$p]} as ${p}"
 else
  error "import of ${projects[$p]} failed!"
 fi
 refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
 echo
 # -------------------- IMPORT MULTIPLE FILES (PARALLEL) ---------------------- #
 # project ids will be stored in ${projects[another csv example]} etc.
 ps=( "csv file example" "another csv example" "yet another csv example" )
 echo "import files" \
  "$(for p in "${ps[@]}"; do printf "%s" "${projects[$p]} "; done)..."
 for p in "${ps[@]}"; do
  (if curl -fs --write-out "%{redirect_url}\n" \
    --form project-file="@${projects[$p]}" \
    --form project-name="${p}" \
    --form format="line-based" \
    --form options='{
                    "encoding": "UTF-8",
                    "separator": ","
                    }' \
    "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
    > "${workspace}/${p}.id"
  then
    log "imported ${projects[$p]} as ${p}"
  else
    error "import of ${projects[$p]} failed!"
  fi) &
  monitor "${p}"
 done
 monitoring
 for p in "${ps[@]}"; do
  refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
 done
 echo
 # ================================ TRANSFORM ================================= #
 checkpoint "Transform"; echo
 # ------------------------ APPLY OPERATIONS FROM FILE ------------------------ #
 p="csv file example"
 f="input/example-operations-history.json"
 echo "apply ${f} to ${p}..."
 if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode operations@"${f}" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
 then
  log "transformed ${p} (${projects[$p]})"
 else
  error "transform ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ---------------------- APPLY OPERATIONS FROM HEREDOC ----------------------- #
 # quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
 p="csv file example"
 echo "add column apply-from-heredoc to ${p}..."
 if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
 [
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "apply-from-heredoc",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": "grel:value.replace('2','TEST')",
    "onError": "set-to-blank"
  }
 ]
 JSON
 then
  log "transformed ${p} (${projects[$p]})"
 else
  error "transform ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ---------------- APPLY OPERATIONS FROM HEREDOC AND VARIABLES --------------- #
 # unquoted heredocs with variable and multi-line expression (requires jq)
 # \ must be used to quote the characters \, $, and `.
 p="csv file example"
 replace='TEST'
 column="apply with variables"
 echo "add column ${column} to ${p}..."
 # read the multi-line GREL expression into a variable; ${replace} is expanded
 # by bash because the heredoc delimiter is unquoted
 read -r -d '' expression << EXPRESSION
 grel:value.replace(
  '2',
  '${replace}'
 )
 EXPRESSION
 # jq -s -R turns the raw multi-line text into one JSON string literal;
 # "${jq}" is quoted so that a configured path containing spaces still works
 if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << JSON
 [
  {
    "op": "core/column-addition",
    "engineConfig": {
      "mode": "row-based"
    },
    "newColumnName": "${column}",
    "columnInsertIndex": 2,
    "baseColumnName": "b",
    "expression": $(echo "${expression}" | "${jq}" -s -R '.'),
    "onError": "set-to-blank"
  }
 ]
 JSON
 then
  log "transformed ${p} (${projects[$p]})"
 else
  error "transform ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------ APPLY OPERATIONS FROM HEREDOC TO MULTIPLE PROJECTS (PARALLEL)  ------ #
 # quoted heredoc ("JSON") will not be expanded by bash (no escaping needed)
 ps=( "another csv example" "yet another csv example" )
 echo "add column apply-from-heredoc to" "${ps[@]}" "..."
 for p in "${ps[@]}"; do
  (if curl -fs \
    --data project="${projects[$p]}" \
    --data-urlencode "operations@-" \
    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
    << "JSON"
  [
    {
      "op": "core/column-addition",
      "engineConfig": {
        "mode": "row-based"
      },
      "newColumnName": "apply-from-heredoc",
      "columnInsertIndex": 2,
      "baseColumnName": "b",
      "expression": "grel:value.replace('2','TEST')",
      "onError": "set-to-blank"
    }
  ]
 JSON
  then
    log "transformed ${p} (${projects[$p]})"
  else
    error "transform ${p} (${projects[$p]}) failed!"
  fi) &
  monitor "${p}"
 done
 monitoring
 echo
 # ------------- APPLY MULTIPLE OPERATIONS GENERATED FROM HEREDOC ------------- #
 # unquoted heredoc (JSON) with variables and multiplied (requires jq)
 # \ must be used to quote the characters \, $, and `.
 p="csv file example"
 columns=( "apply-from-file" "apply-from-heredoc" )
 echo "delete columns" "${columns[@]}" "in ${p}..."
 for column in "${columns[@]}"; do
  cat << JSON >> "${workspace}/${p}.tmp"
 [
  {
    "op": "core/column-removal",
    "columnName": "${column}"
  }
 ]
 JSON
 done
 if "${jq}" -s add "${workspace}/${p}.tmp" | curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode operations@- \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
 then
  log "transformed ${p} (${projects[$p]})"
  rm "${workspace}/${p}.tmp"
 else
  error "transform ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ================================== EXPORT ================================== #
 checkpoint "Export"; echo
 # ----------------------------- EXPORT TO STDOUT ----------------------------- #
 # curl writes the exported rows to stdout (no redirection to a file)
 p="csv file example"
 format="tsv"
 echo "export ${p} in ${format} format..."
 # ${format} is passed to the request so that the announced format and the
 # exported format cannot drift apart
 if curl -fs \
  --data project="${projects[$p]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows"
 then
  log "exported ${p} (${projects[$p]})"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------------ EXPORT TO FILE ------------------------------ #
 p="csv file example"
 format="csv"
 echo "export ${p} to ${format} file..."
 if curl -fs \
  --data project="${projects[$p]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
  > "${workspace}/${p}.${format}"
 then
  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------- TEMPLATING EXPORT TO FILE ------------------------ #
 # export rows with a custom template; prefix, suffix and separator wrap the
 # rendered rows into a JSON array
 p="csv file example"
 format="json"
 echo "export ${p} to ${format} file using template..."
 # quoted heredoc ("TEMPLATE") will not be expanded by bash
 IFS= read -r -d '' template << "TEMPLATE"
  {
    "a": {{cells['a'].value.jsonize()}},
    "b": {{cells['b'].value.jsonize()}},
    "c": {{cells['c'].value.jsonize()}}
  }
 TEMPLATE
 # send the template without its trailing newline; parameter expansion and
 # printf are used instead of "echo | head -c -2" because negative byte
 # counts are a GNU extension of head
 if printf '%s' "${template%$'\n'}" | curl -fs \
  --data project="${projects[$p]}" \
  --data format="template" \
  --data prefix="[
 " \
  --data suffix="
 ]" \
  --data separator=",
 " \
  --data engine='{"facets":[],"mode":"row-based"}' \
  --data-urlencode template@- \
  "${endpoint}/command/core/export-rows" \
  > "${workspace}/${p}.${format}"
 then
  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------- EXPORT TO MULTIPLE FILES (PARALLEL) -------------------- #
 ps=( "another csv example" "yet another csv example" )
 format="tsv"
 echo "export" "${ps[@]}" "to ${format} files..."
 for p in "${ps[@]}"; do
  (if curl -fs \
    --data project="${projects[$p]}" \
    --data format="${format}" \
    --data engine='{"facets":[],"mode":"row-based"}' \
    "${endpoint}/command/core/export-rows" \
    > "${workspace}/${p}.${format}"
  then
    log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
  else
    error "export of ${p} (${projects[$p]}) failed!"
  fi) &
  monitor "${p}"
 done
 monitoring
 echo
 # ================================ UTILITIES ================================= #
 checkpoint "Utilities"; echo
 # ------------------------------ LIST PROJECTS ------------------------------- #
 # get all project metadata and reshape json to print a list (requires jq)
 echo "list projects..."
 if curl -fs --get \
  "${endpoint}/command/core/get-all-project-metadata" \
  | "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"'
 then
  : #log "printed list of projects"
 else
  error "getting list of projects failed!"
 fi
 echo
 # ------------------------------- GET METADATA ------------------------------- #
 # get project metadata and reshape json to include project id (requires jq)
 p="csv file example"
 echo "metadata for ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-project-metadata" \
  | "${jq}" "{ id: ${projects[$p]} } + ."
 then
  : #log "printed metadata of ${p} (${projects[$p]})"
 else
  error "getting metadata of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------------ GET ROW COUNT ------------------------------- #
 # get total number of rows (requires jq)
 # the response is captured first so that a failed request is detected; in a
 # plain pipeline only the exit status of the last command would be tested
 p="csv file example"
 echo "total number of rows in ${p}..."
 # jq -e returns a non-zero exit status if .total is missing (null)
 if response="$(curl -fs --get \
  --data project="${projects[$p]}" \
  --data limit=0 \
  "${endpoint}/command/core/get-rows")" \
  && "${jq}" -e -r '.total' <<< "${response}"
 then
  : #log "printed row count of ${p} (${projects[$p]})"
 else
  error "getting row count of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------------- GET COLUMNS -------------------------------- #
 # get column names from project model (requires jq)
 p="csv file example"
 echo "column names of ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-models" \
  | "${jq}" -r '.columnModel | .columns[] | .name'
 then
  : #log "printed column names of ${p} (${projects[$p]})"
 else
  error "getting column names of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # -------------------------- GET OPERATIONS HISTORY -------------------------- #
 # get operations history and reshape json to make it applicable (requires jq)
 p="csv file example"
 f="${workspace}/${p}_history.json"
 echo "history of operations for ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-operations" \
  | "${jq}" '[ .entries[] | .operation ]' \
  > "${f}"
 then
  log "saved ops history of ${p} (${projects[$p]}) to ${f}"
 else
  error "getting ops history of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ---------------------------- GET IMPORT HISTORY ---------------------------- #
 # get project metadata and filter import options history (requires jq)
 p="csv file example"
 echo "history of import for ${p}..."
 if curl -fs --get \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/get-project-metadata" \
  | "${jq}" ".importOptionMetadata[0]"
 then
  : #log "printed import history of ${p} (${projects[$p]})"
 else
  error "getting import history of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ------------------------------ DELETE PROJECT ------------------------------ #
 # delete a project (rarely needed for batch processing)
 p="yet another csv example"
 echo "delete project ${p}..."
 if curl -fs \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/delete-project$(refine_csrf)" > /dev/null
 then
  log "deleted ${p} (${projects[$p]})"
 else
  error "deletion of ${p} (${projects[$p]}) failed!"
 fi
 echo
 # ================================== FINISH ================================== #
 checkpoint "Finish"; echo
 # stop OpenRefine server
 refine_stop; echo
 # calculate run time based on checkpoints
 checkpoint_stats; echo
 # word count on all files in workspace
 count_output