Refactoring mit task runner

2025-05-25 00:01:02 +02:00 · 2020-08-01 02:04:39 +02:00 · 2020-08-01 02:04:39 +02:00 · 8005060d10
commit 8005060d10
parent c612b1c67c
9 changed files with 270 additions and 138 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,5 +1,5 @@
-input/*
-output/*
-log/*
-openrefine/
-jq
+input
+lib
+log
+output
+.task
--- a/Taskfile.yml
+++ b/Taskfile.yml
@ -0,0 +1,85 @@
+# https://taskfile.dev
+
+version: '3'
+
+output: 'group'  # print each command's output in one piece after it finishes
+
+vars:
+  DATE:
+    sh: date +%Y%m%d_%H%M%S  # timestamp used in the REFINE_LOGFILE names below
+
+env:
+  REFINE_MEMORY: 8g  # defaults read by bash-refine.sh; tasks may override them
+  REFINE_ENDPOINT: http://localhost:3334
+
+tasks:
+  default:
+    desc: 'Workflow'
+    deps: [bibliotheca, mkdir]  # bibliotheca produces the CSV consumed here
+    env:
+      REFINE_WORKDIR: 'output/03-ba-sachsen'
+      REFINE_LOGFILE: 'log/03-ba-sachsen/{{.DATE}}.log'
+    sources:
+      - 'output/02-bibliotheca-main/bibliotheca.csv'
+    generates:
+      - 'output/03-ba-sachsen/ba-sachsen.pic'
+    cmds:
+      - 'tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"'
+
+  glauchau:
+    desc: 'Glauchau'
+    deps: [mkdir]  # output/ and log/ directories must exist first
+    env:
+      REFINE_MEMORY: '6G'
+      REFINE_ENDPOINT: 'http://localhost:3334'
+      REFINE_WORKDIR: 'output/01-bibliotheca-pre'
+      REFINE_LOGFILE: 'log/01-bibliotheca-pre/{{.DATE}}_glauchau.log'
+    sources:
+      - 'input/glauchau.imp'
+    generates:
+      - 'output/01-bibliotheca-pre/glauchau.tsv'
+    cmds:
+      - 'tasks/01-bibliotheca-pre.sh "input/glauchau.imp"'
+
+  plauen:
+    desc: Plauen
+    deps: [mkdir]
+    cmds:
+      # no mkdir here: the mkdir dependency creates output/ and log/ (as for glauchau)
+      - tasks/01-bibliotheca-pre.sh "input/plauen.imp"
+    sources:
+      - input/plauen.imp
+    generates:
+      - output/01-bibliotheca-pre/plauen.tsv
+    env:
+      REFINE_MEMORY: 4G
+      REFINE_ENDPOINT: http://localhost:3335  # distinct port from glauchau (3334)
+      REFINE_WORKDIR: output/01-bibliotheca-pre
+      REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
+
+  bibliotheca:
+    desc: 'Hauptverarbeitung'
+    deps: [glauchau, plauen, mkdir]  # both pre-processing exports are inputs
+    env:
+      REFINE_WORKDIR: 'output/02-bibliotheca-main'
+      REFINE_LOGFILE: 'log/02-bibliotheca-main/{{.DATE}}.log'
+    sources:
+      - 'output/01-bibliotheca-pre/*.tsv'
+    generates:
+      - 'output/02-bibliotheca-main/bibliotheca.csv'
+    cmds:
+      - 'tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"'
+
+  mkdir:
+    desc: 'Ordner erstellen'
+    status:  # task counts as up to date when every directory already exists
+      - 'test -d output/01-bibliotheca-pre'
+      - 'test -d log/01-bibliotheca-pre'
+      - 'test -d output/02-bibliotheca-main'
+      - 'test -d log/02-bibliotheca-main'
+      - 'test -d output/03-ba-sachsen'
+      - 'test -d log/03-ba-sachsen'
+    cmds:
+      - 'mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre'
+      - 'mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main'
+      - 'mkdir -p output/03-ba-sachsen log/03-ba-sachsen'
--- a/bash-refine.sh
+++ b/bash-refine.sh
@ -1,5 +1,5 @@
 #!/bin/bash
-# bash-refine v1.1.1: bash-refine.sh, Felix Lohmeier, 2020-07-22
+# bash-refine v1.3.2: bash-refine.sh, Felix Lohmeier, 2020-08-01
 # https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
 # license: MIT License https://choosealicense.com/licenses/mit/

@ -7,14 +7,30 @@

 # ================================== CONFIG ================================== #

-endpoint="http://localhost:3333"
-memory="1400M" # increase to available RAM
+endpoint="${REFINE_ENDPOINT:-http://localhost:3333}" # override via REFINE_ENDPOINT
+memory="${REFINE_MEMORY:-1400M}" # override via REFINE_MEMORY
+csrf="${REFINE_CSRF:-true}" # set REFINE_CSRF=false for OpenRefine < 3.3
 date="$(date +%Y%m%d_%H%M%S)"
-workspace="output/${date}"
-logfile="${workspace}/${date}.log"
-csrf=true # set to false for OpenRefine < 3.3
-jq="jq" # path to executable
-openrefine="openrefine/refine" # path to executable
+if [[ -n "$(readlink -e "${REFINE_WORKDIR}")" ]]; then # -e: used only if the directory exists
+  workdir="$(readlink -e "${REFINE_WORKDIR}")"
+else # fallback: output/<timestamp> next to this script
+  workdir="$(readlink -m "${BASH_SOURCE%/*}/output/${date}")"
+fi
+if [[ -n "$(readlink -f "${REFINE_LOGFILE}")" ]]; then # -f: parent directory must exist
+  logfile="$(readlink -f "${REFINE_LOGFILE}")"
+else # fallback: log/<timestamp>.log next to this script
+  logfile="$(readlink -m "${BASH_SOURCE%/*}/log/${date}.log")"
+fi
+if [[ -n "$(readlink -e "${REFINE_JQ}")" ]]; then # path to an existing jq executable
+  jq="$(readlink -e "${REFINE_JQ}")"
+else # fallback: lib/jq next to this script
+  jq="$(readlink -m "${BASH_SOURCE%/*}/lib/jq")"
+fi
+if [[ -n "$(readlink -e "${REFINE_REFINE}")" ]]; then # path to an existing refine executable
+  refine="$(readlink -e "${REFINE_REFINE}")"
+else # fallback: lib/openrefine/refine next to this script
+  refine="$(readlink -m "${BASH_SOURCE%/*}/lib/openrefine/refine")"
+fi

 declare -A checkpoints # associative array for stats
 declare -A pids # associative array for monitoring background jobs
@ -37,28 +53,29 @@ function requirements {
  # download jq and OpenRefine if necessary
  if [[ -z "$(readlink -e "${jq}")" ]]; then
    echo "Download jq..."
+    mkdir -p "$(dirname "${jq}")"
    # jq 1.4 has much faster startup time than 1.5 and 1.6
    curl -L --output "${jq}" \
      "https://github.com/stedolan/jq/releases/download/jq-1.4/jq-linux-x86_64"
    chmod +x "${jq}"; echo
  fi
-  if [[ -z "$(readlink -e "${openrefine}")" ]]; then
+  if [[ -z "$(readlink -e "${refine}")" ]]; then
    echo "Download OpenRefine..."
-    mkdir -p "$(dirname "${openrefine}")"
+    mkdir -p "$(dirname "${refine}")"
    curl -L --output openrefine.tar.gz \
      "https://github.com/OpenRefine/OpenRefine/releases/download/3.3/openrefine-linux-3.3.tar.gz"
-    echo "Install OpenRefine in subdirectory $(dirname "${openrefine}")..."
-    tar -xzf openrefine.tar.gz -C "$(dirname "${openrefine}")" --strip 1 --totals
+    echo "Install OpenRefine in subdirectory $(dirname "${refine}")..."
+    tar -xzf openrefine.tar.gz -C "$(dirname "${refine}")" --strip 1 --totals
    rm -f openrefine.tar.gz
    # do not try to open OpenRefine in browser
    sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' \
-      "$(dirname "${openrefine}")"/refine.ini
+      "$(dirname "${refine}")"/refine.ini
    # set min java heap space to allocated memory
    sed -i 's/-Xms$REFINE_MIN_MEMORY/-Xms$REFINE_MEMORY/' \
-      "$(dirname "${openrefine}")"/refine
+      "$(dirname "${refine}")"/refine
    # set autosave period from 5 minutes to 25 hours
    sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1500/' \
-      "$(dirname "${openrefine}")"/refine.ini  
+      "$(dirname "${refine}")"/refine.ini
    echo
  fi
 }
@ -66,10 +83,10 @@ function requirements {
 # ============================== OPENREFINE API ============================== #

 function refine_start {
-  echo "start OpenRefine server..."  
+  echo "start OpenRefine server..."
  local dir
-  dir="$(readlink -f "${workspace}")"
-  ${openrefine} -v warn -m "${memory}" -p "${endpoint##*:}" -d "${dir}" &
+  dir="$(readlink -e "${workdir}")"
+  ${refine} -v warn -m "${memory}" -p "${endpoint##*:}" -d "${dir}" &
  pid_server=${!}
  timeout 30s bash -c "until curl -s \"${endpoint}\" \
    | cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
@ -85,7 +102,7 @@ function refine_kill {
  # kill OpenRefine immediately; SIGKILL (kill -9) prevents saving projects
  { kill -9 "${pid_server}" && wait "${pid_server}"; } 2>/dev/null
  # delete temporary OpenRefine projects
-  (cd "${workspace}" && rm -rf ./*.project* && rm -f workspace.json)
+  (cd "${workdir}" && rm -rf ./*.project* && rm -f workspace.json)
 }

 function refine_check {
@ -208,9 +225,9 @@ function checkpoint_stats {
 }

 function count_output {
-  # word count on all files in workspace
-  echo "files (number of lines / size in bytes) in ${workspace}..."
-  (cd "${workspace}" && wc -c -l ./*)
+  # print line and byte counts of all files in workdir
+  echo "files (number of lines / size in bytes) in ${workdir}..."
+  (cd "${workdir}" && wc -c -l ./*) # subshell: the caller's cwd stays unchanged
 }

 function init {
@ -218,6 +235,6 @@ function init {
  requirements
  # set trap, create directories and tee to log file
  trap 'error "script interrupted!"' HUP INT QUIT TERM
-  mkdir -p "${workspace}"
+  mkdir -p "${workdir}" "$(dirname "${logfile}")"
  exec &> >(tee -i -a "${logfile}")
 }
--- a/config/alephino-01.sh
+++ b/config/alephino-01.sh
@ -1,22 +0,0 @@
-# Alephino Vorverarbeitung
-# - Exporte der fünf Standorte importieren
-# - in Tabellenformat umwandeln
-# - als eine Datei exportieren
-
-
-
-
-
-
-
-# Alephino
-for i in leipzig riesa; do
-    echo "===== ${i} ====="
-    date
-    openrefine/openrefine-client -P ${port} --create input/${i}-titel.txt --format=fixed-width --columnWidths=5 --columnWidths=1000000 --storeBlankRows=false --encoding=UTF-8 --projectName=${i}-titel
-    openrefine/openrefine-client -P ${port} --create input/${i}-exemplare.txt --format=fixed-width --columnWidths=5 --columnWidths=1000000 --storeBlankRows=false --encoding=UTF-8 --projectName=${i}-exemplare
-    openrefine/openrefine-client -P ${port} --apply config/alephino-01-titel.json ${i}-titel
-    openrefine/openrefine-client -P ${port} --apply config/alephino-01-exemplare-${i}.json ${i}-exemplare
-    openrefine/openrefine-client -P ${port} --export --output${workspace}/${date}/${i}.tsv ${i}-exemplare
-    echo ""
-done
--- a/config/alephino-02.sh
+++ b/config/alephino-02.sh
@ -1,13 +0,0 @@
-# Alephino
-# - ...
-
-
-
-
-
-
-echo "===== Alephino zusammenführen ====="
-date
-zip -j${workspace}/${date}/alephino.zip${workspace}/${date}/riesa.tsv${workspace}/${date}/leipzig.tsv
-openrefine/openrefine-client -P ${port} --create${workspace}/${date}/alephino.zip --format=tsv --encoding=UTF-8 --includeFileSources=true --projectName=alephino
-openrefine/openrefine-client -P ${port} --export --output${workspace}/${date}/alephino.tsv alephino
--- a/main.sh
+++ b/main.sh
@ -1,36 +1,16 @@
 #!/bin/bash
 # Scripte zur Transformation von Bibliotheca und Alephino nach PICA+

-# ================================ ENVIRONMENT =============================== #
+# download the task runner (https://taskfile.dev) to lib/task on first run
+task="$(readlink -m "${BASH_SOURCE%/*}/lib/task")"
+if [[ -z "$(readlink -e "${task}")" ]]; then # binary not present yet
+  echo "Download task..."
+  mkdir -p "$(dirname "${task}")"
+  curl -L --output task.tar.gz \
+    "https://github.com/go-task/task/releases/download/v3.0.0-preview4/task_linux_amd64.tar.gz"
+  tar -xzf task.tar.gz -C "$(dirname "${task}")" task --totals # extract only the binary
+  rm -f task.tar.gz
+fi

-# make script executable from another directory
-cd "${BASH_SOURCE%/*}/" || exit 1
-
-# source the main script
-source bash-refine.sh
-
-# override default config
-memory="8G"
-endpoint="http://localhost:3334"
-
-# check requirements, set trap, create workspace and tee to logfile
-init
-
-# ================================= WORKFLOW ================================= #
-
-checkpoint "Bibliotheca Vorverarbeitung"; echo
-source config/bibliotheca-01.sh
-
-checkpoint "Bibliotheca Hauptverarbeitung"; echo
-source config/bibliotheca-02.sh
-
-checkpoint "PICA+ generieren"; echo
-source config/ba-sachsen.sh
-
-# ================================= STATS ================================= #
-
-# calculate run time based on checkpoints
-checkpoint_stats; echo
-
-# word count on all files in workspace
-count_output
+# run default task from the script's directory so Taskfile.yml and its paths resolve
+cd "${BASH_SOURCE%/*}/" && "${task}"
--- a/tasks/01-bibliotheca-pre.sh
+++ b/tasks/01-bibliotheca-pre.sh
@ -1,28 +1,39 @@
+#!/bin/bash
 # Bibliotheca Vorverarbeitung
-# - Exporte der fünf Standorte importieren
+# - Export von einer der Bibliotheken importieren
 # - in Tabellenformat umwandeln
-# - als eine Datei exportieren
+# - als TSV exportieren

-# ================================== CONFIG ================================== #
+# =============================== ENVIRONMENT ================================ #

-projects["bautzen"]="input/bautzen.imp"
-projects["breitenbrunn"]="input/breitenbrunn.imp"
-projects["dresden"]="input/dresden.imp"
-projects["glauchau"]="input/glauchau.imp"
-projects["plauen"]="input/plauen.imp"
+# source the main script
+source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1

-# ================================ BEGIN LOOP ================================ #
+# read input
+if [[ $1 ]]; then
+  p="$(basename "$1" .imp)"
+  projects[$p]="$(readlink -e "$1")"
+else
+  echo 1>&2 "Please provide path to input file"; exit 1
+fi

-for p in "${!projects[@]}"; do
+# make script executable from another directory
+cd "${BASH_SOURCE%/*}/" || exit 1

-checkpoint "${p}"; echo
+# check requirements, set trap, create workdir and tee to logfile
+init

 # ================================= STARTUP ================================== #

+checkpoint "Startup"; echo
+
+# start OpenRefine server
 refine_start; echo

 # ================================== IMPORT ================================== #

+checkpoint "Import"; echo
+
 # Line-based text files
 # Character encoding: ISO-8859-1
 # Store blank rows deaktivieren
@ -39,17 +50,19 @@ if curl -fs --write-out "%{redirect_url}\n" \
                   "ignoreLines": 1
                  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
-  > "${workspace}/${p}.id"
+  > "${workdir}/${p}.id"
 then
  log "imported ${projects[$p]} as ${p}"
 else
  error "import of ${projects[$p]} failed!"
 fi
-refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
+refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
 echo

 # ================================ TRANSFORM ================================= #

+checkpoint "Transform"; echo
+
 # -------------------- 01 Mehrzeilige Inhalte extrahieren -------------------- #

 # - Column 1 > Text filter > regular expression aktivieren > ^\* > invert
@ -485,6 +498,8 @@ echo

 # ================================== EXPORT ================================== #

+checkpoint "Export"; echo
+
 format="tsv"
 echo "export ${p} to ${format} file..."
 if curl -fs \
@ -492,9 +507,9 @@ if curl -fs \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
-  > "${workspace}/${p}.${format}"
+  > "${workdir}/${p}.${format}"
 then
-  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
+  log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
@ -502,8 +517,13 @@ echo

 # ================================== FINISH ================================== #

+checkpoint "Finish"; echo
+
+# stop OpenRefine server
 refine_stop; echo

-# ================================= END LOOP ================================= #
+# calculate run time based on checkpoints
+checkpoint_stats; echo

-done
+# word count on all files in workdir
+count_output
--- a/tasks/02-bibliotheca-main.sh
+++ b/tasks/02-bibliotheca-main.sh
@ -1,28 +1,43 @@
+#!/bin/bash
 # Bibliotheca Hauptverarbeitung
 # - Datenbereinigungen
 # - Mapping auf PICA3
-# - PICA3-Spalten als CSV (via Template) exportieren
+# - PICA3 als CSV (via Template) exportieren

-# ================================== CONFIG ================================== #
+# =============================== ENVIRONMENT ================================ #

-# TSV-Exporte aller Einzelprojekte in ein Zip-Archiv packen
-zip -j "${workspace}/bibliotheca.zip" \
-  "${workspace}/bautzen.tsv" \
-  "${workspace}/breitenbrunn.tsv" \
-  "${workspace}/dresden.tsv" \
-  "${workspace}/glauchau.tsv" \
-  "${workspace}/plauen.tsv"
+# source the main script
+source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1

-projects["bibliotheca"]="${workspace}/bibliotheca.zip"
+# read input
+if [[ $1 ]]; then
+  inputdir="$(readlink -e "$1")"
+else
+  echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
+fi
+
+# make script executable from another directory
+cd "${BASH_SOURCE%/*}/" || exit 1
+
+# check requirements, set trap, create workdir and tee to logfile
+init

 # ================================= STARTUP ================================== #

+checkpoint "Startup"; echo
+
+# start OpenRefine server
 refine_start; echo

 # ================================== IMPORT ================================== #

-# Neues Projekt erstellen aus Zip-Archiv
+checkpoint "Import"; echo

+# TSV-Exporte aller Einzelprojekte in ein Zip-Archiv packen
+zip -j "${workdir}/bibliotheca.zip" "${inputdir}"/*.tsv
+projects["bibliotheca"]="${workdir}/bibliotheca.zip"
+
+# Neues Projekt erstellen aus Zip-Archiv
 p="bibliotheca"
 echo "import file" "${projects[$p]}" "..."
 if curl -fs --write-out "%{redirect_url}\n" \
@ -35,17 +50,19 @@ if curl -fs --write-out "%{redirect_url}\n" \
                   "separator": "\t"
                  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
-  > "${workspace}/${p}.id"
+  > "${workdir}/${p}.id"
 then
  log "imported ${projects[$p]} as ${p}"
 else
  error "import of ${projects[$p]} failed!"
 fi
-refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
+refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
 echo

 # ================================ TRANSFORM ================================= #

+checkpoint "Transform"; echo
+
 # --------------------------- 01 Spalten sortieren --------------------------- #

 # damit Records-Mode erhalten bleibt
@ -552,6 +569,8 @@ echo

 # ================================== EXPORT ================================== #

+checkpoint "Export"; echo
+
 # Export der PICA3-Spalten als CSV
 format="csv"
 echo "export ${p} to ${format} file using template..."
@ -626,9 +645,9 @@ if echo "${template}" | head -c -2 | curl -fs \
  --data engine='{"facets":[],"mode":"row-based"}' \
  --data-urlencode template@- \
  "${endpoint}/command/core/export-rows" \
-  > "${workspace}/${p}.${format}"
+  > "${workdir}/${p}.${format}"
 then
-  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
+  log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
@ -636,4 +655,13 @@ echo

 # ================================== FINISH ================================== #

+checkpoint "Finish"; echo
+
+# stop OpenRefine server
 refine_stop; echo
+
+# calculate run time based on checkpoints
+checkpoint_stats; echo
+
+# word count on all files in workdir
+count_output
--- a/tasks/03-ba-sachsen.sh
+++ b/tasks/03-ba-sachsen.sh
@ -1,21 +1,45 @@
-# Generierung PICA+ aus CSV-Exporten
+#!/bin/bash
+# Generierung PICA+
+# - PPNs anreichern und Exemplare clustern
+# - als PICA+ exportieren

-# ================================== CONFIG ================================== #
+# =============================== ENVIRONMENT ================================ #

-# TODO: Zusammenführung mit Alephino
-zip -j "${workspace}/ba-sachsen.zip" \
-  "${workspace}/bibliotheca.csv"
+# source the main script
+source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1

-projects["ba-sachsen"]="${workspace}/ba-sachsen.zip"
+# read input
+if [[ $1 ]]; then
+  inputdir1="$(readlink -e "$1")"
+else
+  echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
+fi
+if [[ $2 ]]; then
+  inputdir2="$(readlink -e "$2")"
+fi
+
+# make script executable from another directory
+cd "${BASH_SOURCE%/*}/" || exit 1
+
+# check requirements, set trap, create workdir and tee to logfile
+init

 # ================================= STARTUP ================================== #

+checkpoint "Startup"; echo
+
+# start OpenRefine server
 refine_start; echo

 # ================================== IMPORT ================================== #

-# Neues Projekt erstellen aus Zip-Archiv
+checkpoint "Import"; echo

+# TODO: Zusammenführung mit Alephino
+zip -j "${workdir}/ba-sachsen.zip" "${inputdir1}"/*.csv
+projects["ba-sachsen"]="${workdir}/ba-sachsen.zip"
+
+# Neues Projekt erstellen aus Zip-Archiv
 p="ba-sachsen"
 echo "import file" "${projects[$p]}" "..."
 if curl -fs --write-out "%{redirect_url}\n" \
@ -28,17 +52,19 @@ if curl -fs --write-out "%{redirect_url}\n" \
                   "separator": ","
                  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
-  > "${workspace}/${p}.id"
+  > "${workdir}/${p}.id"
 then
  log "imported ${projects[$p]} as ${p}"
 else
  error "import of ${projects[$p]} failed!"
 fi
-refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
+refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
 echo

 # ================================ TRANSFORM ================================= #

+checkpoint "Transform"; echo
+
 # ------------------------ 01 PPN anreichern über ISBN ----------------------- #

 # TODO: Anreicherung für 0110
@ -377,6 +403,8 @@ echo

 # ================================== EXPORT ================================== #

+checkpoint "Export"; echo
+
 # Export in PICA+
 format="pic"
 echo "export ${p} to pica+ file using template..."
@ -405,9 +433,9 @@ if echo "${template}" | head -c -2 | curl -fs \
  --data engine='{"facets":[],"mode":"row-based"}' \
  --data-urlencode template@- \
  "${endpoint}/command/core/export-rows" \
-  > "${workspace}/${p}.${format}"
+  > "${workdir}/${p}.${format}"
 then
-  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
+  log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
 else
  error "export of ${p} (${projects[$p]}) failed!"
 fi
@ -415,4 +443,13 @@ echo

 # ================================== FINISH ================================== #

+checkpoint "Finish"; echo
+
+# stop OpenRefine server
 refine_stop; echo
+
+# calculate run time based on checkpoints
+checkpoint_stats; echo
+
+# word count on all files in workdir
+count_output