Felix Lohmeier 2 years ago
parent
commit
9bad6aeb17
  1. 9
      .gitignore
  2. 73
      README.md
  3. 301
      Taskfile.yml
  4. 146
      alephino/Taskfile.yml
  5. 14
      alephino/config/main/7100a.json
  6. 14
      alephino/config/main/file.json
  7. 27
      alephino/config/main/sortieren.json
  8. 15
      alephino/config/pre/exemplare-id-separieren.json
  9. 65
      alephino/config/pre/exemplare-loeschen.json
  10. 18
      alephino/config/pre/exemplare-mehrfachbelegungen.json
  11. 15
      alephino/config/pre/exemplare-prefix.json
  12. 80
      alephino/config/pre/exemplare-sortieren.json
  13. 9
      alephino/config/pre/exemplare-transponieren.json
  14. 822
      alephino/config/pre/titel-anreichern.json
  15. 22
      alephino/config/pre/titel-einzelfaelle.json
  16. 15
      alephino/config/pre/titel-id-separieren.json
  17. 148
      alephino/config/pre/titel-loeschen.json
  18. 18
      alephino/config/pre/titel-mehrfachbelegungen.json
  19. 15
      alephino/config/pre/titel-prefix.json
  20. 80
      alephino/config/pre/titel-sortieren.json
  21. 9
      alephino/config/pre/titel-transponieren.json
  22. 38
      barcodes/Taskfile.yml
  23. 240
      bash-refine.sh
  24. 204
      bibliotheca/Taskfile.yml
  25. 719
      bibliotheca/config/main/0100-0110.json
  26. 2407
      bibliotheca/config/main/0500-1140.json
  27. 158
      bibliotheca/config/main/0500-lax.json
  28. 34
      bibliotheca/config/main/0500-ueber.json
  29. 1978
      bibliotheca/config/main/0501-0502-0503-0999.json
  30. 139
      bibliotheca/config/main/1100a-1100n.json
  31. 89
      bibliotheca/config/main/1131.json
  32. 77
      bibliotheca/config/main/1500.json
  33. 14
      bibliotheca/config/main/2000.json
  34. 14
      bibliotheca/config/main/2199.json
  35. 65
      bibliotheca/config/main/4000a.json
  36. 14
      bibliotheca/config/main/4000d.json
  37. 14
      bibliotheca/config/main/4020a.json
  38. 14
      bibliotheca/config/main/4030n.json
  39. 14
      bibliotheca/config/main/4030p.json
  40. 14
      bibliotheca/config/main/4060a.json
  41. 14
      bibliotheca/config/main/7100a.json
  42. 878
      bibliotheca/config/main/7100d.json
  43. 1391
      bibliotheca/config/main/7100f.json
  44. 14
      bibliotheca/config/main/7100j.json
  45. 14
      bibliotheca/config/main/8011.json
  46. 36
      bibliotheca/config/main/8100.json
  47. 14
      bibliotheca/config/main/8200.json
  48. 34
      bibliotheca/config/main/8515.json
  49. 89
      bibliotheca/config/main/8600.json
  50. 68
      bibliotheca/config/main/abschluss.json
  51. 34
      bibliotheca/config/main/e0xx.json
  52. 34
      bibliotheca/config/main/e0xxb.json
  53. 29
      bibliotheca/config/main/ebooks.json
  54. 14
      bibliotheca/config/main/file.json
  55. 17
      bibliotheca/config/main/sortieren.json
  56. 87
      bibliotheca/config/main/template.txt
  57. 165
      bibliotheca/config/main/zeitschriften.json
  58. 99
      bibliotheca/config/pre/acq.json
  59. 51
      bibliotheca/config/pre/feld-wert-separieren.json
  60. 85
      bibliotheca/config/pre/feldname-prefix.json
  61. 29
      bibliotheca/config/pre/leerzeilen.json
  62. 99
      bibliotheca/config/pre/makuliert.json
  63. 16
      bibliotheca/config/pre/mehrfachbelegungen.json
  64. 46
      bibliotheca/config/pre/mehrzeiliges-extrahieren.json
  65. 8
      bibliotheca/config/pre/mehrzeiliges-zusammen.json
  66. 29
      bibliotheca/config/pre/reduzieren.json
  67. 8
      bibliotheca/config/pre/transponieren.json
  68. 26
      main.sh
  69. 78
      pica+/Taskfile.yml
  70. 138
      pica+/config/clustern.json
  71. 35
      pica+/config/dedup.json
  72. 292
      pica+/config/ppn.json
  73. 33
      pica+/config/template.txt
  74. 868
      tasks/01-alephino-pre.sh
  75. 767
      tasks/01-bibliotheca-pre.sh
  76. 198
      tasks/02-alephino-main.sh
  77. 9373
      tasks/02-bibliotheca-main.sh
  78. 688
      tasks/03-ba-sachsen.sh

9
.gitignore vendored

@ -1,5 +1,6 @@
input
lib
log
output
.task
.openrefine
*/input
*/output
*/tmp
*/log/*

73
README.md

@ -1,29 +1,68 @@
# Transformation von Bibliotheca und Alephino nach PICA+ für die Bibliotheken der Berufsakademie Sachsen
## Nutzung
## Vorbereitung
1. Exporte bereitstellen mit folgenden Dateinamen:
* input/bautzen.imp
* input/breitenbrunn.imp
* input/dresden.imp
* input/glauchau.imp
* input/leipzig-exemplare.txt
* input/leipzig-titel.txt
* input/plauen.imp
* input/riesa-exemplare.txt
* input/riesa-titel.txt
2. Installation und initiale Datenverarbeitung: `./main.sh`
3. Weitere Datenverarbeitungen:
* `lib/task` um den gesamten Workflow zu starten
* `lib/task --list` für eine Liste der verfügbaren Tasks
* alephino/input/leipzig-exemplare.txt
* alephino/input/leipzig-titel.txt
* alephino/input/riesa-exemplare.txt
* alephino/input/riesa-titel.txt
* bibliotheca/input/bautzen.imp
* bibliotheca/input/breitenbrunn.imp
* bibliotheca/input/dresden.imp
* bibliotheca/input/glauchau.imp
* bibliotheca/input/plauen.imp
2. Installation Task 3.2.2
a) RPM-based (Fedora, CentOS, SLES, etc.)
```sh
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.rpm
sudo dnf install ./task_linux_amd64.rpm && rm task_linux_amd64.rpm
```
b) DEB-based (Debian, Ubuntu etc.)
```sh
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.deb
sudo apt install ./task_linux_amd64.deb && rm task_linux_amd64.deb
```
3. Installation OpenRefine 3.4.1 und openrefine-client 0.3.10
```
task install
```
## Nutzung
Datenverarbeitung sequentiell
```
task default
```
Datenverarbeitung (teil)parallelisiert (benötigt bis zu 16 GB RAM)
```
task pica+:main
```
Analyse dubletter Barcodes
```
task barcodes:main
```
## Systemvoraussetzungen
* Linux mit Bash, cURL und JAVA (getestet auf Fedora 32)
* 7 GB freien Arbeitsspeicher
* GNU/Linux (getestet auf Fedora 32)
* JAVA 8+ (für OpenRefine)
* 8 GB freien Arbeitsspeicher
## Verwendete Tools
* [OpenRefine](https://openrefine.org/)
* [bash-refine](https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d)
* [openrefine-client](https://github.com/opencultureconsulting/openrefine-client)
* [Task](https://github.com/go-task/task)

301
Taskfile.yml

@ -1,234 +1,99 @@
# https://taskfile.dev
# https://github.com/opencultureconsulting/openrefine-task-runner
version: '3'
output: 'group'
includes:
alephino: alephino
barcodes: barcodes
bibliotheca: bibliotheca
pica+: pica+
vars:
DATE: '{{ now | date "20060102_150405"}}'
silent: true
output: prefixed
env:
REFINE_MEMORY: 8g
REFINE_ENDPOINT: http://localhost:3334
OPENREFINE:
sh: readlink -m .openrefine/refine
CLIENT:
sh: readlink -m .openrefine/client
tasks:
default:
desc: Generierung PICA+
# deps: [bibliotheca, alephino]
desc: Datenverarbeitung sequentiell
cmds:
- task: alephino
- task: bibliotheca
- tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"
sources:
- tasks/03-ba-sachsen.sh
# - output/02-alephino-main/alephino.csv
- output/02-bibliotheca-main/bibliotheca.csv
generates:
- output/03-ba-sachsen/ba-sachsen.pic
- output/03-ba-sachsen/ba-sachsen.openrefine.tar.gz
env:
REFINE_WORKDIR: output/03-ba-sachsen
REFINE_LOGFILE: log/03-ba-sachsen/{{.DATE}}.log
- task: alephino:main
- task: bibliotheca:main
- task: pica+:refine
alephino:
desc: Alephino Hauptverarbeitung
# deps: [leipzig, riesa]
install:
desc: (re)install OpenRefine and openrefine-client into subdirectory .openrefine
cmds:
- task: leipzig
- task: riesa
- tasks/02-alephino-main.sh "output/01-alephino-pre"
sources:
- tasks/02-alephino-main.sh
- output/01-alephino-pre/*.tsv
generates:
# - output/02-alephino-main/alephino.csv
- output/02-alephino-main/alephino.openrefine.tar.gz
env:
REFINE_ENDPOINT: http://localhost:3334
REFINE_WORKDIR: output/02-alephino-main
REFINE_LOGFILE: log/02-alephino-main/{{.DATE}}.log
bibliotheca:
desc: Bibliotheca Hauptverarbeitung
# deps: [bautzen, breitenbrunn, dresden, glauchau, plauen]
cmds:
- task: bautzen
- task: breitenbrunn
- task: dresden
- task: glauchau
# - task: plauen
- tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"
sources:
- tasks/01-bibliotheca-pre.sh
- tasks/02-bibliotheca-main.sh
- output/01-bibliotheca-pre/*.tsv
generates:
- output/02-bibliotheca-main/bibliotheca.csv
- output/02-bibliotheca-main/bibliotheca.openrefine.tar.gz
env:
REFINE_ENDPOINT: http://localhost:3335
REFINE_WORKDIR: output/02-bibliotheca-main
REFINE_LOGFILE: log/02-bibliotheca-main/{{.DATE}}.log
bautzen:
desc: Bibliotheca Vorverarbeitung
cmds:
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
sources:
- tasks/01-bibliotheca-pre.sh
- '{{.INPUT}}'
generates:
- output/01-bibliotheca-pre/bautzen.tsv
vars:
INPUT: '{{.INPUT | default "input/bautzen.imp"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "6G"}}'
REFINE_ENDPOINT: http://localhost:3334
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_bautzen.log
breitenbrunn:
desc: Bibliotheca Vorverarbeitung
cmds:
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
sources:
- tasks/01-bibliotheca-pre.sh
- '{{.INPUT}}'
generates:
- output/01-bibliotheca-pre/breitenbrunn.tsv
vars:
INPUT: '{{.INPUT | default "input/breitenbrunn.imp"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
REFINE_ENDPOINT: http://localhost:3335
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_breitenbrunn.log
dresden:
desc: Bibliotheca Vorverarbeitung
cmds:
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
sources:
- tasks/01-bibliotheca-pre.sh
- '{{.INPUT}}'
generates:
- output/01-bibliotheca-pre/dresden.tsv
vars:
INPUT: '{{.INPUT | default "input/dresden.imp"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
REFINE_ENDPOINT: http://localhost:3336
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_dresden.log
leipzig:
desc: Alephino Vorverarbeitung
- | # delete existing install and recreate folder
rm -rf .openrefine
mkdir -p .openrefine
- > # download OpenRefine archive
wget --no-verbose -O openrefine.tar.gz
https://github.com/OpenRefine/OpenRefine/releases/download/3.4.1/openrefine-linux-3.4.1.tar.gz
- | # install OpenRefine into subdirectory .openrefine
tar -xzf openrefine.tar.gz -C .openrefine --strip 1
rm openrefine.tar.gz
- | # optimize OpenRefine for batch processing
sed -i 's/cd `dirname $0`/cd "$(dirname "$0")"/' ".openrefine/refine" # fix path issue in OpenRefine startup file
sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' ".openrefine/refine.ini" # do not try to open OpenRefine in browser
sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1440/' ".openrefine/refine.ini" # set autosave period from 5 minutes to 25 hours
- > # download openrefine-client into subdirectory .openrefine
wget --no-verbose -O .openrefine/client
https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.10/openrefine-client_0-3-10_linux
- chmod +x .openrefine/client # make client executable
start:
dir: ./{{.DIR}}
cmds:
- tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"
sources:
- tasks/01-alephino-pre.sh
- '{{.TITEL}}'
- '{{.EXEMPLARE}}'
generates:
- output/01-alephino-pre/leipzig.tsv
vars:
TITEL: '{{.TITEL | default "input/leipzig-titel.txt"}}'
EXEMPLARE: '{{.EXEMPLARE | default "input/leipzig-exemplare.txt"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
REFINE_ENDPOINT: http://localhost:3337
REFINE_WORKDIR: output/01-alephino-pre
REFINE_LOGFILE: log/01-alephino-pre/{{.DATE}}_leipzig.log
glauchau:
desc: Bibliotheca Vorverarbeitung
- | # verify that OpenRefine is installed
if [ ! -f "$OPENREFINE" ]; then
echo 1>&2 "OpenRefine missing; try task install"; exit 1
fi
- | # delete temporary files and log file of previous run
rm -rf ./*.project* workspace.json
rm -rf "{{.PROJECT}}.log"
- > # launch OpenRefine with specific data directory and redirect its output to a log file
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
-d ../{{.DIR}}
>> "{{.PROJECT}}.log" 2>&1 &
- | # wait until OpenRefine API is available
timeout 30s bash -c "until
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
do sleep 1
done"
stop:
dir: ./{{.DIR}}
cmds:
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
sources:
- tasks/01-bibliotheca-pre.sh
- '{{.INPUT}}'
generates:
- output/01-bibliotheca-pre/glauchau.tsv
vars:
INPUT: '{{.INPUT | default "input/glauchau.imp"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
REFINE_ENDPOINT: http://localhost:3338
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_glauchau.log
plauen:
desc: Bibliotheca Vorverarbeitung
- | # shut down OpenRefine gracefully
PID=$(lsof -t -i:{{.PORT}})
kill $PID
while ps -p $PID > /dev/null; do sleep 1; done
- > # archive the OpenRefine project
tar cfz
"{{.PROJECT}}.openrefine.tar.gz"
-C $(grep -l "{{.PROJECT}}" *.project/metadata.json | cut -d '/' -f 1)
.
- rm -rf ./*.project* workspace.json # delete temporary files
kill:
dir: ./{{.DIR}}
cmds:
- tasks/01-bibliotheca-pre.sh "input/plauen.imp"
sources:
- tasks/01-bibliotheca-pre.sh
- input/plauen.imp
generates:
- output/01-bibliotheca-pre/plauen.tsv
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "2G"}}'
REFINE_ENDPOINT: http://localhost:3339
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
riesa:
desc: Alephino Vorverarbeitung
cmds:
- tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"
sources:
- tasks/01-alephino-pre.sh
- '{{.TITEL}}'
- '{{.EXEMPLARE}}'
generates:
- output/01-alephino-pre/riesa.tsv
vars:
TITEL: '{{.TITEL | default "input/riesa-titel.txt"}}'
EXEMPLARE: '{{.EXEMPLARE | default "input/riesa-exemplare.txt"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
REFINE_ENDPOINT: http://localhost:3340
REFINE_WORKDIR: output/01-alephino-pre
REFINE_LOGFILE: log/01-alephino-pre/{{.DATE}}_riesa.log
clean:
desc: Alle Daten löschen (reset auf Ausgangszustand)
cmds:
- rm -r lib log output
mkdir:
desc: Ordner erstellen
cmds:
- mkdir -p output/01-alephino-pre log/01-alephino-pre
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
- mkdir -p output/02-alephino-main log/02-alephino-main
- mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main
- mkdir -p output/03-ba-sachsen log/03-ba-sachsen
barcodes:
desc: Ermitteln von Dubletten
deps: [default]
- | # shut down OpenRefine immediately to save time and disk space
PID=$(lsof -t -i:{{.PORT}})
kill -9 $PID
while ps -p $PID > /dev/null; do sleep 1; done
- rm -rf ./*.project* workspace.json # delete temporary files
check:
desc: check OpenRefine log for any warnings and exit on error
dir: ./{{.DIR}}
cmds:
- mkdir -p output/barcodes
# Bibliotheca Barcodes extrahieren
- for f in input/*.imp; do grep '^\*I BARCO ' "$f" | dos2unix | cut -c 10- | sort > "output/barcodes/$(f=${f##*/}; echo ${f%.*}).raw"; done
# Alephino Barcodes extrahieren
- for f in input/*-exemplare.txt; do grep '^120 ' "$f" | cut -c 6- | sort > "output/barcodes/$(f=${f##*/}; echo ${f%-*}).raw"; done
# Extrahierte Barcodes gegen generiertes PICA+ abgleichen
- for f in output/barcodes/*.raw; do comm -12 "$f" <(sort output/03-ba-sachsen/barcodes.txt) > "output/barcodes/$(f=${f##*/}; echo ${f%.*}).filtered"; done
# Plauen, Leipzig, Riesa vorübergehend nicht filtern
- for f in leipzig riesa plauen; do cp output/barcodes/$f.raw output/barcodes/$f.filtered; done
# Dublette Barcodes Gesamtdubletten ermitteln
- sort output/barcodes/*.filtered | uniq -d > output/barcodes/duplicates
# Dubletten für jeden Teil ermitteln
- (cd output/barcodes && for f in *.filtered ; do grep -FxH -f duplicates "$f" | sort | join -o 2.1 -t ':' -a1 -2 2 duplicates - | cut -d '.' -f 1 > "${f}".tmp; done)
# Ergebnisse in Tabelle zusammenführen
- paste output/barcodes/duplicates output/barcodes/*.tmp | awk -F $'\t' '{sub($1, "\"&\""); print}' > output/barcodes/duplicates.tsv && rm output/barcodes/*.tmp
# Bearbeitungsstand
- 'echo "Seit Juli 2019 neu hinzugekommene Dubletten: $(comm -13 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
- 'echo "Seit Juli 2019 bearbeitete Dubletten: $(comm -23 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
- 'echo "Noch zu bearbeitende Dubletten: $(wc -l < output/barcodes/duplicates)"'
# sources:
# - input/*
# generates:
# - output/barcodes/duplicates.tsv
- | # scan every *.log file below the task directory for "exception" or "error"
  # grep recurses on its own: with no log files present it simply returns
  # non-zero instead of waiting on stdin, and paths containing whitespace are
  # handled correctly (both were problems with `grep ... $(find ...)`)
  if grep -r -i --include='*.log' 'exception\|error' .; then
    echo 1>&2 "log contains warnings!"; exit 1
  fi

146
alephino/Taskfile.yml

@ -0,0 +1,146 @@
# Taskfile for the Alephino conversion.
#
# Tasks defined here:
#   main        - runs refine-pre for the projects "leipzig" and "riesa", then refine-main
#   refine-pre  - imports the title and item exports of one project into OpenRefine,
#                 applies the operation histories in config/pre and exports
#                 output/<project>.tsv
#   refine-main - imports the preprocessed TSV files as one project and applies
#                 the operation histories in config/main
#   default     - re-invokes "<directory name>:main" from the parent directory
#
# The tasks :start, :stop, :kill and :check are referenced from the parent
# Taskfile (leading colon); the environment variable CLIENT must hold the
# path of the openrefine-client executable.
version: '3'

tasks:
  main:
    desc: Konvertierung von Alephino nach PICA3/CSV
    vars:
      DIR: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name
    cmds:
      - task: refine-pre
        vars: {PROJECT: leipzig}
      - task: refine-pre
        vars: {PROJECT: riesa}
      - task: refine-main

  refine-pre:
    dir: ./{{.DIR}}
    label: '{{.TASK}}-{{.PROJECT}}'
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
      PORT: 3335 # assign a different port for each project
      RAM: 8192M # maximum RAM for OpenRefine java heap space
      LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1' # appended after ">" in each command: tee output into the project log
    cmds:
      - echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
      - task: :start # launch OpenRefine
        vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
      - > # import titles (OpenRefine project "titel")
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m input/{{.PROJECT}}-titel.txt)"
        --format fixed-width
        --encoding UTF-8
        --columnWidths 5
        --skipDataLines 0
        --storeBlankRows false
        --projectName titel
        > {{.LOG}}
      - > # import items (OpenRefine project "exemplare")
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m input/{{.PROJECT}}-exemplare.txt)"
        --format fixed-width
        --encoding UTF-8
        --columnWidths 5
        --skipDataLines 0
        --storeBlankRows false
        --projectName exemplare
        > {{.LOG}}
      - | # titel: corrections for individual cases
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-einzelfaelle.json > {{.LOG}}
      - | # prefix M or E for field names
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-prefix.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-prefix.json > {{.LOG}}
      - | # sort records and field names
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-sortieren.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-sortieren.json > {{.LOG}}
      - | # join repeated fields
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-mehrfachbelegungen.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-mehrfachbelegungen.json > {{.LOG}}
      - | # delete fields
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-loeschen.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-loeschen.json > {{.LOG}}
      - | # transpose
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-transponieren.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-transponieren.json > {{.LOG}}
      - | # separate title ID
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-id-separieren.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-id-separieren.json > {{.LOG}}
      - | # titel: enrich with item data from project "exemplare"
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-anreichern.json > {{.LOG}}
      - mkdir -p output
      - > # export
        "$CLIENT" -P {{.PORT}} titel
        --output "$(readlink -m output/{{.PROJECT}}.tsv)"
        > {{.LOG}}
      - | # print allocated system resources
        PID="$(lsof -t -i:{{.PORT}})"
        echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
        echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
      - task: :kill # shut down OpenRefine immediately to save time and disk space
        vars: {DIR: '{{.DIR}}/log', PORT: '{{.PORT}}'}
      - task: :check # check OpenRefine log for any warnings and exit on error
        vars: {DIR: '{{.DIR}}'}
    sources:
      - Taskfile.yml
      # track the two files that the import commands above actually read
      # (this task never reads an input/<project>.imp file)
      - input/{{.PROJECT}}-titel.txt
      - input/{{.PROJECT}}-exemplare.txt
      - config/pre/**
    generates:
      - output/{{.PROJECT}}.tsv
    ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141

  refine-main:
    dir: ./{{.DIR}}
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
      PROJECT: alephino
      PORT: 3335 # assign a different port for each project
      RAM: 8192M # maximum RAM for OpenRefine java heap space
      LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1' # appended after ">" in each command: tee output into the project log
    cmds:
      - echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
      - task: :start # launch OpenRefine
        vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
      - > # create zip archive with the output of the preprocessing
        zip -j tmp.zip
        output/leipzig.tsv
        output/riesa.tsv
      - > # import zip archive
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m tmp.zip)"
        --format tsv
        --includeFileSources true
        --projectName {{.PROJECT}}
        > {{.LOG}}
        && rm tmp.zip
      - > # sort columns: start with 1. M|001, 2. E|001, 3. File so that records mode is preserved
        "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/sortieren.json > {{.LOG}}
      - > # library code derived from the import file name
        "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
      - > # spec_A_E_01: shelf mark 7100a
        "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
      # disabled: export of the PICA3 columns as CSV; column 2199 must come first because it is needed later for sorting
      # - >
      #   mkdir -p output &&
      #   "$CLIENT" -P {{.PORT}} {{.PROJECT}}
      #   --output "$(readlink -m output/{{.PROJECT}}.csv)"
      #   --template "$(< config/main/template.txt)"
      #   --rowSeparator ""
      #   > {{.LOG}}
      - | # print allocated system resources
        PID="$(lsof -t -i:{{.PORT}})"
        echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
        echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
      - task: :stop # shut down OpenRefine and archive the OpenRefine project
        vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
      - task: :check # check OpenRefine log for any warnings and exit on error
        vars: {DIR: '{{.DIR}}'}
    sources:
      - Taskfile.yml
      - output/*.tsv
      - config/main/**
    generates:
      - log/{{.PROJECT}}.openrefine.tar.gz
      # - output/{{.PROJECT}}.csv
    ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141

  default: # enable standalone execution (running `task` in project directory)
    cmds:
      - DIR="${PWD##*/}:main" && cd .. && task "$DIR"

14
alephino/config/main/7100a.json

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|100",
"expression": "grel:value.split('\u001f')[0].slice(1)",
"onError": "set-to-blank",
"newColumnName": "7100a",
"columnInsertIndex": 5
}
]

14
alephino/config/main/file.json

@ -0,0 +1,14 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "File",
"expression": "grel:with([ ['leipzig.tsv','LE'], ['riesa.tsv','RS'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

27
alephino/config/main/sortieren.json

@ -0,0 +1,27 @@
[
{
"op": "core/column-move",
"columnName": "File",
"index": 0
},
{
"op": "core/column-move",
"columnName": "E|001",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|029",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|026f",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|IDN",
"index": 0
}
]

15
alephino/config/pre/exemplare-id-separieren.json

@ -0,0 +1,15 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|BIB",
"expression": "grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')",
"onError": "set-to-blank",
"newColumnName": "titel_id",
"columnInsertIndex": 18,
"description": "Create column titel_id at index 18 based on column E|BIB using expression grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')"
}
]

65
alephino/config/pre/exemplare-loeschen.json

@ -0,0 +1,65 @@
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E|A02",
"l": "E|A02"
}
},
{
"v": {
"v": "E|A86",
"l": "E|A86"
}
},
{
"v": {
"v": "E|SUB",
"l": "E|SUB"
}
},
{
"v": {
"v": "E|FMT",
"l": "E|FMT"
}
},
{
"v": {
"v": "E|CAT",
"l": "E|CAT"
}
},
{
"v": {
"v": "E|027",
"l": "E|027"
}
},
{
"v": {
"v": "E|123",
"l": "E|123"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
}
]

18
alephino/config/pre/exemplare-mehrfachbelegungen.json

@ -0,0 +1,18 @@
[
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"description": "Blank down cells in column Column 1"
},
{
"op": "core/multivalued-cell-join",
"columnName": "Column 2",
"keyColumnName": "Column 1",
"separator": "␟",
"description": "Join multi-valued cells in column Column 2"
}
]

15
alephino/config/pre/exemplare-prefix.json

@ -0,0 +1,15 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:'E|' + value.replace(' ','')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column Column 1 using expression grel:'E|' + value.replace(' ','')"
}
]

80
alephino/config/pre/exemplare-sortieren.json

@ -0,0 +1,80 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E|IDN",
"l": "E|IDN"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "Column 2",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 2,
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
},
{
"op": "core/column-move",
"columnName": "id",
"index": 0,
"description": "Move column id to position 0"
},
{
"op": "core/fill-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id",
"description": "Fill down cells in column id"
},
{
"op": "core/row-reorder",
"mode": "row-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "id",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
},
{
"valueType": "string",
"column": "Column 1",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
},
"description": "Reorder rows"
},
{
"op": "core/column-removal",
"columnName": "id",
"description": "Remove column id"
}
]

9
alephino/config/pre/exemplare-transponieren.json

@ -0,0 +1,9 @@
[
{
"op": "core/key-value-columnize",
"keyColumnName": "Column 1",
"valueColumnName": "Column 2",
"noteColumnName": "",
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
}
]

822
alephino/config/pre/titel-anreichern.json

@@ -0,0 +1,822 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|001'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|001",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|001",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|002a'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|002a",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|002a",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|003'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|003",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|003",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|004'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|004",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|004",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|027'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|027",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|027",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|030'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|030",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|030",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|050'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|050",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|050",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|100'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|100",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|100",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|115'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|115",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|115",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|120'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|120",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|120",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|123'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|123",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|123",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A02'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A02",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A02",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A72'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A72",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A72",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A73'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A73",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A73",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A87'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A87",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A87",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A91'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A91",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A91",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A95'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A95",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A95",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|BIB'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|BIB",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|BIB",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|CAT'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|CAT",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|CAT",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|FMT'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|FMT",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|FMT",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|IDN'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|IDN",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|IDN",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|LDR'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|LDR",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|LDR",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|STA'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|STA",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|STA",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|SUB'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|SUB",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|SUB",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|105'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|105",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|105",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|107'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|107",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|107",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A94'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A94",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A94",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|125'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|125",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|125",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|072'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|072",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|072",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A98'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A98",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A98",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",