refactoring mit openrefine-task runner
https://github.com/opencultureconsulting/openrefine-task-runner
This commit is contained in:
parent
b188267640
commit
9bad6aeb17
|
@ -1,5 +1,6 @@
|
|||
input
|
||||
lib
|
||||
log
|
||||
output
|
||||
.task
|
||||
.openrefine
|
||||
*/input
|
||||
*/output
|
||||
*/tmp
|
||||
*/log/*
|
||||
|
|
73
README.md
73
README.md
|
@ -1,29 +1,68 @@
|
|||
# Transformation von Bibliotheca und Alephino nach PICA+ für die Bibliotheken der Berufsakademie Sachsen
|
||||
|
||||
## Vorbereitung
|
||||
|
||||
1. Exporte bereitstellen mit folgenden Dateinamen:
|
||||
* alephino/input/leipzig-exemplare.txt
|
||||
* alephino/input/leipzig-titel.txt
|
||||
* alephino/input/riesa-exemplare.txt
|
||||
* alephino/input/riesa-titel.txt
|
||||
* bibliotheca/input/bautzen.imp
|
||||
* bibliotheca/input/breitenbrunn.imp
|
||||
* bibliotheca/input/dresden.imp
|
||||
* bibliotheca/input/glauchau.imp
|
||||
* bibliotheca/input/plauen.imp
|
||||
|
||||
2. Installation Task 3.2.2
|
||||
|
||||
a) RPM-based (Fedora, CentOS, SLES, etc.)
|
||||
|
||||
```sh
|
||||
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.rpm
|
||||
sudo dnf install ./task_linux_amd64.rpm && rm task_linux_amd64.rpm
|
||||
```
|
||||
|
||||
b) DEB-based (Debian, Ubuntu etc.)
|
||||
|
||||
```sh
|
||||
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.deb
|
||||
sudo apt install ./task_linux_amd64.deb && rm task_linux_amd64.deb
|
||||
```
|
||||
|
||||
3. Installation OpenRefine 3.4.1 und openrefine-client 0.3.10
|
||||
|
||||
```
|
||||
task install
|
||||
```
|
||||
|
||||
## Nutzung
|
||||
|
||||
1. Exporte bereitstellen mit folgenden Dateinamen:
|
||||
* input/bautzen.imp
|
||||
* input/breitenbrunn.imp
|
||||
* input/dresden.imp
|
||||
* input/glauchau.imp
|
||||
* input/leipzig-exemplare.txt
|
||||
* input/leipzig-titel.txt
|
||||
* input/plauen.imp
|
||||
* input/riesa-exemplare.txt
|
||||
* input/riesa-titel.txt
|
||||
2. Installation und initiale Datenverarbeitung: `./main.sh`
|
||||
3. Weitere Datenverarbeitungen:
|
||||
* `lib/task` um den gesamten Workflow zu starten
|
||||
* `lib/task --list` für eine Liste der verfügbaren Tasks
|
||||
Datenverarbeitung sequentiell
|
||||
|
||||
```
|
||||
task default
|
||||
```
|
||||
|
||||
Datenverarbeitung (teil)parallelisiert (benötigt bis zu 16 GB RAM)
|
||||
|
||||
```
|
||||
task pica+:main
|
||||
```
|
||||
|
||||
Analyse dubletter Barcodes
|
||||
|
||||
```
|
||||
task barcodes:main
|
||||
```
|
||||
|
||||
## Systemvoraussetzungen
|
||||
|
||||
* Linux mit Bash, cURL und JAVA (getestet auf Fedora 32)
|
||||
* 7 GB freien Arbeitsspeicher
|
||||
* GNU/Linux (getestet auf Fedora 32)
|
||||
* JAVA 8+ (für OpenRefine)
|
||||
* 8 GB freien Arbeitsspeicher
|
||||
|
||||
## Verwendete Tools
|
||||
|
||||
* [OpenRefine](https://openrefine.org/)
|
||||
* [bash-refine](https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d)
|
||||
* [openrefine-client](https://github.com/opencultureconsulting/openrefine-client)
|
||||
* [Task](https://github.com/go-task/task)
|
||||
|
|
293
Taskfile.yml
293
Taskfile.yml
|
@ -1,234 +1,99 @@
|
|||
# https://taskfile.dev
|
||||
# https://github.com/opencultureconsulting/openrefine-task-runner
|
||||
|
||||
version: '3'
|
||||
|
||||
output: 'group'
|
||||
includes:
|
||||
alephino: alephino
|
||||
barcodes: barcodes
|
||||
bibliotheca: bibliotheca
|
||||
pica+: pica+
|
||||
|
||||
vars:
|
||||
DATE: '{{ now | date "20060102_150405"}}'
|
||||
silent: true
|
||||
output: prefixed
|
||||
|
||||
env:
|
||||
REFINE_MEMORY: 8g
|
||||
REFINE_ENDPOINT: http://localhost:3334
|
||||
OPENREFINE:
|
||||
sh: readlink -m .openrefine/refine
|
||||
CLIENT:
|
||||
sh: readlink -m .openrefine/client
|
||||
|
||||
tasks:
|
||||
default:
|
||||
desc: Generierung PICA+
|
||||
# deps: [bibliotheca, alephino]
|
||||
desc: Datenverarbeitung sequentiell
|
||||
cmds:
|
||||
- task: alephino
|
||||
- task: bibliotheca
|
||||
- tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"
|
||||
sources:
|
||||
- tasks/03-ba-sachsen.sh
|
||||
# - output/02-alephino-main/alephino.csv
|
||||
- output/02-bibliotheca-main/bibliotheca.csv
|
||||
generates:
|
||||
- output/03-ba-sachsen/ba-sachsen.pic
|
||||
- output/03-ba-sachsen/ba-sachsen.openrefine.tar.gz
|
||||
env:
|
||||
REFINE_WORKDIR: output/03-ba-sachsen
|
||||
REFINE_LOGFILE: log/03-ba-sachsen/{{.DATE}}.log
|
||||
- task: alephino:main
|
||||
- task: bibliotheca:main
|
||||
- task: pica+:refine
|
||||
|
||||
alephino:
|
||||
desc: Alephino Hauptverarbeitung
|
||||
# deps: [leipzig, riesa]
|
||||
install:
|
||||
desc: (re)install OpenRefine and openrefine-client into subdirectory .openrefine
|
||||
cmds:
|
||||
- task: leipzig
|
||||
- task: riesa
|
||||
- tasks/02-alephino-main.sh "output/01-alephino-pre"
|
||||
sources:
|
||||
- tasks/02-alephino-main.sh
|
||||
- output/01-alephino-pre/*.tsv
|
||||
generates:
|
||||
# - output/02-alephino-main/alephino.csv
|
||||
- output/02-alephino-main/alephino.openrefine.tar.gz
|
||||
env:
|
||||
REFINE_ENDPOINT: http://localhost:3334
|
||||
REFINE_WORKDIR: output/02-alephino-main
|
||||
REFINE_LOGFILE: log/02-alephino-main/{{.DATE}}.log
|
||||
- | # delete existing install and recreate folder
|
||||
rm -rf .openrefine
|
||||
mkdir -p .openrefine
|
||||
- > # download OpenRefine archive
|
||||
wget --no-verbose -O openrefine.tar.gz
|
||||
https://github.com/OpenRefine/OpenRefine/releases/download/3.4.1/openrefine-linux-3.4.1.tar.gz
|
||||
- | # install OpenRefine into subdirectory .openrefine
|
||||
tar -xzf openrefine.tar.gz -C .openrefine --strip 1
|
||||
rm openrefine.tar.gz
|
||||
- | # optimize OpenRefine for batch processing
|
||||
sed -i 's/cd `dirname $0`/cd "$(dirname "$0")"/' ".openrefine/refine" # fix path issue in OpenRefine startup file
|
||||
sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' ".openrefine/refine.ini" # do not try to open OpenRefine in browser
|
||||
sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1440/' ".openrefine/refine.ini" # set autosave period from 5 minutes to 24 hours
|
||||
- > # download openrefine-client into subdirectory .openrefine
|
||||
wget --no-verbose -O .openrefine/client
|
||||
https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.10/openrefine-client_0-3-10_linux
|
||||
- chmod +x .openrefine/client # make client executable
|
||||
|
||||
bibliotheca:
|
||||
desc: Bibliotheca Hauptverarbeitung
|
||||
# deps: [bautzen, breitenbrunn, dresden, glauchau, plauen]
|
||||
start:
|
||||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- task: bautzen
|
||||
- task: breitenbrunn
|
||||
- task: dresden
|
||||
- task: glauchau
|
||||
# - task: plauen
|
||||
- tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- tasks/02-bibliotheca-main.sh
|
||||
- output/01-bibliotheca-pre/*.tsv
|
||||
generates:
|
||||
- output/02-bibliotheca-main/bibliotheca.csv
|
||||
- output/02-bibliotheca-main/bibliotheca.openrefine.tar.gz
|
||||
env:
|
||||
REFINE_ENDPOINT: http://localhost:3335
|
||||
REFINE_WORKDIR: output/02-bibliotheca-main
|
||||
REFINE_LOGFILE: log/02-bibliotheca-main/{{.DATE}}.log
|
||||
- | # verify that OpenRefine is installed
|
||||
if [ ! -f "$OPENREFINE" ]; then
|
||||
echo 1>&2 "OpenRefine missing; try task install"; exit 1
|
||||
fi
|
||||
- | # delete temporary files and log file of previous run
|
||||
rm -rf ./*.project* workspace.json
|
||||
rm -rf "{{.PROJECT}}.log"
|
||||
- > # launch OpenRefine with specific data directory and redirect its output to a log file
|
||||
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
|
||||
-d ../{{.DIR}}
|
||||
>> "{{.PROJECT}}.log" 2>&1 &
|
||||
- | # wait until OpenRefine API is available
|
||||
timeout 30s bash -c "until
|
||||
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
|
||||
do sleep 1
|
||||
done"
|
||||
|
||||
bautzen:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
stop:
|
||||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/bautzen.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/bautzen.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "6G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3334
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_bautzen.log
|
||||
- | # shut down OpenRefine gracefully
|
||||
PID=$(lsof -t -i:{{.PORT}})
|
||||
kill $PID
|
||||
while ps -p $PID > /dev/null; do sleep 1; done
|
||||
- > # archive the OpenRefine project
|
||||
tar cfz
|
||||
"{{.PROJECT}}.openrefine.tar.gz"
|
||||
-C $(grep -l "{{.PROJECT}}" *.project/metadata.json | cut -d '/' -f 1)
|
||||
.
|
||||
- rm -rf ./*.project* workspace.json # delete temporary files
|
||||
|
||||
breitenbrunn:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
kill:
|
||||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/breitenbrunn.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/breitenbrunn.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3335
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_breitenbrunn.log
|
||||
- | # shut down OpenRefine immediately to save time and disk space
|
||||
PID=$(lsof -t -i:{{.PORT}})
|
||||
kill -9 $PID
|
||||
while ps -p $PID > /dev/null; do sleep 1; done
|
||||
- rm -rf ./*.project* workspace.json # delete temporary files
|
||||
|
||||
dresden:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
check:
|
||||
desc: check OpenRefine log for any warnings and exit on error
|
||||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/dresden.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/dresden.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3336
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_dresden.log
|
||||
|
||||
leipzig:
|
||||
desc: Alephino Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"
|
||||
sources:
|
||||
- tasks/01-alephino-pre.sh
|
||||
- '{{.TITEL}}'
|
||||
- '{{.EXEMPLARE}}'
|
||||
generates:
|
||||
- output/01-alephino-pre/leipzig.tsv
|
||||
vars:
|
||||
TITEL: '{{.TITEL | default "input/leipzig-titel.txt"}}'
|
||||
EXEMPLARE: '{{.EXEMPLARE | default "input/leipzig-exemplare.txt"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3337
|
||||
REFINE_WORKDIR: output/01-alephino-pre
|
||||
REFINE_LOGFILE: log/01-alephino-pre/{{.DATE}}_leipzig.log
|
||||
|
||||
glauchau:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/glauchau.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/glauchau.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3338
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_glauchau.log
|
||||
|
||||
plauen:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "input/plauen.imp"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- input/plauen.imp
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/plauen.tsv
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "2G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3339
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
|
||||
|
||||
riesa:
|
||||
desc: Alephino Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"
|
||||
sources:
|
||||
- tasks/01-alephino-pre.sh
|
||||
- '{{.TITEL}}'
|
||||
- '{{.EXEMPLARE}}'
|
||||
generates:
|
||||
- output/01-alephino-pre/riesa.tsv
|
||||
vars:
|
||||
TITEL: '{{.TITEL | default "input/riesa-titel.txt"}}'
|
||||
EXEMPLARE: '{{.EXEMPLARE | default "input/riesa-exemplare.txt"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3340
|
||||
REFINE_WORKDIR: output/01-alephino-pre
|
||||
REFINE_LOGFILE: log/01-alephino-pre/{{.DATE}}_riesa.log
|
||||
|
||||
clean:
|
||||
desc: Alle Daten löschen (reset auf Ausgangszustand)
|
||||
cmds:
|
||||
- rm -r lib log output
|
||||
|
||||
mkdir:
|
||||
desc: Ordner erstellen
|
||||
cmds:
|
||||
- mkdir -p output/01-alephino-pre log/01-alephino-pre
|
||||
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
|
||||
- mkdir -p output/02-alephino-main log/02-alephino-main
|
||||
- mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main
|
||||
- mkdir -p output/03-ba-sachsen log/03-ba-sachsen
|
||||
|
||||
barcodes:
|
||||
desc: Ermitteln von Dubletten
|
||||
deps: [default]
|
||||
cmds:
|
||||
- mkdir -p output/barcodes
|
||||
# Bibliotheca Barcodes extrahieren
|
||||
- for f in input/*.imp; do grep '^\*I BARCO ' "$f" | dos2unix | cut -c 10- | sort > "output/barcodes/$(f=${f##*/}; echo ${f%.*}).raw"; done
|
||||
# Alephino Barcodes extrahieren
|
||||
- for f in input/*-exemplare.txt; do grep '^120 ' "$f" | cut -c 6- | sort > "output/barcodes/$(f=${f##*/}; echo ${f%-*}).raw"; done
|
||||
# Extrahierte Barcodes gegen generiertes PICA+ abgleichen
|
||||
- for f in output/barcodes/*.raw; do comm -12 "$f" <(sort output/03-ba-sachsen/barcodes.txt) > "output/barcodes/$(f=${f##*/}; echo ${f%.*}).filtered"; done
|
||||
# Plauen, Leipzig, Riesa vorübergehend nicht filtern
|
||||
- for f in leipzig riesa plauen; do cp output/barcodes/$f.raw output/barcodes/$f.filtered; done
|
||||
# Dublette Barcodes Gesamtdubletten ermitteln
|
||||
- sort output/barcodes/*.filtered | uniq -d > output/barcodes/duplicates
|
||||
# Dubletten für jeden Teil ermitteln
|
||||
- (cd output/barcodes && for f in *.filtered ; do grep -FxH -f duplicates "$f" | sort | join -o 2.1 -t ':' -a1 -2 2 duplicates - | cut -d '.' -f 1 > "${f}".tmp; done)
|
||||
# Ergebnisse in Tabelle zusammenführen
|
||||
- paste output/barcodes/duplicates output/barcodes/*.tmp | awk -F $'\t' '{sub($1, "\"&\""); print}' > output/barcodes/duplicates.tsv && rm output/barcodes/*.tmp
|
||||
# Bearbeitungsstand
|
||||
- 'echo "Seit Juli 2019 neu hinzugekommene Dubletten: $(comm -13 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
|
||||
- 'echo "Seit Juli 2019 bearbeitete Dubletten: $(comm -23 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
|
||||
- 'echo "Noch zu bearbeitende Dubletten: $(wc -l < output/barcodes/duplicates)"'
|
||||
# sources:
|
||||
# - input/*
|
||||
# generates:
|
||||
# - output/barcodes/duplicates.tsv
|
||||
- | # find log file(s) and check for "exception" or "error"
|
||||
if grep -i 'exception\|error' $(find . -name '*.log'); then
|
||||
echo 1>&2 "log contains warnings!"; exit 1
|
||||
fi
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
version: '3'
|
||||
|
||||
tasks:
|
||||
main:
|
||||
desc: Konvertierung von Alephino nach PICA3/CSV
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name
|
||||
cmds:
|
||||
- task: refine-pre
|
||||
vars: {PROJECT: leipzig}
|
||||
- task: refine-pre
|
||||
vars: {PROJECT: riesa}
|
||||
- task: refine-main
|
||||
|
||||
refine-pre:
|
||||
dir: ./{{.DIR}}
|
||||
label: '{{.TASK}}-{{.PROJECT}}'
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}'
|
||||
PORT: 3335 # assign a different port for each project
|
||||
RAM: 8192M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
|
||||
cmds:
|
||||
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # Import Titel
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m input/{{.PROJECT}}-titel.txt)"
|
||||
--format fixed-width
|
||||
--encoding UTF-8
|
||||
--columnWidths 5
|
||||
--skipDataLines 0
|
||||
--storeBlankRows false
|
||||
--projectName titel
|
||||
> {{.LOG}}
|
||||
- > # Import Exemplare
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m input/{{.PROJECT}}-exemplare.txt)"
|
||||
--format fixed-width
|
||||
--encoding UTF-8
|
||||
--columnWidths 5
|
||||
--skipDataLines 0
|
||||
--storeBlankRows false
|
||||
--projectName exemplare
|
||||
> {{.LOG}}
|
||||
- | # Titel: Korrekturen Einzelfälle
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-einzelfaelle.json > {{.LOG}}
|
||||
- | # Prefix M bzw. E für Feldnamen
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-prefix.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-prefix.json > {{.LOG}}
|
||||
- | # Datensätze und Feldnamen sortieren
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-sortieren.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-sortieren.json > {{.LOG}}
|
||||
- | # Mehrfachbelegungen zusammenführen
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-mehrfachbelegungen.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-mehrfachbelegungen.json > {{.LOG}}
|
||||
- | # Felder löschen
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-loeschen.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-loeschen.json > {{.LOG}}
|
||||
- | # Transponieren
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-transponieren.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-transponieren.json > {{.LOG}}
|
||||
- | # Titel-ID separieren
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-id-separieren.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-id-separieren.json > {{.LOG}}
|
||||
- | # Titel: Exemplare anreichern
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-anreichern.json > {{.LOG}}
|
||||
- mkdir -p output
|
||||
- > # Export
|
||||
"$CLIENT" -P {{.PORT}} titel
|
||||
--output "$(readlink -m output/{{.PROJECT}}.tsv)"
|
||||
> {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
PID="$(lsof -t -i:{{.PORT}})"
|
||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||
- task: :kill # shut down OpenRefine immediately to save time and disk space
|
||||
vars: {DIR: '{{.DIR}}/log', PORT: '{{.PORT}}'}
|
||||
- task: :check # check OpenRefine log for any warnings and exit on error
|
||||
vars: {DIR: '{{.DIR}}'}
|
||||
sources:
|
||||
- Taskfile.yml
|
||||
- input/{{.PROJECT}}-*.txt # track the files this task actually imports: <project>-titel.txt and <project>-exemplare.txt
|
||||
- config/pre/**
|
||||
generates:
|
||||
- output/{{.PROJECT}}.tsv
|
||||
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
|
||||
|
||||
refine-main:
|
||||
dir: ./{{.DIR}}
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}'
|
||||
PROJECT: alephino
|
||||
PORT: 3335 # assign a different port for each project
|
||||
RAM: 8192M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
|
||||
cmds:
|
||||
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # Zip-Archiv mit Output der Vorverarbeitung erstellen
|
||||
zip -j tmp.zip
|
||||
output/leipzig.tsv
|
||||
output/riesa.tsv
|
||||
- > # Import Zip-Archiv
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m tmp.zip)"
|
||||
--format tsv
|
||||
--includeFileSources true
|
||||
--projectName {{.PROJECT}}
|
||||
> {{.LOG}}
|
||||
&& rm tmp.zip
|
||||
- > # Spalten sortieren: Beginnen mit 1. M|001, 2. E|001, 3. File; damit Records-Mode erhalten bleibt
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/sortieren.json > {{.LOG}}
|
||||
- > # Bibliothekskürzel aus Import-Dateiname
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
|
||||
- > # spec_A_E_01: Signatur 7100a
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
|
||||
# - > # Export der PICA3-Spalten als CSV; Spalte 2199 muss vorne stehen, weil später für Sortierung benötigt
|
||||
# mkdir -p output &&
|
||||
# "$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
# --output "$(readlink -m output/{{.PROJECT}}.csv)"
|
||||
# --template "$(< config/main/template.txt)"
|
||||
# --rowSeparator ""
|
||||
# > {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
PID="$(lsof -t -i:{{.PORT}})"
|
||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||||
- task: :check # check OpenRefine log for any warnings and exit on error
|
||||
vars: {DIR: '{{.DIR}}'}
|
||||
sources:
|
||||
- Taskfile.yml
|
||||
- output/*.tsv
|
||||
- config/main/**
|
||||
generates:
|
||||
- log/{{.PROJECT}}.openrefine.tar.gz
|
||||
# - output/{{.PROJECT}}.csv
|
||||
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
|
||||
|
||||
default: # enable standalone execution (running `task` in project directory)
|
||||
cmds:
|
||||
- DIR="${PWD##*/}:main" && cd .. && task "$DIR"
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|100",
|
||||
"expression": "grel:value.split('\u001f')[0].slice(1)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "7100a",
|
||||
"columnInsertIndex": 5
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "File",
|
||||
"expression": "grel:with([ ['leipzig.tsv','LE'], ['riesa.tsv','RS'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
|
@ -0,0 +1,27 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "File",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "E|001",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|029",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|026f",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|IDN",
|
||||
"index": 0
|
||||
}
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|BIB",
|
||||
"expression": "grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "titel_id",
|
||||
"columnInsertIndex": 18,
|
||||
"description": "Create column titel_id at index 18 based on column E|BIB using expression grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,65 @@
|
|||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E|A02",
|
||||
"l": "E|A02"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|A86",
|
||||
"l": "E|A86"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|SUB",
|
||||
"l": "E|SUB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|FMT",
|
||||
"l": "E|FMT"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|CAT",
|
||||
"l": "E|CAT"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|027",
|
||||
"l": "E|027"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|123",
|
||||
"l": "E|123"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
|
@ -0,0 +1,18 @@
|
|||
[
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"description": "Blank down cells in column Column 1"
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "Column 2",
|
||||
"keyColumnName": "Column 1",
|
||||
"separator": "␟",
|
||||
"description": "Join multi-valued cells in column Column 2"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:'E|' + value.replace(' ','')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column Column 1 using expression grel:'E|' + value.replace(' ','')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,80 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E|IDN",
|
||||
"l": "E|IDN"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "Column 2",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "id",
|
||||
"index": 0,
|
||||
"description": "Move column id to position 0"
|
||||
},
|
||||
{
|
||||
"op": "core/fill-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id",
|
||||
"description": "Fill down cells in column id"
|
||||
},
|
||||
{
|
||||
"op": "core/row-reorder",
|
||||
"mode": "row-based",
|
||||
"sorting": {
|
||||
"criteria": [
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "id",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "Column 1",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Reorder rows"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "id",
|
||||
"description": "Remove column id"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,9 @@
|
|||
[
|
||||
{
|
||||
"op": "core/key-value-columnize",
|
||||
"keyColumnName": "Column 1",
|
||||
"valueColumnName": "Column 2",
|
||||
"noteColumnName": "",
|
||||
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
|
||||
}
|
||||
]
|
|
@ -0,0 +1,822 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|001'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|001",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|001",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|002a'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|002a",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|002a",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|003'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|003",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|003",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|004'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|004",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|004",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|027'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|027",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|027",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|030'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|030",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|030",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|050'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|050",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|050",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|100'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|100",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|100",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|115'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|115",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|115",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|120'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|120",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|120",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|123'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|123",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|123",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A02'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A02",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A02",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A72'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A72",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A72",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A73'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A73",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A73",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A87'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A87",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A87",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A91'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A91",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A91",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A95'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A95",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A95",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|BIB'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|BIB",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|BIB",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|CAT'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|CAT",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|CAT",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|FMT'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|FMT",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|FMT",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|IDN'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|IDN",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|IDN",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|LDR'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|LDR",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|LDR",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|STA'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|STA",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|STA",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|SUB'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|SUB",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|SUB",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|105'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|105",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|105",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|107'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|107",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|107",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A94'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A94",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A94",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|125'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|125",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|125",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|072'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|072",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|072",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A98'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A98",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A98",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|HOL'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|HOL",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|HOL",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A86'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A86",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A86",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A63'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A63",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A63",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A70'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A70",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A70",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A83'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A83",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A83",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A85'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A85",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A85",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|ABO'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|ABO",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|ABO",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exem |