refactoring mit openrefine-task runner

https://github.com/opencultureconsulting/openrefine-task-runner
This commit is contained in:
Felix Lohmeier 2021-02-25 16:45:46 +01:00
parent b188267640
commit 9bad6aeb17
78 changed files with 11656 additions and 12395 deletions

9
.gitignore vendored
View File

@ -1,5 +1,6 @@
input
lib
log
output
.task
.openrefine
*/input
*/output
*/tmp
*/log/*

View File

@ -1,29 +1,68 @@
# Transformation von Bibliotheca und Alephino nach PICA+ für die Bibliotheken der Berufsakademie Sachsen
## Vorbereitung
1. Exporte bereitstellen mit folgenden Dateinamen:
* alephino/input/leipzig-exemplare.txt
* alephino/input/leipzig-titel.txt
* alephino/input/riesa-exemplare.txt
* alephino/input/riesa-titel.txt
* bibliotheca/input/bautzen.imp
* bibliotheca/input/breitenbrunn.imp
* bibliotheca/input/dresden.imp
* bibliotheca/input/glauchau.imp
* bibliotheca/input/plauen.imp
2. Installation Task 3.2.2
a) RPM-based (Fedora, CentOS, SLES, etc.)
```sh
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.rpm
sudo dnf install ./task_linux_amd64.rpm && rm task_linux_amd64.rpm
```
b) DEB-based (Debian, Ubuntu etc.)
```sh
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.deb
sudo apt install ./task_linux_amd64.deb && rm task_linux_amd64.deb
```
3. Installation OpenRefine 3.4.1 und openrefine-client 0.3.10
```
task install
```
## Nutzung
1. Exporte bereitstellen mit folgenden Dateinamen:
* input/bautzen.imp
* input/breitenbrunn.imp
* input/dresden.imp
* input/glauchau.imp
* input/leipzig-exemplare.txt
* input/leipzig-titel.txt
* input/plauen.imp
* input/riesa-exemplare.txt
* input/riesa-titel.txt
2. Installation und initiale Datenverarbeitung: `./main.sh`
3. Weitere Datenverarbeitungen:
* `lib/task` um den gesamten Workflow zu starten
* `lib/task --list` für eine Liste der verfügbaren Tasks
Datenverarbeitung sequentiell
```
task default
```
Datenverarbeitung (teil)parallelisiert (benötigt bis zu 16 GB RAM)
```
task pica+:main
```
Analyse dubletter Barcodes
```
task barcodes:main
```
## Systemvoraussetzungen
* Linux mit Bash, cURL und JAVA (getestet auf Fedora 32)
* 7 GB freien Arbeitsspeicher
* GNU/Linux (getestet auf Fedora 32)
* JAVA 8+ (für OpenRefine)
* 8 GB freien Arbeitsspeicher
## Verwendete Tools
* [OpenRefine](https://openrefine.org/)
* [bash-refine](https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d)
* [openrefine-client](https://github.com/opencultureconsulting/openrefine-client)
* [Task](https://github.com/go-task/task)

View File

@ -1,234 +1,99 @@
# https://taskfile.dev
# https://github.com/opencultureconsulting/openrefine-task-runner
version: '3'
output: 'group'
includes:
alephino: alephino
barcodes: barcodes
bibliotheca: bibliotheca
pica+: pica+
vars:
DATE: '{{ now | date "20060102_150405"}}'
silent: true
output: prefixed
env:
REFINE_MEMORY: 8g
REFINE_ENDPOINT: http://localhost:3334
OPENREFINE:
sh: readlink -m .openrefine/refine
CLIENT:
sh: readlink -m .openrefine/client
tasks:
default:
desc: Generierung PICA+
# deps: [bibliotheca, alephino]
desc: Datenverarbeitung sequentiell
cmds:
- task: alephino
- task: bibliotheca
- tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"
sources:
- tasks/03-ba-sachsen.sh
# - output/02-alephino-main/alephino.csv
- output/02-bibliotheca-main/bibliotheca.csv
generates:
- output/03-ba-sachsen/ba-sachsen.pic
- output/03-ba-sachsen/ba-sachsen.openrefine.tar.gz
env:
REFINE_WORKDIR: output/03-ba-sachsen
REFINE_LOGFILE: log/03-ba-sachsen/{{.DATE}}.log
- task: alephino:main
- task: bibliotheca:main
- task: pica+:refine
alephino:
desc: Alephino Hauptverarbeitung
# deps: [leipzig, riesa]
install:
desc: (re)install OpenRefine and openrefine-client into subdirectory .openrefine
cmds:
- task: leipzig
- task: riesa
- tasks/02-alephino-main.sh "output/01-alephino-pre"
sources:
- tasks/02-alephino-main.sh
- output/01-alephino-pre/*.tsv
generates:
# - output/02-alephino-main/alephino.csv
- output/02-alephino-main/alephino.openrefine.tar.gz
env:
REFINE_ENDPOINT: http://localhost:3334
REFINE_WORKDIR: output/02-alephino-main
REFINE_LOGFILE: log/02-alephino-main/{{.DATE}}.log
- | # delete existing install and recreate folder
rm -rf .openrefine
mkdir -p .openrefine
- > # download OpenRefine archive
wget --no-verbose -O openrefine.tar.gz
https://github.com/OpenRefine/OpenRefine/releases/download/3.4.1/openrefine-linux-3.4.1.tar.gz
- | # install OpenRefine into subdirectory .openrefine
tar -xzf openrefine.tar.gz -C .openrefine --strip 1
rm openrefine.tar.gz
- | # optimize OpenRefine for batch processing
sed -i 's/cd `dirname $0`/cd "$(dirname "$0")"/' ".openrefine/refine" # fix path issue in OpenRefine startup file
sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' ".openrefine/refine.ini" # do not try to open OpenRefine in browser
sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1440/' ".openrefine/refine.ini" # set autosave period from 5 minutes to 24 hours (1440 minutes)
- > # download openrefine-client into subdirectory .openrefine
wget --no-verbose -O .openrefine/client
https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.10/openrefine-client_0-3-10_linux
- chmod +x .openrefine/client # make client executable
bibliotheca:
desc: Bibliotheca Hauptverarbeitung
# deps: [bautzen, breitenbrunn, dresden, glauchau, plauen]
start:
dir: ./{{.DIR}}
cmds:
- task: bautzen
- task: breitenbrunn
- task: dresden
- task: glauchau
# - task: plauen
- tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"
sources:
- tasks/01-bibliotheca-pre.sh
- tasks/02-bibliotheca-main.sh
- output/01-bibliotheca-pre/*.tsv
generates:
- output/02-bibliotheca-main/bibliotheca.csv
- output/02-bibliotheca-main/bibliotheca.openrefine.tar.gz
env:
REFINE_ENDPOINT: http://localhost:3335
REFINE_WORKDIR: output/02-bibliotheca-main
REFINE_LOGFILE: log/02-bibliotheca-main/{{.DATE}}.log
- | # verify that OpenRefine is installed
if [ ! -f "$OPENREFINE" ]; then
echo 1>&2 "OpenRefine missing; try task install"; exit 1
fi
- | # delete temporary files and log file of previous run
rm -rf ./*.project* workspace.json
rm -rf "{{.PROJECT}}.log"
- > # launch OpenRefine with specific data directory and redirect its output to a log file
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
-d ../{{.DIR}}
>> "{{.PROJECT}}.log" 2>&1 &
- | # wait until OpenRefine API is available
timeout 30s bash -c "until
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
do sleep 1
done"
bautzen:
desc: Bibliotheca Vorverarbeitung
stop:
dir: ./{{.DIR}}
cmds:
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
sources:
- tasks/01-bibliotheca-pre.sh
- '{{.INPUT}}'
generates:
- output/01-bibliotheca-pre/bautzen.tsv
vars:
INPUT: '{{.INPUT | default "input/bautzen.imp"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "6G"}}'
REFINE_ENDPOINT: http://localhost:3334
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_bautzen.log
- | # shut down OpenRefine gracefully
PID=$(lsof -t -i:{{.PORT}})
kill $PID
while ps -p $PID > /dev/null; do sleep 1; done
- > # archive the OpenRefine project
tar cfz
"{{.PROJECT}}.openrefine.tar.gz"
-C $(grep -l "{{.PROJECT}}" *.project/metadata.json | cut -d '/' -f 1)
.
- rm -rf ./*.project* workspace.json # delete temporary files
breitenbrunn:
desc: Bibliotheca Vorverarbeitung
kill:
dir: ./{{.DIR}}
cmds:
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
sources:
- tasks/01-bibliotheca-pre.sh
- '{{.INPUT}}'
generates:
- output/01-bibliotheca-pre/breitenbrunn.tsv
vars:
INPUT: '{{.INPUT | default "input/breitenbrunn.imp"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
REFINE_ENDPOINT: http://localhost:3335
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_breitenbrunn.log
- | # shut down OpenRefine immediately to save time and disk space
PID=$(lsof -t -i:{{.PORT}})
kill -9 $PID
while ps -p $PID > /dev/null; do sleep 1; done
- rm -rf ./*.project* workspace.json # delete temporary files
dresden:
desc: Bibliotheca Vorverarbeitung
check:
desc: check OpenRefine log for any warnings and exit on error
dir: ./{{.DIR}}
cmds:
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
sources:
- tasks/01-bibliotheca-pre.sh
- '{{.INPUT}}'
generates:
- output/01-bibliotheca-pre/dresden.tsv
vars:
INPUT: '{{.INPUT | default "input/dresden.imp"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
REFINE_ENDPOINT: http://localhost:3336
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_dresden.log
leipzig:
desc: Alephino Vorverarbeitung
cmds:
- tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"
sources:
- tasks/01-alephino-pre.sh
- '{{.TITEL}}'
- '{{.EXEMPLARE}}'
generates:
- output/01-alephino-pre/leipzig.tsv
vars:
TITEL: '{{.TITEL | default "input/leipzig-titel.txt"}}'
EXEMPLARE: '{{.EXEMPLARE | default "input/leipzig-exemplare.txt"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
REFINE_ENDPOINT: http://localhost:3337
REFINE_WORKDIR: output/01-alephino-pre
REFINE_LOGFILE: log/01-alephino-pre/{{.DATE}}_leipzig.log
glauchau:
desc: Bibliotheca Vorverarbeitung
cmds:
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
sources:
- tasks/01-bibliotheca-pre.sh
- '{{.INPUT}}'
generates:
- output/01-bibliotheca-pre/glauchau.tsv
vars:
INPUT: '{{.INPUT | default "input/glauchau.imp"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
REFINE_ENDPOINT: http://localhost:3338
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_glauchau.log
plauen:
desc: Bibliotheca Vorverarbeitung
cmds:
- tasks/01-bibliotheca-pre.sh "input/plauen.imp"
sources:
- tasks/01-bibliotheca-pre.sh
- input/plauen.imp
generates:
- output/01-bibliotheca-pre/plauen.tsv
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "2G"}}'
REFINE_ENDPOINT: http://localhost:3339
REFINE_WORKDIR: output/01-bibliotheca-pre
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
riesa:
desc: Alephino Vorverarbeitung
cmds:
- tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"
sources:
- tasks/01-alephino-pre.sh
- '{{.TITEL}}'
- '{{.EXEMPLARE}}'
generates:
- output/01-alephino-pre/riesa.tsv
vars:
TITEL: '{{.TITEL | default "input/riesa-titel.txt"}}'
EXEMPLARE: '{{.EXEMPLARE | default "input/riesa-exemplare.txt"}}'
env:
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
REFINE_ENDPOINT: http://localhost:3340
REFINE_WORKDIR: output/01-alephino-pre
REFINE_LOGFILE: log/01-alephino-pre/{{.DATE}}_riesa.log
clean:
desc: Alle Daten löschen (reset auf Ausgangszustand)
cmds:
- rm -r lib log output
mkdir:
desc: Ordner erstellen
cmds:
- mkdir -p output/01-alephino-pre log/01-alephino-pre
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
- mkdir -p output/02-alephino-main log/02-alephino-main
- mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main
- mkdir -p output/03-ba-sachsen log/03-ba-sachsen
barcodes:
desc: Ermitteln von Dubletten
deps: [default]
cmds:
- mkdir -p output/barcodes
# Bibliotheca Barcodes extrahieren
- for f in input/*.imp; do grep '^\*I BARCO ' "$f" | dos2unix | cut -c 10- | sort > "output/barcodes/$(f=${f##*/}; echo ${f%.*}).raw"; done
# Alephino Barcodes extrahieren
- for f in input/*-exemplare.txt; do grep '^120 ' "$f" | cut -c 6- | sort > "output/barcodes/$(f=${f##*/}; echo ${f%-*}).raw"; done
# Extrahierte Barcodes gegen generiertes PICA+ abgleichen
- for f in output/barcodes/*.raw; do comm -12 "$f" <(sort output/03-ba-sachsen/barcodes.txt) > "output/barcodes/$(f=${f##*/}; echo ${f%.*}).filtered"; done
# Plauen, Leipzig, Riesa vorübergehend nicht filtern
- for f in leipzig riesa plauen; do cp output/barcodes/$f.raw output/barcodes/$f.filtered; done
# Dublette Barcodes Gesamtdubletten ermitteln
- sort output/barcodes/*.filtered | uniq -d > output/barcodes/duplicates
# Dubletten für jeden Teil ermitteln
- (cd output/barcodes && for f in *.filtered ; do grep -FxH -f duplicates "$f" | sort | join -o 2.1 -t ':' -a1 -2 2 duplicates - | cut -d '.' -f 1 > "${f}".tmp; done)
# Ergebnisse in Tabelle zusammenführen
- paste output/barcodes/duplicates output/barcodes/*.tmp | awk -F $'\t' '{sub($1, "\"&\""); print}' > output/barcodes/duplicates.tsv && rm output/barcodes/*.tmp
# Bearbeitungsstand
- 'echo "Seit Juli 2019 neu hinzugekommene Dubletten: $(comm -13 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
- 'echo "Seit Juli 2019 bearbeitete Dubletten: $(comm -23 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
- 'echo "Noch zu bearbeitende Dubletten: $(wc -l < output/barcodes/duplicates)"'
# sources:
# - input/*
# generates:
# - output/barcodes/duplicates.tsv
- | # find log file(s) and check for "exception" or "error"
if grep -i 'exception\|error' $(find . -name '*.log'); then
echo 1>&2 "log contains warnings!"; exit 1
fi

146
alephino/Taskfile.yml Normal file
View File

@ -0,0 +1,146 @@
version: '3'
tasks:
main:
  # Entry point of this namespace: preprocess both Alephino exports one after
  # the other, then run the main processing on the combined result.
  desc: Konvertierung von Alephino nach PICA3/CSV
  vars:
    # first segment of the namespaced task name; identical to the directory name
    DIR: '{{splitList ":" .TASK | first}}'
  cmds:
    - task: refine-pre
      vars:
        PROJECT: leipzig
    - task: refine-pre
      vars:
        PROJECT: riesa
    - task: refine-main
refine-pre:
  # Preprocess one Alephino export (PROJECT is passed in by the caller):
  # import the title and item files into OpenRefine, apply the operations in
  # config/pre/*.json and export the title project to output/<PROJECT>.tsv.
  dir: ./{{.DIR}}
  label: '{{.TASK}}-{{.PROJECT}}'
  vars:
    DIR: '{{splitList ":" .TASK | first}}' # first segment of the namespaced task name
    PORT: 3335 # assign a different port for each project
    RAM: 8192M # maximum RAM for OpenRefine java heap space
    # used as "> {{.LOG}}": stdout and stderr are appended to the project log via tee
    LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
  cmds:
    - echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
    - task: :start # launch OpenRefine
      vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
    - > # import title records
      "$CLIENT" -P {{.PORT}}
      --create "$(readlink -m input/{{.PROJECT}}-titel.txt)"
      --format fixed-width
      --encoding UTF-8
      --columnWidths 5
      --skipDataLines 0
      --storeBlankRows false
      --projectName titel
      > {{.LOG}}
    - > # import item records
      "$CLIENT" -P {{.PORT}}
      --create "$(readlink -m input/{{.PROJECT}}-exemplare.txt)"
      --format fixed-width
      --encoding UTF-8
      --columnWidths 5
      --skipDataLines 0
      --storeBlankRows false
      --projectName exemplare
      > {{.LOG}}
    - | # titles: corrections for individual cases
      "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-einzelfaelle.json > {{.LOG}}
    - | # prefix the field names (M resp. E)
      "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-prefix.json > {{.LOG}}
      "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-prefix.json > {{.LOG}}
    - | # sort records and field names
      "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-sortieren.json > {{.LOG}}
      "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-sortieren.json > {{.LOG}}
    - | # merge repeated fields
      "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-mehrfachbelegungen.json > {{.LOG}}
      "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-mehrfachbelegungen.json > {{.LOG}}
    - | # delete fields
      "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-loeschen.json > {{.LOG}}
      "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-loeschen.json > {{.LOG}}
    - | # transpose
      "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-transponieren.json > {{.LOG}}
      "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-transponieren.json > {{.LOG}}
    - | # separate the title ID
      "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-id-separieren.json > {{.LOG}}
      "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-id-separieren.json > {{.LOG}}
    - | # titles: enrich with item data
      "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-anreichern.json > {{.LOG}}
    - mkdir -p output
    - > # export
      "$CLIENT" -P {{.PORT}} titel
      --output "$(readlink -m output/{{.PROJECT}}.tsv)"
      > {{.LOG}}
    - | # print allocated system resources
      PID="$(lsof -t -i:{{.PORT}})"
      echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
      echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
    - task: :kill # shut down OpenRefine immediately to save time and disk space
      vars: {DIR: '{{.DIR}}/log', PORT: '{{.PORT}}'}
    - task: :check # check OpenRefine log for any warnings and exit on error
      vars: {DIR: '{{.DIR}}'}
  sources:
    - Taskfile.yml
    # the two files imported above; the former "input/{{.PROJECT}}.imp" entry
    # named a file this task never reads, so changed exports were not detected
    - input/{{.PROJECT}}-titel.txt
    - input/{{.PROJECT}}-exemplare.txt
    - config/pre/**
  generates:
    - output/{{.PROJECT}}.tsv
  ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
refine-main:
  # Main processing: load the preprocessed TSV files of both libraries into a
  # single OpenRefine project, apply the operations in config/main/*.json and
  # archive the resulting project.
  dir: ./{{.DIR}}
  vars:
    DIR: '{{splitList ":" .TASK | first}}' # first segment of the namespaced task name
    PROJECT: alephino
    PORT: 3335 # assign a different port for each project
    RAM: 8192M # maximum RAM for OpenRefine java heap space
    # used as "> {{.LOG}}": stdout and stderr are appended to the project log via tee
    LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
  cmds:
    - echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
    - task: :start # launch OpenRefine
      vars:
        DIR: '{{.DIR}}/log'
        PROJECT: '{{.PROJECT}}'
        PORT: '{{.PORT}}'
        RAM: '{{.RAM}}'
    - > # create a zip archive with the output of the preprocessing
      zip -j tmp.zip
      output/leipzig.tsv
      output/riesa.tsv
    - > # import the zip archive
      "$CLIENT" -P {{.PORT}}
      --create "$(readlink -m tmp.zip)"
      --format tsv
      --includeFileSources true
      --projectName {{.PROJECT}}
      > {{.LOG}}
      && rm tmp.zip
    - > # sort columns: start with 1. M|001, 2. E|001, 3. File; so that records mode is preserved
      "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/sortieren.json > {{.LOG}}
    - > # library code derived from the import file name
      "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
    - > # spec_A_E_01: call number 7100a
      "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
    # - > # export of the PICA3 columns as CSV; column 2199 has to come first because it is needed for sorting later
    #   mkdir -p output &&
    #   "$CLIENT" -P {{.PORT}} {{.PROJECT}}
    #   --output "$(readlink -m output/{{.PROJECT}}.csv)"
    #   --template "$(< config/main/template.txt)"
    #   --rowSeparator ""
    #   > {{.LOG}}
    - | # print allocated system resources
      PID="$(lsof -t -i:{{.PORT}})"
      echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
      echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
    - task: :stop # shut down OpenRefine and archive the OpenRefine project
      vars:
        DIR: '{{.DIR}}/log'
        PROJECT: '{{.PROJECT}}'
        PORT: '{{.PORT}}'
    - task: :check # check OpenRefine log for any warnings and exit on error
      vars:
        DIR: '{{.DIR}}'
  sources:
    - Taskfile.yml
    - output/*.tsv
    - config/main/**
  generates:
    - log/{{.PROJECT}}.openrefine.tar.gz
    # - output/{{.PROJECT}}.csv
  ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
default:
  # Enables standalone execution (running `task` inside this project
  # directory): builds "<directory name>:main", changes to the parent
  # directory and invokes that namespaced task there.
  cmds:
    - 'DIR="${PWD##*/}:main" && cd .. && task "$DIR"'

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|100",
"expression": "grel:value.split('\u001f')[0].slice(1)",
"onError": "set-to-blank",
"newColumnName": "7100a",
"columnInsertIndex": 5
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "File",
"expression": "grel:with([ ['leipzig.tsv','LE'], ['riesa.tsv','RS'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

View File

@ -0,0 +1,27 @@
[
{
"op": "core/column-move",
"columnName": "File",
"index": 0
},
{
"op": "core/column-move",
"columnName": "E|001",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|029",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|026f",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|IDN",
"index": 0
}
]

View File

@ -0,0 +1,15 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|BIB",
"expression": "grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')",
"onError": "set-to-blank",
"newColumnName": "titel_id",
"columnInsertIndex": 18,
"description": "Create column titel_id at index 18 based on column E|BIB using expression grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')"
}
]

View File

@ -0,0 +1,65 @@
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E|A02",
"l": "E|A02"
}
},
{
"v": {
"v": "E|A86",
"l": "E|A86"
}
},
{
"v": {
"v": "E|SUB",
"l": "E|SUB"
}
},
{
"v": {
"v": "E|FMT",
"l": "E|FMT"
}
},
{
"v": {
"v": "E|CAT",
"l": "E|CAT"
}
},
{
"v": {
"v": "E|027",
"l": "E|027"
}
},
{
"v": {
"v": "E|123",
"l": "E|123"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
}
]

View File

@ -0,0 +1,18 @@
[
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"description": "Blank down cells in column Column 1"
},
{
"op": "core/multivalued-cell-join",
"columnName": "Column 2",
"keyColumnName": "Column 1",
"separator": "␟",
"description": "Join multi-valued cells in column Column 2"
}
]

View File

@ -0,0 +1,15 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:'E|' + value.replace(' ','')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column Column 1 using expression grel:'E|' + value.replace(' ','')"
}
]

View File

@ -0,0 +1,80 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E|IDN",
"l": "E|IDN"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "Column 2",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 2,
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
},
{
"op": "core/column-move",
"columnName": "id",
"index": 0,
"description": "Move column id to position 0"
},
{
"op": "core/fill-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id",
"description": "Fill down cells in column id"
},
{
"op": "core/row-reorder",
"mode": "row-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "id",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
},
{
"valueType": "string",
"column": "Column 1",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
},
"description": "Reorder rows"
},
{
"op": "core/column-removal",
"columnName": "id",
"description": "Remove column id"
}
]

View File

@ -0,0 +1,9 @@
[
{
"op": "core/key-value-columnize",
"keyColumnName": "Column 1",
"valueColumnName": "Column 2",
"noteColumnName": "",
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
}
]

View File

@ -0,0 +1,822 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|001'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|001",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|001",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|002a'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|002a",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|002a",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|003'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|003",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|003",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|004'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|004",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|004",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|027'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|027",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|027",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|030'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|030",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|030",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|050'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|050",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|050",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|100'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|100",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|100",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|115'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|115",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|115",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|120'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|120",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|120",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|123'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|123",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|123",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A02'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A02",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A02",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A72'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A72",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A72",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A73'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A73",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A73",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A87'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A87",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A87",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A91'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A91",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A91",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A95'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A95",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A95",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|BIB'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|BIB",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|BIB",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|CAT'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|CAT",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|CAT",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|FMT'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|FMT",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|FMT",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|IDN'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|IDN",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|IDN",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|LDR'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|LDR",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|LDR",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|STA'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|STA",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|STA",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|SUB'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|SUB",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|SUB",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|105'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|105",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|105",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|107'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|107",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|107",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A94'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A94",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A94",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|125'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|125",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|125",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|072'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|072",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|072",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A98'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A98",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A98",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|HOL'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|HOL",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|HOL",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A86'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A86",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A86",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A63'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A63",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A63",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A70'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A70",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A70",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A83'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A83",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A83",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A85'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A85",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A85",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|ABO'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|ABO",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|ABO",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A97'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A97",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A97",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A82'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|A82",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|A82",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|002'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|002",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|002",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|ORD'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "E|ORD",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "E|ORD",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
}
]

View File

@ -0,0 +1,22 @@
[
{
"op": "core/mass-edit",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "value",
"edits": [
{
"from": [
"001st"
],
"fromBlank": false,
"fromError": false,
"to": "001"
}
],
"description": "Mass edit cells in column Column 1"
}
]

View File

@ -0,0 +1,15 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|IDN",
"expression": "grel:value.replace(/^0+/,'')",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 12,
"description": "Create column id at index 12 based on column M|IDN using expression grel:value.replace(/^0+/,'')"
}
]

View File

@ -0,0 +1,148 @@
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "M|025_",
"l": "M|025_"
}
},
{
"v": {
"v": "M|025e",
"l": "M|025e"
}
},
{
"v": {
"v": "M|004",
"l": "M|004"
}
},
{
"v": {
"v": "M|011",
"l": "M|011"
}
},
{
"v": {
"v": "M|026_",
"l": "M|026_"
}
},
{
"v": {
"v": "M|026a",
"l": "M|026a"
}
},
{
"v": {
"v": "M|026d",
"l": "M|026d"
}
},
{
"v": {
"v": "M|026g",
"l": "M|026g"
}
},
{
"v": {
"v": "M|030",
"l": "M|030"
}
},
{
"v": {
"v": "M|037z",
"l": "M|037z"
}
},
{
"v": {
"v": "M|038b",
"l": "M|038b"
}
},
{
"v": {
"v": "M|070",
"l": "M|070"
}
},
{
"v": {
"v": "M|073",
"l": "M|073"
}
},
{
"v": {
"v": "M|076z",
"l": "M|076z"
}
},
{
"v": {
"v": "M|080",
"l": "M|080"
}
},
{
"v": {
"v": "M|800s",
"l": "M|800s"
}
},
{
"v": {
"v": "M|802",
"l": "M|802"
}
},
{
"v": {
"v": "M|808b",
"l": "M|808b"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "^M\\|9",
"mode": "regex",
"caseSensitive": false,
"invert": false
}
],
"mode": "row-based"
}
}
]

View File

@ -0,0 +1,18 @@
[
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"description": "Blank down cells in column Column 1"
},
{
"op": "core/multivalued-cell-join",
"columnName": "Column 2",
"keyColumnName": "Column 1",
"separator": "␟",
"description": "Join multi-valued cells in column Column 2"
}
]

View File

@ -0,0 +1,15 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:'M|' + value.replace(' ','')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column Column 1 using expression grel:'M|' + value.replace(' ','')"
}
]

View File

@ -0,0 +1,80 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "M|IDN",
"l": "M|IDN"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "Column 2",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 2,
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
},
{
"op": "core/column-move",
"columnName": "id",
"index": 0,
"description": "Move column id to position 0"
},
{
"op": "core/fill-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id",
"description": "Fill down cells in column id"
},
{
"op": "core/row-reorder",
"mode": "row-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "id",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
},
{
"valueType": "string",
"column": "Column 1",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
},
"description": "Reorder rows"
},
{
"op": "core/column-removal",
"columnName": "id",
"description": "Remove column id"
}
]

View File

@ -0,0 +1,9 @@
[
{
"op": "core/key-value-columnize",
"keyColumnName": "Column 1",
"valueColumnName": "Column 2",
"noteColumnName": "",
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
}
]

38
barcodes/Taskfile.yml Normal file
View File

@ -0,0 +1,38 @@
version: '3'
tasks:
# Task "main": finds barcodes that occur more than once across the library exports.
# Reads ../bibliotheca/input/*.imp and ../alephino/input/*-exemplare.txt, needs
# ../pica+/output/barcodes.txt (see preconditions) and writes output/duplicates.tsv
# and output/duplicates.
main:
desc: Ermitteln von dubletten Barcodes
dir: ./{{.DIR}}
vars:
# first ":"-separated segment of the task name
DIR: '{{splitList ":" .TASK | first}}'
cmds:
- rm -rf tmp
- mkdir -p tmp output
# extract Bibliotheca barcodes (lines starting with "*I BARCO ", value from column 10 onward)
- for f in ../bibliotheca/input/*.imp; do grep '^\*I BARCO ' "$f" | dos2unix | cut -c 10- | sort > "tmp/$(f=${f##*/}; echo ${f%.*}).raw"; done
# extract Alephino barcodes (lines starting with "120 ", value from column 6 onward)
- for f in ../alephino/input/*-exemplare.txt; do grep '^120 ' "$f" | cut -c 6- | sort > "tmp/$(f=${f##*/}; echo ${f%-*}).raw"; done
# match extracted barcodes against the generated PICA+ (keep only lines present in both)
- for f in tmp/*.raw; do comm -12 "$f" <(sort ../pica+/output/barcodes.txt) > "tmp/$(f=${f##*/}; echo ${f%.*}).filtered"; done
# temporarily do not filter Plauen, Leipzig, Riesa (the raw list overwrites the filtered one)
- for f in leipzig riesa plauen; do cp tmp/$f.raw tmp/$f.filtered; done
# determine barcodes that are duplicated across all filtered lists
- sort tmp/*.filtered | uniq -d > tmp/duplicates
# determine the duplicates for each part (one *.filtered.tmp file per input list)
- (cd tmp && for f in *.filtered ; do grep -FxH -f duplicates "$f" | sort | join -o 2.1 -t ':' -a1 -2 2 duplicates - | cut -d '.' -f 1 > "${f}".tmp; done)
# merge the results into one table; the first column (barcode) is wrapped in double quotes
- paste tmp/duplicates tmp/*.tmp | awk -F $'\t' '{sub($1, "\"&\""); print}' > output/duplicates.tsv
# move the file with all duplicates to the output directory
- mv tmp/duplicates output/duplicates
# print processing status relative to the snapshot input/duplicates-2019-07-10.txt
- 'echo "Seit Juli 2019 neu hinzugekommene Dubletten: $(comm -13 input/duplicates-2019-07-10.txt output/duplicates | wc -l)"'
- 'echo "Seit Juli 2019 bearbeitete Dubletten: $(comm -23 input/duplicates-2019-07-10.txt output/duplicates | wc -l)"'
- 'echo "Noch zu bearbeitende Dubletten: $(wc -l < output/duplicates)"'
preconditions:
# the task refuses to run until the PICA+ barcode list exists
- sh: test -f ../pica+/output/barcodes.txt
msg: Barcode-Datei fehlt; versuche "task default"
# Builds the task name "<current directory name>:main", changes to the parent
# directory and runs that task there.
default: # enable standalone execution (running `task` in project directory)
cmds:
- DIR="${PWD##*/}:main" && cd .. && task "$DIR"

View File

@ -1,240 +0,0 @@
#!/bin/bash
# bash-refine v1.3.4: bash-refine.sh, Felix Lohmeier, 2020-11-02
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
# license: MIT License https://choosealicense.com/licenses/mit/
# TODO: support for macOS
# ================================== CONFIG ================================== #
# Settings may be overridden through REFINE_* environment variables.
endpoint="${REFINE_ENDPOINT:-http://localhost:3333}"
memory="${REFINE_MEMORY:-1400M}"
csrf="${REFINE_CSRF:-true}"
date="$(date +%Y%m%d_%H%M%S)"
# Every path prefers its REFINE_* override; when readlink cannot resolve the
# override (it then prints nothing and fails), a default next to this script
# is used instead.
workdir="$(readlink -e "${REFINE_WORKDIR}")" \
  || workdir="$(readlink -m "${BASH_SOURCE%/*}/output/${date}")"
logfile="$(readlink -f "${REFINE_LOGFILE}")" \
  || logfile="$(readlink -m "${BASH_SOURCE%/*}/log/${date}.log")"
jq="$(readlink -e "${REFINE_JQ}")" \
  || jq="$(readlink -m "${BASH_SOURCE%/*}/lib/jq")"
refine="$(readlink -e "${REFINE_REFINE}")" \
  || refine="$(readlink -m "${BASH_SOURCE%/*}/lib/openrefine/refine")"
# associative arrays shared by the functions below:
#   checkpoints - timestamps for the run time statistics
#   pids        - background jobs that are being monitored
#   projects    - OpenRefine project ids
declare -A checkpoints
declare -A pids
declare -A projects
# =============================== REQUIREMENTS =============================== #
function requirements {
  # Verify that java and cURL are installed, then download jq and OpenRefine
  # to ${jq} / ${refine} when they are not present yet.
  # Exits with status 1 if a prerequisite is missing or a download fails.

  # check existence of java and cURL
  if [[ -z "$(command -v java 2> /dev/null)" ]] ; then
    echo 1>&2 "ERROR: OpenRefine requires JAVA runtime environment (jre)" \
      "https://openjdk.java.net/install/"
    exit 1
  fi
  if [[ -z "$(command -v curl 2> /dev/null)" ]] ; then
    echo 1>&2 "ERROR: This shell script requires cURL" \
      "https://curl.haxx.se/download.html"
    exit 1
  fi
  # download jq and OpenRefine if necessary
  if [[ -z "$(readlink -e "${jq}")" ]]; then
    echo "Download jq..."
    mkdir -p "$(dirname "${jq}")"
    # jq 1.4 has much faster startup time than 1.5 and 1.6
    # --fail: without it an HTTP error page would be stored as the "binary"
    # and the existence check above would accept it on the next run
    if ! curl -L --fail --output "${jq}" \
      "https://github.com/stedolan/jq/releases/download/jq-1.4/jq-linux-x86_64"
    then
      rm -f "${jq}"
      echo 1>&2 "ERROR: download of jq failed!"
      exit 1
    fi
    chmod +x "${jq}"; echo
  fi
  if [[ -z "$(readlink -e "${refine}")" ]]; then
    local refine_dir
    refine_dir="$(dirname "${refine}")"
    echo "Download OpenRefine..."
    mkdir -p "${refine_dir}"
    # --fail: do not hand an HTTP error page to tar
    if ! curl -L --fail --output openrefine.tar.gz \
      "https://github.com/OpenRefine/OpenRefine/releases/download/3.4/openrefine-linux-3.4.tar.gz"
    then
      rm -f openrefine.tar.gz
      echo 1>&2 "ERROR: download of OpenRefine failed!"
      exit 1
    fi
    echo "Install OpenRefine in subdirectory ${refine_dir}..."
    tar -xzf openrefine.tar.gz -C "${refine_dir}" --strip 1 --totals
    rm -f openrefine.tar.gz
    # do not try to open OpenRefine in browser
    sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' \
      "${refine_dir}"/refine.ini
    # set min java heap space to allocated memory
    sed -i 's/-Xms$REFINE_MIN_MEMORY/-Xms$REFINE_MEMORY/' \
      "${refine_dir}"/refine
    # set autosave period from 5 minutes to 25 hours
    sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1500/' \
      "${refine_dir}"/refine.ini
    echo
  fi
}
# ============================== OPENREFINE API ============================== #
function refine_start {
  # Launch the OpenRefine server in the background with ${workdir} as data
  # directory, remember its PID in pid_server and wait up to 30 seconds
  # until the endpoint answers with a page containing "OpenRefine".
  echo "start OpenRefine server..."
  local dir
  dir="$(readlink -e "${workdir}")"
  # quoted so that an installation path containing spaces or glob
  # characters is passed as a single word
  "${refine}" -v warn -m "${memory}" -p "${endpoint##*:}" -d "${dir}" &
  pid_server=${!}
  timeout 30s bash -c "until curl -s \"${endpoint}\" \
    | cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
    || error "starting OpenRefine server failed!"
}
function refine_stats {
  # show start time, elapsed time, memory share, CPU share and resident set
  # size of the server process
  local columns="start,etime,%mem,%cpu,rss"
  ps -p "${pid_server}" -o "${columns}"
}
function refine_kill {
  # terminate the server at once; SIGKILL keeps OpenRefine from saving projects
  {
    kill -KILL "${pid_server}" && wait "${pid_server}"
  } 2>/dev/null
  # remove the temporary OpenRefine projects and the workspace index
  (
    cd "${workdir}" && rm -rf ./*.project* && rm -f workspace.json
  )
}
function refine_check {
  # search the log file (case-insensitive) for "exception" or "error";
  # matching lines are printed and abort the run via error
  if ! grep -i -e 'exception' -e 'error' "${logfile}"; then
    log "checked log file, all good!"
  else
    error "log contains warnings!"
  fi
}
function refine_stop {
  # print the server load, kill the server and then check the log file
  printf '%s\n' "stop OpenRefine server and print server load..."
  refine_stats
  printf '\n'
  refine_kill
  printf '%s\n' "check log for any warnings..."
  refine_check
}
function refine_csrf {
  # Print "?csrf_token=<token>" for use as a query string (CSRF tokens were
  # introduced in OpenRefine 3.3); prints nothing when csrf is not "true".
  [[ "${csrf}" = true ]] || return 0
  local response token
  response=$(curl -fs "${endpoint}/command/core/get-csrf-token")
  if [[ "${response}" == '{"token":"'* ]]; then
    # the token is the fourth field when splitting the response at double quotes
    token="$(echo "$response" | cut -d \" -f 4)"
    echo "?csrf_token=${token}"
  else
    error "getting CSRF token failed!"
  fi
}
function refine_store {
  # Usage: refine_store <name> <file>
  # Read the project id from <file> (text after the first "="), store it in
  # projects[<name>], delete <file> and make sure the project has rows.
  if [[ $# != 2 ]]; then
    error "invalid arguments supplied to import function!"
  fi
  projects[$1]=$(cut -d '=' -f 2 "$2")
  # a project id is expected to be 13 characters long
  if [[ "${#projects[$1]}" = 13 ]]; then
    rm "$2"
  else
    error "returned project id is not valid!"
  fi
  # check if project contains at least one row (may be skipped to gain ~40ms)
  local rows
  rows=$(
    curl -fs --get \
      --data project="${projects[$1]}" \
      --data limit=0 \
      "${endpoint}/command/core/get-rows" \
      | tr "," "\n" | grep total | cut -d ":" -f 2
  )
  if [[ "$rows" = "0" ]]; then
    error "imported project contains 0 rows!"
  fi
}
# ============================ SCRIPT ENVIRONMENT ============================ #
function log {
  # print a status message prefixed with the current time (millisecond precision)
  printf '%s [ client] %s\n' "$(date +%H:%M:%S.%3N)" "$1"
}
function error {
  # write the message to stderr, kill the server and all child processes of
  # this shell, then exit with status 1
  printf 'ERROR: %s\n' "$1" >&2
  refine_kill
  pkill -P $$
  exit 1
}
function monitor {
  # remember the PID of the most recently started background job under key $1
  local last_job="$!"
  pids[$1]="${last_job}"
}
function monitoring {
  # Wait for every background job registered via monitor. A finished job is
  # removed from pids; a failed job aborts the script through error.
  # Afterwards the log file is checked.
  local name
  for name in "${!pids[@]}"; do
    if wait "${pids[$name]}"; then
      # quoted so that the subscript is not treated as a glob pattern
      unset "pids[${name}]"
    else
      error "${name} (${projects[$name]}) failed!"
    fi
  done
  refine_check
}
function checkpoint {
  # record the current time under key $1 and print a centred heading
  # "<count>. <name>" padded with "=" characters
  checkpoints[$1]=$(date +%s.%3N)
  local bar left right
  bar="$(printf '%0.1s' ={1..40})"
  left="$(((80-2-${#1})/2))"
  right="$(((80-1-${#1})/2))"
  printf '%*.*s %s %*.*s\n' \
    0 "${left}" "${bar}" \
    "${#checkpoints[@]}. $1" \
    0 "${right}" "${bar}"
}
function checkpoint_stats {
  # Print start time and run time of every checkpoint in chronological
  # order, followed by the total run time.
  local k keys values idx elapsed
  echo "starting time and run time (hh:mm:ss) of each step..."
  # sort checkpoint names by their timestamp and store them in array keys
  readarray -t keys < <(
    for k in "${!checkpoints[@]}"; do
      echo "${checkpoints[$k]}:::$k"
    done | sort | awk -F::: '{print $2}')
  # collect the matching timestamps without milliseconds in array values
  values=()
  for k in "${keys[@]}"; do
    values+=("${checkpoints[$k]%.*}")
  done
  # the current time closes the last step
  values+=("$(date +%s)")
  # one line per step: name, position, start time, duration
  for idx in "${!keys[@]}"; do
    elapsed=$(( values[idx + 1] - values[idx] ))
    printf "%35s %s %s %s\n" "${keys[$idx]}" "($((idx + 1)))" \
      "$(date -d @"${values[$idx]}")" \
      "($(date -u -d "@${elapsed}" +%H:%M:%S))"
  done
  # total run time from the first checkpoint until now
  elapsed=$(( values[${#keys[@]}] - values[0] ))
  printf "%80s\n%80s\n" "----------" "($(date -u -d "@${elapsed}" +%H:%M:%S))"
}
function count_output {
  # line and byte counts for every file in the working directory
  printf '%s\n' "files (number of lines / size in bytes) in ${workdir}..."
  (
    cd "${workdir}" && wc -l -c ./*
  )
}
function init {
  # make sure all tools are installed (downloads them if necessary)
  requirements
  # abort cleanly when the script is interrupted
  trap 'error "script interrupted!"' HUP INT QUIT TERM
  # create working and log directories
  local logdir
  logdir="$(dirname "${logfile}")"
  mkdir -p "${workdir}" "${logdir}"
  # from here on stdout and stderr are also appended to the log file
  exec > >(tee -i -a "${logfile}") 2>&1
}

204
bibliotheca/Taskfile.yml Normal file
View File

@ -0,0 +1,204 @@
version: '3'
tasks:
# Task "main": runs refine-pre for bautzen, breitenbrunn, dresden and glauchau
# (plauen is commented out) and then refine-main.
main:
desc: Konvertierung von BIBLIOTHECA nach PICA3/CSV
vars:
DIR: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name
cmds:
- task: refine-pre
vars: {PROJECT: bautzen}
- task: refine-pre
vars: {PROJECT: breitenbrunn}
- task: refine-pre
vars: {PROJECT: dresden}
- task: refine-pre
vars: {PROJECT: glauchau}
# - task: refine-pre
# vars: {PROJECT: plauen}
- task: refine-main
# Task "refine-pre": preprocesses one BIBLIOTHECA export. It starts OpenRefine,
# imports input/{{.PROJECT}}.imp, applies the operation histories in config/pre/
# one after another and exports the result to output/{{.PROJECT}}.tsv.
# PROJECT is supplied by the calling task.
refine-pre:
dir: ./{{.DIR}}
label: '{{.TASK}}-{{.PROJECT}}'
vars:
DIR: '{{splitList ":" .TASK | first}}'
PORT: 3334 # assign a different port for each project
RAM: 8192M # maximum RAM for OpenRefine java heap space
# used as "> {{.LOG}}": stdout and stderr go through tee and are appended to log/{{.PROJECT}}.log
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
cmds:
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
- task: :start # launch OpenRefine
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
- > # import
"$CLIENT" -P {{.PORT}}
--create "$(readlink -m input/{{.PROJECT}}.imp)"
--encoding ISO-8859-1
--ignoreLines 1
--storeBlankRows false
--projectName {{.PROJECT}}
> {{.LOG}}
- > # spec_Z_03: delete discarded (makuliert) media; deletes all titles and their items that contain only discarded items; then deletes all remaining discarded items
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/makuliert.json > {{.LOG}}
- > # delete ACQ records; deletes all titles and their items that carry the ACQ flag; then deletes all remaining items with the ACQ flag
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/acq.json > {{.LOG}}
- > # extract multi-line content
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/mehrzeiliges-extrahieren.json > {{.LOG}}
- > # delete blank rows
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/leerzeilen.json > {{.LOG}}
- > # split fields and values
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/feld-wert-separieren.json > {{.LOG}}
- > # join multi-line content (marked with #); separator: U+241F
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/mehrzeiliges-zusammen.json > {{.LOG}}
- > # prefix field names with M| or E| because the same field names occur in media and item records
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/feldname-prefix.json > {{.LOG}}
- > # join repeated occurrences of the same field
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/mehrfachbelegungen.json > {{.LOG}}
- > # delete numeric title-data fields (except 025z 026 026k 052 076b 076d)
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/reduzieren.json > {{.LOG}}
- > # transpose
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/transponieren.json > {{.LOG}}
- mkdir -p output
- > # export
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/{{.PROJECT}}.tsv)"
> {{.LOG}}
- | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
- task: :kill # shut down OpenRefine immediately to save time and disk space
vars: {DIR: '{{.DIR}}/log', PORT: '{{.PORT}}'}
- task: :check # check OpenRefine log for any warnings and exit on error
vars: {DIR: '{{.DIR}}'}
sources:
- Taskfile.yml
- input/{{.PROJECT}}.imp
- config/pre/**
generates:
- output/{{.PROJECT}}.tsv
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
# Task "refine-main": zips the preprocessed TSV files from output/, imports the
# archive into one OpenRefine project named "bibliotheca", applies the operation
# histories in config/main/ one after another and exports output/bibliotheca.csv
# using config/main/template.txt.
refine-main:
dir: ./{{.DIR}}
vars:
DIR: '{{splitList ":" .TASK | first}}'
PROJECT: bibliotheca
PORT: 3334 # assign a different port for each project
RAM: 8192M # maximum RAM for OpenRefine java heap space
# used as "> {{.LOG}}": stdout and stderr go through tee and are appended to log/{{.PROJECT}}.log
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
cmds:
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
- task: :start # launch OpenRefine
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
- > # create zip archive from the output of the preprocessing
zip -j tmp.zip
output/bautzen.tsv
output/breitenbrunn.tsv
output/dresden.tsv
output/glauchau.tsv
# output/plauen.tsv
- > # import zip archive
"$CLIENT" -P {{.PORT}}
--create "$(readlink -m tmp.zip)"
--format tsv
--includeFileSources true
--projectName {{.PROJECT}}
> {{.LOG}}
&& rm tmp.zip
- > # reorder columns: start with 1. M|MEDNR, 2. E|EXNR, 3. File so that records mode is preserved
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/sortieren.json > {{.LOG}}
- > # spec_Z_01: delete e-books (Bautzen)
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/ebooks.json > {{.LOG}}
- > # spec_Z_02: delete journals and parts of MTM; see also the specification in CBS-Titeldaten Bibliotheca
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/zeitschriften.json > {{.LOG}}
- > # library code derived from the import file name
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
- > # spec_B_T_01: PPNs in 0100 (K10plus) and 0110 (SWB); 8-digit ones from Dresden are SWN without check digit, the check digit is added there; assignment of 9-digit ones depends on the leading characters and M026 / M026k; assignment of 10-digit ones depends on the first character
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0100-0110.json > {{.LOG}}
- > # spec_B_T_49: numbers from data conversion 2199
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/2199.json > {{.LOG}}
- > # spec_B_E_15: department number 7100j
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100j.json > {{.LOG}}
- > # spec_B_E_13, spec_Z_03 and spec_B_E_08: branch 7100f
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100f.json > {{.LOG}}
- > # spec_B_E_07: location 7100a
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
- > # spec_B_T_04, spec_B_T_05: ISBN 2000
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/2000.json > {{.LOG}}
# TODO: ISMN in 2020
- > # spec_B_E_10: accession date E0XX
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/e0xx.json > {{.LOG}}
- > # spec_B_E_14, spec_Z_03, spec_B_E16: selection key E0XXb
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/e0xxb.json > {{.LOG}}
# TODO: Selektionsschlüssel für Abschlussarbeiten
- > # spec_B_T_56_1: genre/status 0500 and publication type 1140
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0500-1140.json > {{.LOG}}
# TODO: ART = S
- > # spec_B_T_56_2: F/f for superordinate records 0500
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0500-ueber.json > {{.LOG}}
- > # spec_B_T_56_3: Lax for theses 0500
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0500-lax.json > {{.LOG}}
- > # spec_B_T_50, spec_B_T_51, spec_B_T_52, spec_B_T_56: IMD fields 0501a, 0501b, 0502a, 0502b, 0503a, 0503b, 0999
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0501-0502-0503-0999.json > {{.LOG}}
- > # spec_B_T_17: main title 4000a
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4000a.json > {{.LOG}}
- > # spec_B_T_18: title addition 4000d
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4000d.json > {{.LOG}}
- > # spec_B_T_20: edition statement 4020a
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4020a.json > {{.LOG}}
- > # spec_B_T_16: publisher name 4030n
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4030n.json > {{.LOG}}
- > # spec_B_T_21: place of publication 4030p
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4030p.json > {{.LOG}}
- > # spec_B_T_22: extent 4060a
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4060a.json > {{.LOG}}
- > # spec_B_E_02: circulation (booking) number 8200
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8200.json > {{.LOG}}
- > # spec_B_T_02: year statements 1100a and 1100n; 1100a normalized with numerous replacements
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/1100a-1100n.json > {{.LOG}}
# TODO: Jahr (Ende) in Sortierform in 1100b
- > # spec_B_E_01: loan note 8515; Bautzen only
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8515.json > {{.LOG}}
- > # spec_B_E_04, spec_B_E_05 and spec_B_E_08: item status 7100d
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100d.json > {{.LOG}}
- > # spec_B_E_06: media group 8011
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8011.json > {{.LOG}}
- > # spec_B_E_11 and spec_B_E_12: accession number 8100
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8100.json > {{.LOG}}
- > # spec_B_T_03: language code 1500
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/1500.json > {{.LOG}}
- > # spec_B_T_54: text for theses 1131
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/1131.json > {{.LOG}}
- > # spec_B_T_55: text for theses 8600
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8600.json > {{.LOG}}
# Die folgende Transformationsregel muss direkt vor dem Export stehen
- > # finally delete titles without items; multi-part monographs are to be kept, therefore only when BANDB and BANDN are not empty (this rule must stay directly before the export)
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/abschluss.json > {{.LOG}}
- > # export the PICA3 columns as CSV; column 2199 must come first because it is needed later for sorting
mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/{{.PROJECT}}.csv)"
--template "$(< config/main/template.txt)"
--rowSeparator ""
> {{.LOG}}
- | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
- task: :stop # shut down OpenRefine and archive the OpenRefine project
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
- task: :check # check OpenRefine log for any warnings and exit on error
vars: {DIR: '{{.DIR}}'}
sources:
- Taskfile.yml
- output/*.tsv
- config/main/**
generates:
- log/{{.PROJECT}}.openrefine.tar.gz
- output/{{.PROJECT}}.csv
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
# Builds the task name "<current directory name>:main", changes to the parent
# directory and runs that task there.
default: # enable standalone execution (running `task` in project directory)
cmds:
- DIR="${PWD##*/}:main" && cd .. && task "$DIR"

View File

@ -0,0 +1,719 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 8,
"l": "8"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "M|IDNR",
"expression": "grel:value + with(11 - mod(sum(forRange(0,9,1,i,toNumber(value[i])*(9-i))),11),pz,if(pz == 11, '0', if(pz == 10, 'X', pz)))",
"onError": "set-to-blank",
"newColumnName": "0110",
"columnInsertIndex": 4
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 9,
"l": "9"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value[0,2]",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "53",
"l": "53"
}
},
{
"v": {
"v": "54",
"l": "54"
}
},
{
"v": {
"v": "55",
"l": "55"
}
},
{
"v": {
"v": "56",
"l": "56"
}
},
{
"v": {
"v": "57",
"l": "57"
}
},
{
"v": {
"v": "13",
"l": "13"
}
},
{
"v": {
"v": "14",
"l": "14"
}
},
{
"v": {
"v": "58",
"l": "58"
}
},
{
"v": {
"v": "15",
"l": "15"
}
},
{
"v": {
"v": "59",
"l": "59"
}
},
{
"v": {
"v": "16",
"l": "16"
}
},
{
"v": {
"v": "17",
"l": "17"
}
},
{
"v": {
"v": "18",
"l": "18"
}
},
{
"v": {
"v": "19",
"l": "19"
}
},
{
"v": {
"v": "21",
"l": "21"
}
},
{
"v": {
"v": "22",
"l": "22"
}
},
{
"v": {
"v": "23",
"l": "23"
}
},
{
"v": {
"v": "24",
"l": "24"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "M|IDNR",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "0100",
"columnInsertIndex": 4
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 9,
"l": "9"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value[0,1]",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "6",
"l": "6"
}
},
{
"v": {
"v": "7",
"l": "7"
}
},
{
"v": {
"v": "8",
"l": "8"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:cells['M|IDNR'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 9,
"l": "9"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value[0,2]",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "00",
"l": "00"
}
},
{
"v": {
"v": "10",
"l": "10"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:cells['M|IDNR'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 9,
"l": "9"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "0100",
"expression": "isBlank(value)",
"columnName": "0100",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "0110",
"expression": "isBlank(value)",
"columnName": "0110",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|026",
"expression": "grel:value[0,3]",
"columnName": "M|026",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "GBV",
"l": "GBV"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|026k",
"expression": "grel:value == cells['M|IDNR'].value",
"columnName": "M|026k",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:cells['M|IDNR'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 9,
"l": "9"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "0100",
"expression": "isBlank(value)",
"columnName": "0100",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "0110",
"expression": "isBlank(value)",
"columnName": "0110",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|026",
"expression": "grel:value[0,3]",
"columnName": "M|026",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "HBZ",
"l": "HBZ"
}
},
{
"v": {
"v": "KXP",
"l": "KXP"
}
},
{
"v": {
"v": "OBV",
"l": "OBV"
}
},
{
"v": {
"v": "DNB",
"l": "DNB"
}
},
{
"v": {
"v": "BVB",
"l": "BVB"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|026k",
"expression": "isBlank(value)",
"columnName": "M|026k",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:cells['M|IDNR'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 9,
"l": "9"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "0100",
"expression": "isBlank(value)",
"columnName": "0100",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "0110",
"expression": "isBlank(value)",
"columnName": "0110",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:cells['M|IDNR'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 10,
"l": "10"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value[0]",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "1",
"l": "1"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:cells['M|IDNR'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value.length()",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": 10,
"l": "10"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|IDNR",
"expression": "grel:value[0]",
"columnName": "M|IDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "9",
"l": "9"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:cells['M|IDNR'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,158 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDGR",
"expression": "value",
"columnName": "M|MEDGR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BAC",
"l": "BAC"
}
},
{
"v": {
"v": "DIP",
"l": "DIP"
}
},
{
"v": {
"v": "DI",
"l": "DI"
}
},
{
"v": {
"v": "MA",
"l": "MA"
}
},
{
"v": {
"v": "BA",
"l": "BA"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "7100f",
"expression": "value",
"columnName": "7100f",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BB",
"l": "BB"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "E0XXb",
"expression": "grel:'d' + value[1,3]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDGR",
"expression": "value",
"columnName": "M|MEDGR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "DI",
"l": "DI"
}
},
{
"v": {
"v": "MA",
"l": "MA"
}
},
{
"v": {
"v": "BA",
"l": "BA"
}
},
{
"v": {
"v": "BAC",
"l": "BAC"
}
},
{
"v": {
"v": "DIP",
"l": "DIP"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "7100f",
"expression": "value",
"columnName": "7100f",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "EH",
"l": "EH"
}
},
{
"v": {
"v": "EH-Theke",
"l": "EH-Theke"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "E0XXb",
"expression": "grel:'n' + value[1,3]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

View File

@ -0,0 +1,34 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|NRPRE",
"expression": "grel:forEach(value.cross('bibliotheca','M|MEDNR'),r,if(and(r.cells['File'].value == cells['File'].value,or(isNonBlank(cells['M|BANDB'].value),isNonBlank(cells['M|BANDN'].value))),'vorhanden','fehlt')).inArray('vorhanden')",
"columnName": "M|NRPRE",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0500",
"expression": "grel:if(isNonBlank(cells['M|HST'].value), value[0] + 'F' + value[2] ,value[0] + 'f' + value[2])",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,139 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDNR",
"expression": "isBlank(value)",
"columnName": "M|MEDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "M|JAHR",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "1100n",
"columnInsertIndex": 3
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDNR",
"expression": "isBlank(value)",
"columnName": "M|MEDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "M|JAHR",
"expression": "grel:with(with(with(value.replace('[','').replace(']','').replace('(','').replace(')','').replace(' ','').replace('?','').replace('.','').replace('ca','').replace('c','').replace('ff',''),x,forNonBlank(x.split('/')[1],v,v,x)),y,y.split('-')[0]),z,if(and(z.length()==4,isNumeric(z)),z,if(z=='19XX','19XX',null))))",
"onError": "set-to-blank",
"newColumnName": "1100a",
"columnInsertIndex": 3
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDNR",
"expression": "isBlank(value)",
"columnName": "M|MEDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "1100a",
"expression": "isBlank(value)",
"columnName": "1100a",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "1100a",
"expression": "grel:if(cells['M|JAHR'].value.contains('19'),'19XX','20XX')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "M|JAHR",
"columnName": "M|JAHR",
"query": "-",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "row-based"
},
"baseColumnName": "M|JAHR",
"expression": "grel:value.split('-')[1].replace('[','').replace(']','').replace('(','').replace(')','').replace(' ','').replace('?','').replace('.','')",
"onError": "set-to-blank",
"newColumnName": "1100b",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,89 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDGR",
"expression": "value",
"columnName": "M|MEDGR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BAC",
"l": "BAC"
}
},
{
"v": {
"v": "DI",
"l": "DI"
}
},
{
"v": {
"v": "DIP",
"l": "DIP"
}
},
{
"v": {
"v": "MA",
"l": "MA"
}
},
{
"v": {
"v": "BA",
"l": "BA"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "7100f",
"expression": "value",
"columnName": "7100f",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BB",
"l": "BB"
}
},
{
"v": {
"v": "EH",
"l": "EH"
}
},
{
"v": {
"v": "EH-Theke",
"l": "EH-Theke"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "File",
"expression": "grel:'Hochschulschrift'",
"onError": "set-to-blank",
"newColumnName": "1131",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,77 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|SPRA",
"expression": "grel:forEach(value.split(/,|#|\\+|;/),v,forNonBlank(v.replace('.','').replace('-','').replace(' ','').\nreplace(/^arab$/,'ara').\nreplace(/^Arabisch$/,'ara').\nreplace(/^aram$/,'arc').\nreplace(/^daen$/,'dan').\nreplace(/^Deutsch$/,'ger').\nreplace(/^DEUTSCH$/,'ger').\nreplace(/^deutsch$/,'ger').\nreplace(/^dt$/,'ger').\nreplace(/^engl$/,'eng').\nreplace(/^Englisch$/,'eng').\nreplace(/^ENGLISCH$/,'eng').\nreplace(/^englisch$/,'eng').\nreplace(/^Finnisch$/,'fin').\nreplace(/^franz$/,'fre').\nreplace(/^Französisch$/,'fre').\nreplace(/^griech$/,'gre').\nreplace(/^hebr$/,'heb').\nreplace(/^hrv$/,'').\nreplace(/^ital$/,'ita').\nreplace(/^Italienisch$/,'ita').\nreplace(/^ITALIENISCH$/,'ita').\nreplace(/^Litauisch$/,'lit').\nreplace(/^n$/,'').\nreplace(/^Niederländisch$/,'dut').\nreplace(/^pers$/,'per').\nreplace(/^poln$/,'pol').\nreplace(/^Polnisch$/,'pol').\nreplace(/^polygl$/,'mul').\nreplace(/^portug$/,'por').\nreplace(/^Portugiesisch$/,'por').\nreplace(/^Portugisisch$/,'por').\nreplace(/^ru$/,'rus').\nreplace(/^Rumänisch$/,'rum').\nreplace(/^russ$/,'rus').\nreplace(/^Russisch$/,'rus').\nreplace(/^schwed$/,'swe').\nreplace(/^Schwedisch$/,'swe').\nreplace(/^slowak$/,'slo').\nreplace(/^sp$/,'spa').\nreplace(/^span$/,'spa').\nreplace(/^Spanisch$/,'spa').\nreplace(/^tschech$/,'cze').\nreplace(/^Tschechisch$/,'cze').\nreplace(/^tuerk$/,'tur').\nreplace(/^Türkisch$/,'tur').\nreplace(/^Ukrainisch$/,'ukr').\nreplace(/^ungar$/,'hun').\nreplace(/^Ungarisch$/,'hun')\n,x,x,null)).join('␟')",
"onError": "set-to-blank",
"newColumnName": "1500",
"columnInsertIndex": 3
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "1500",
"expression": "grel:forEachIndex(value.split('␟'),i,v,if(i != 0, if(inArray(value.split('␟')[0,i],v),null,v), v)).join('␟')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDNR",
"expression": "isBlank(value)",
"columnName": "M|MEDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "1500",
"expression": "isBlank(value)",
"columnName": "1500",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "1500",
"expression": "grel:'und'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "record-based"
},
"baseColumnName": "M|ISBN",
"expression": "grel:[ forNonBlank(cells['M|ISBN'].value,v,if(isNumeric(v[0]),v,null),null), forNonBlank(cells['M|ISBN2'].value,v,if(isNumeric(v[0]),v,null),null) ].uniques().join('␟').replace('-','').toUppercase()",
"onError": "set-to-blank",
"newColumnName": "2000",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|MEDNR",
"expression": "grel:'BA' + cells['File'].value + value",
"onError": "set-to-blank",
"newColumnName": "2199",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,65 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "record-based"
},
"baseColumnName": "M|HST",
"expression": "grel:if(value.contains('¬'),with(value.split('¬'), v, v[0].trim() + ' @' + v[1].trim()),value)",
"onError": "set-to-blank",
"newColumnName": "4000a",
"columnInsertIndex": 3
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "4000a",
"expression": "isBlank(value)",
"columnName": "4000a",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "0500",
"expression": "grel:value[1]",
"columnName": "0500",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "a",
"l": "a"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "4000a",
"expression": "grel:'Titel fehlt'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|HSTZU",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "4000d",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|AUFL",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "4020a",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|VERL",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "4030n",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|VORT",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "4030p",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|UMF",
"expression": "grel:value.split(' : ')[0]",
"onError": "set-to-blank",
"newColumnName": "4060a",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|STA1",
"expression": "grel:value.replace('␟',' ').replace(/ +/,' ')",
"onError": "set-to-blank",
"newColumnName": "7100a",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,878 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "H",
"l": "H"
}
},
{
"v": {
"v": "I",
"l": "I"
}
},
{
"v": {
"v": "T",
"l": "T"
}
},
{
"v": {
"v": "U",
"l": "U"
}
},
{
"v": {
"v": "V",
"l": "V"
}
},
{
"v": {
"v": "v",
"l": "v"
}
},
{
"v": {
"v": "Z",
"l": "Z"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "E|EXSTA",
"expression": "grel:'u'",
"onError": "set-to-blank",
"newColumnName": "7100d",
"columnInsertIndex": 3
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "A",
"l": "A"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "PL",
"l": "PL"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'z'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "A",
"l": "A"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "DD",
"l": "DD"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'a'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "B",
"l": "B"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'a'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "G",
"l": "G"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'g'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "K",
"l": "K"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'i'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "N",
"l": "N"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'u'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "P",
"l": "P"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BB",
"l": "BB"
}
},
{
"v": {
"v": "GC",
"l": "GC"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'s'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "P",
"l": "P"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "DD",
"l": "DD"
}
},
{
"v": {
"v": "BZ",
"l": "BZ"
}
},
{
"v": {
"v": "PL",
"l": "PL"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'i'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "W",
"l": "W"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "PL",
"l": "PL"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'c'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "W",
"l": "W"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "DD",
"l": "DD"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'z'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "W",
"l": "W"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BB",
"l": "BB"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'z'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "w",
"l": "w"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BB",
"l": "BB"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'z'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E",
"l": "E"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "E|ESORG",
"expression": "value",
"columnName": "E|ESORG",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "P",
"l": "P"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'i'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E",
"l": "E"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "E|ESORG",
"expression": "value",
"columnName": "E|ESORG",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "K",
"l": "K"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'u'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E",
"l": "E"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "E|ESORG",
"expression": "value",
"columnName": "E|ESORG",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "W",
"l": "W"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'c'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXSTA",
"expression": "value",
"columnName": "E|EXSTA",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E",
"l": "E"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "7100d",
"expression": "isBlank(value)",
"columnName": "7100d",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'u'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|STA2",
"expression": "value",
"columnName": "E|STA2",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "MAV",
"l": "MAV"
}
},
{
"v": {
"v": "eFlex",
"l": "eFlex"
}
},
{
"v": {
"v": "Verwaltung",
"l": "Verwaltung"
}
},
{
"v": {
"v": "Tonwerkstatt",
"l": "Tonwerkstatt"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100d",
"expression": "grel:'i'",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "File",
"expression": "grel:with(if(value=='DD',forNonBlank(cells['E|ZWGST'].value,v,v,value),value),x,x.replace('BB','0002').replace('BZ','0001').replace('DD','0003').replace('EH','0008').replace('GC','0004').replace('PL','0007'))",
"onError": "set-to-blank",
"newColumnName": "7100j",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|MEDGR",
"expression": "grel:'MEDGR: ' + value",
"onError": "set-to-blank",
"newColumnName": "8011",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,36 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|ZUNR",
"expression": "grel:cells['File'].value + ' ' + value.replace('-','/')",
"onError": "set-to-blank",
"newColumnName": "8100",
"columnInsertIndex": 3
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "E|ZUS",
"columnName": "E|ZUS",
"query": "Notation",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "row-based"
},
"columnName": "8100",
"expression": "grel:value + ' ' + cells['E|ZUS'].value.replace('Notation||','')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|BARCO",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "8200",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,34 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BZ",
"l": "BZ"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "E|AUHIN",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "8515",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,89 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDGR",
"expression": "value",
"columnName": "M|MEDGR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BAC",
"l": "BAC"
}
},
{
"v": {
"v": "DI",
"l": "DI"
}
},
{
"v": {
"v": "DIP",
"l": "DIP"
}
},
{
"v": {
"v": "MA",
"l": "MA"
}
},
{
"v": {
"v": "BA",
"l": "BA"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "7100f",
"expression": "value",
"columnName": "7100f",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "BB",
"l": "BB"
}
},
{
"v": {
"v": "EH",
"l": "EH"
}
},
{
"v": {
"v": "EH-Theke",
"l": "EH-Theke"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "File",
"expression": "grel:'LOKMAT: Lah'",
"onError": "set-to-blank",
"newColumnName": "8600",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,68 @@
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "isBlank(value)",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "E0XX",
"expression": "isBlank(value)",
"columnName": "E0XX",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|BANDB",
"expression": "grel:or(isNonBlank(cells['M|BANDB'].value), isNonBlank(cells['M|BANDN'].value))",
"columnName": "M|BANDB",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"description": "Remove rows"
}
]

View File

@ -0,0 +1,34 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXNR",
"expression": "isBlank(value)",
"columnName": "E|EXNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "E|ZUDAT",
"expression": "grel:forNonBlank(value,v,v[0,2] + '-' + v[3,5] + '-' + v[8,10],'22-07-20')",
"onError": "set-to-blank",
"newColumnName": "E0XX",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,34 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|EXNR",
"expression": "isBlank(value)",
"columnName": "E|EXNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "File",
"expression": "grel:with(if(value=='DD',forNonBlank(cells['E|ZWGST'].value,v,v,value),value),x,'n'+x.toLowercase())",
"onError": "set-to-blank",
"newColumnName": "E0XXb",
"columnInsertIndex": 3
}
]

View File

@ -0,0 +1,29 @@
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|MEDGR",
"expression": "value",
"columnName": "M|MEDGR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "eBook",
"l": "eBook"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
}
}
]

View File

@ -0,0 +1,14 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "File",
"expression": "grel:with([ ['bautzen.tsv','BZ'], ['breitenbrunn.tsv','BB'], ['dresden.tsv','DD'], ['glauchau.tsv','GC'], ['plauen.tsv','PL'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

View File

@ -0,0 +1,17 @@
[
{
"op": "core/column-move",
"columnName": "File",
"index": 0
},
{
"op": "core/column-move",
"columnName": "E|EXNR",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|MEDNR",
"index": 0
}
]

View File

@ -0,0 +1,87 @@
{{
with(
  [
    '2199', '0100', '0110', '0500',
    '0501a', '0501b', '0502a', '0502b', '0503a', '0503b',
    '0999',
    '1100a', '1100b', '1100n',
    '1131', '1140', '1500', '2000',
    '4000a', '4000d', '4020a', '4030n', '4030p', '4060a',
    '7100j', '7100f', '7100a', '7100d',
    '8011', '8100', '8200', '8515', '8600',
    'E0XX', 'E0XXb'
  ],
  fieldNames,
  with(
    forEach(
      fieldNames,
      fn,
      forNonBlank(cells[fn].value, val, val.escape('csv'), '␀')
    ).join(',').replace('␀',''),
    line,
    if(
      row.index == 0,
      forEach(fieldNames, fn, fn.escape('csv')).join(',') + '\n',
      ''
    )
    + if(
      isNonBlank(line.split(',').join(',')),
      line + '\n',
      ''
    )
  )
)
}}

View File

@ -0,0 +1,165 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|BANDN",
"expression": "value",
"columnName": "M|BANDN",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "0",
"l": "0"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "M|BANDN",
"expression": "grel:null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|ART",
"expression": "value",
"columnName": "M|ART",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "GH",
"l": "GH"
}
},
{
"v": {
"v": "Z",
"l": "Z"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
}
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|ART",
"expression": "value",
"columnName": "M|ART",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "G",
"l": "G"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|UART",
"expression": "value",
"columnName": "M|UART",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "R",
"l": "R"
}
},
{
"v": {
"v": "Z",
"l": "Z"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "M|ART",
"expression": "value",
"columnName": "M|ART",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "G",
"l": "G"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "M|MEDNR",
"expression": "grel:forEach(value.cross('bibliotheca','M|NRPRE'),r,if(and(r.cells['File'].value == cells['File'].value, or(isNonBlank(r.cells['M|BANDB'].value),isNonBlank(r.cells['M|BANDN'].value))),'vorhanden','fehlt')).inArray('vorhanden')",
"columnName": "M|MEDNR",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": true
}
],
"mode": "row-based"
}
}
]

View File

@ -0,0 +1,99 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "*********M",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "record-based"
},
"baseColumnName": "Column 1",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "tmp",
"columnInsertIndex": 1
},
{
"op": "core/column-move",
"columnName": "tmp",
"index": 0
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "grel:if(isNonBlank(cells['tmp'].value),with(row.record.cells[columnName].value.join('').find(/MEKZ .../).uniques().join(''),v,v),null)",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "MEKZ ACQ",
"l": "MEKZ ACQ"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
}
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "*********E",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "row-based"
},
"columnName": "tmp",
"expression": "grel:cells['Column 1'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "MEKZ ACQ",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "record-based"
}
},
{
"op": "core/column-removal",
"columnName": "tmp"
}
]

View File

@ -0,0 +1,51 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "value",
"expression": "isBlank(value)",
"columnName": "value",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "value",
"expression": "grel:cells['Column 1'].value.slice(9)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:value[3,8]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-rename",
"oldColumnName": "Column 1",
"newColumnName": "key"
}
]

View File

@ -0,0 +1,85 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "key",
"expression": "value",
"columnName": "key",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "*****",
"l": "*****"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "value",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "typ",
"columnInsertIndex": 2
},
{
"op": "core/fill-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "typ"
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "key",
"expression": "value",
"columnName": "key",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "*****",
"l": "*****"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "key",
"expression": "grel:cells['typ'].value + '|' + value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "typ"
}
]

View File

@ -0,0 +1,29 @@
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Blank Rows",
"expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
"columnName": "",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "true",
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
}
]

View File

@ -0,0 +1,99 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "*********M",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "record-based"
},
"baseColumnName": "Column 1",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "tmp",
"columnInsertIndex": 1
},
{
"op": "core/column-move",
"columnName": "tmp",
"index": 0
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "grel:if(isNonBlank(cells['tmp'].value),with(row.record.cells[columnName].value.join('').find(/EXSTA ./).uniques().join(''),v,v),null)",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "EXSTA M",
"l": "EXSTA M"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
}
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "*********E",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "row-based"
},
"columnName": "tmp",
"expression": "grel:cells['Column 1'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "EXSTA M",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "record-based"
}
},
{
"op": "core/column-removal",
"columnName": "tmp"
}
]

View File

@ -0,0 +1,16 @@
[
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "key"
},
{
"op": "core/multivalued-cell-join",
"columnName": "value",
"keyColumnName": "key",
"separator": "␟"
}
]

View File

@ -0,0 +1,46 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "^\\*",
"mode": "regex",
"caseSensitive": false,
"invert": true
}
],
"mode": "row-based"
},
"baseColumnName": "Column 1",
"expression": "grel:value.slice(1)",
"onError": "set-to-blank",
"newColumnName": "value",
"columnInsertIndex": 1
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "^\\*",
"mode": "regex",
"caseSensitive": false,
"invert": true
}
],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]

View File

@ -0,0 +1,8 @@
[
{
"op": "core/multivalued-cell-join",
"columnName": "value",
"keyColumnName": "key",
"separator": "␟"
}
]

View File

@ -0,0 +1,29 @@
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "key",
"expression": "grel:and(isNumeric(value[2,4].trim()), not(or(value[2,6] == '025z', value[2,6] == '026 ', value[2,6] == '026k', value[2,6] == '052 ', value[2,6] == '076b', value[2,6] == '076d')))",
"columnName": "key",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
}
]

View File

@ -0,0 +1,8 @@
[
{
"op": "core/key-value-columnize",
"keyColumnName": "key",
"valueColumnName": "value",
"noteColumnName": ""
}
]

26
main.sh
View File

@ -1,26 +0,0 @@
#!/bin/bash
# Scripts for the transformation of Bibliotheca and Alephino to PICA+
#
# Sources bash-refine.sh, makes sure the task runner binary exists in lib/
# next to this script (downloading it if necessary) and then runs the tasks
# "mkdir" and the default task (cf. Taskfile.yml).

# check and install requirements for bash-refine
# (`requirements` is not defined here; presumably provided by bash-refine.sh)
source "${BASH_SOURCE%/*}/bash-refine.sh" || exit 1
requirements

# download task runner (skipped when lib/task already exists)
task="$(readlink -m "${BASH_SOURCE%/*}/lib/task")"
if [[ -z "$(readlink -e "${task}")" ]]; then
  echo "Download task..."
  mkdir -p "$(dirname "${task}")" || exit 1
  # --fail: abort on HTTP errors instead of saving the error page as archive
  if ! curl -fL --output task.tar.gz \
    "https://github.com/go-task/task/releases/download/v3.0.0/task_linux_amd64.tar.gz"
  then
    echo 1>&2 "Download of task failed!"
    rm -f task.tar.gz
    exit 1
  fi
  # extract only the "task" binary; stop here if the archive is unusable
  if ! tar -xzf task.tar.gz -C "$(dirname "${task}")" task --totals; then
    echo 1>&2 "Extraction of task failed!"
    rm -f task.tar.gz
    exit 1
  fi
  rm -f task.tar.gz
fi

# make script executable from another directory
cd "${BASH_SOURCE%/*}/" || exit 1

# create folders
"${task}" mkdir

# execute default task (cf. Taskfile.yml)
"${task}"

78
pica+/Taskfile.yml Normal file
View File

@ -0,0 +1,78 @@
# Taskfile of the pica+ project: imports the CSV output of the preceding
# projects into OpenRefine, applies the configs in config/ and exports PICA+.
version: '3'
tasks:
# main: runs the alephino and bibliotheca main tasks as dependencies, then refine
main:
desc: PICA3/CSV aus Bibliotheca und Alephino zusammenführen, Exemplare clustern, anreichern und in PICA+ konvertieren
vars:
# first ":"-separated segment of the task name
DIR: '{{splitList ":" .TASK | first}}'
deps:
- task: :alephino:main
- task: :bibliotheca:main
cmds:
- task: refine
# refine: import, transform and export with OpenRefine
refine:
dir: ./{{.DIR}}
vars:
DIR: '{{splitList ":" .TASK | first}}'
PROJECT: pica+
PORT: 3334 # assign a different port for each project
RAM: 8192M # maximum RAM for OpenRefine java heap space
# used below as `> {{.LOG}}`: output is appended to log/<PROJECT>.log via tee
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
cmds:
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
- task: :start # launch OpenRefine
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
- > # create ZIP archive with the output of the previous tasks
zip -j tmp.zip
../bibliotheca/output/bibliotheca.csv
# ../alephino/output/alephino.csv
- > # import ZIP archive
"$CLIENT" -P {{.PORT}}
--create "$(readlink -m tmp.zip)"
--format csv
--includeFileSources false
--projectName {{.PROJECT}}
> {{.LOG}}
&& rm tmp.zip
- > # spec_Z_04: enrich PPN via ISBN
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/ppn.json > {{.LOG}}
- > # spec_Z_05: cluster items (Exemplare)
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/clustern.json > {{.LOG}}
- mkdir -p output
- > # export of duplicate barcodes (the template writes every non-blank value of column 8200); golang requires strange escaping https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/barcodes.txt)"
--template "{{"{{"}}forNonBlank(cells['8200'].value, v, v + '\n', ''){{"}}"}}"
--rowSeparator ""
> {{.LOG}}
- > # spec_Z_06: delete duplicate barcodes
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/dedup.json > {{.LOG}}
- > # export as PICA+
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/{{.PROJECT}}.txt)"
--template "$(< config/template.txt)"
--rowSeparator ""
> {{.LOG}}
- | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
- task: :stop # shut down OpenRefine and archive the OpenRefine project
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
- task: :check # check OpenRefine log for any warnings and exit on error
vars: {DIR: '{{.DIR}}'}
sources:
- Taskfile.yml
# - ../alephino/output/alephino.csv
- ../bibliotheca/output/bibliotheca.csv
- config/**
generates:
- log/{{.PROJECT}}.openrefine.tar.gz
- output/**
ignore_error: true # workaround to avoid an orphaned Java process on error
# https://github.com/go-task/task/issues/141
default: # enable standalone execution (running `task` in project directory)
cmds:
- DIR="${PWD##*/}:main" && cd .. && task "$DIR"

138
pica+/config/clustern.json Normal file
View File

@ -0,0 +1,138 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "isBlank(value)",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "2199",
"expression": "grel:forNonBlank(cells['0100'].value,v,v,cells['0110'].value)",
"onError": "set-to-blank",
"newColumnName": "ppn",
"columnInsertIndex": 1
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "ppn",
"expression": "isBlank(value)",
"columnName": "ppn",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "ppn",
"expression": "grel:row.record.cells[columnName].value[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-reorder",
"mode": "record-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "ppn",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
}
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "ppn",
"expression": "grel:forNonBlank(cells['ppn'].value,v,v,forNonBlank(cells['2199'].value,v,v,''))",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 0
},
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id"
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "id",
"expression": "isBlank(value)",
"columnName": "id",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
},
"columnName": "2199",
"expression": "grel:if(rowIndex - row.record.fromRowIndex == 0,row.record.cells[columnName].value.join('␟'),null)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "ppn"
}
]

35
pica+/config/dedup.json Normal file
View File

@ -0,0 +1,35 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "8200",
"expression": "facetCount(value, 'value', '8200') > 1",
"columnName": "8200",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "8200",
"expression": "null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column 8200 using expression null"
}
]

292
pica+/config/ppn.json Normal file
View File

@ -0,0 +1,292 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "2000",
"expression": "grel:with(value.replace('-',''),x,forEach(x.split('␟'),v,if(v.length()==10,with('978'+v[0,9],z,z+((10-(sum(forRange(0,12,1,i,toNumber(z[i])*(1+(i%2*2)) )) %10)) %10).toString()[0] ),v))).uniques().join('␟')",
"onError": "set-to-blank",
"newColumnName": "tmp",
"columnInsertIndex": 3
},
{
"op": "core/column-split",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "tmp",
"guessCellType": false,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "␟",
"regex": false,
"maxColumns": 0
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "tmp 1"
},
{
"op": "core/column-removal",
"columnName": "tmp 2"
}
]

33
pica+/config/template.txt Normal file
View File

@ -0,0 +1,33 @@
{{
if(row.index - row.record.fromRowIndex == 0,
'' + '\n'
+ forNonBlank(cells['0500'].value, v, '002@ ' + '0' + v + '\n', '')
+ forNonBlank(cells['0501a'].value, v, '002C ' + 'a' + v + forNonBlank(cells['0501b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0502a'].value, v, '002D ' + 'a' + v + forNonBlank(cells['0502b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0503a'].value, v, '002E ' + 'a' + v + forNonBlank(cells['0503b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0100'].value, v, '003@ ' + '0' + v + '\n', '')
+ forNonBlank(cells['0110'].value, v, '003S ' + '0' + v + '\n', '')
+ forNonBlank(cells['2000'].value, v, forEach(v.split('␟'),x,'004A ' + '0' + x + '\n').join(''), '')
+ forNonBlank(cells['2199'].value, v, forEach(v.split('␟'),x,'006Y ' + '0' + x + '\n').join(''), '')
+ forNonBlank(cells['1500'].value, v, '010@ ' + forEach(v.split('␟'),x,'a' + x).join('') + '\n', '')
+ forNonBlank(cells['1100a'].value, v, '011@ ' + 'a' + v + forNonBlank(cells['1100b'].value, v, 'b' + v, '') + forNonBlank(cells['1100n'].value, v, 'n' + v, '') + '\n', '')
+ forNonBlank(cells['1131'].value, v, '013D ' + 'a' + v + '\n', '')
+ forNonBlank(cells['1140'].value, v, '013H ' + 'a' + v + '\n', '')
+ forNonBlank(cells['4000a'].value, v, '021A ' + 'a' + v + forNonBlank(cells['4000d'].value, v, 'd' + v, '') + '\n', '')
+ forNonBlank(cells['4020a'].value, v, '032@ ' + 'a' + v + '\n', '')
+ if(or(isNonBlank(cells['4030n'].value),isNonBlank(cells['4030p'].value)),'033A ' + forNonBlank(cells['4030n'].value, v, 'n' + v,'') + forNonBlank(cells['4030p'].value, v, 'p' + v, '') + '\n', '')
+ forNonBlank(cells['4060a'].value, v, '034D ' + 'a' + v + '\n', '')
+ forNonBlank(cells['0999'].value, v, '046W ' + 'a' + v + '\n', '')
,'')
}}{{
if(isNonBlank(cells['E0XXb'].value),
with(with(rowIndex - row.record.fromRowIndex + 1, i, '00'[0,2-i.length()] + i),exnr,
'208@/' + exnr + ' a' + cells['E0XX'].value + 'b' + cells['E0XXb'].value + '\n'
+ '209A/' + exnr + ' b4736' + 'j' + cells['7100j'].value + 'f' + cells['7100f'].value + forNonBlank(cells['7100a'].value, v, 'a' + v, '') + forNonBlank(cells['7100d'].value, v, 'd' + v, '') + 'x00' + '\n'
+ forNonBlank(cells['8011'].value, v, '209B/' + exnr + ' a' + v + 'x11' + '\n', '')
+ forNonBlank(cells['8100'].value, v, '209C/' + exnr + ' a' + v + 'x00' + '\n', '')
+ forNonBlank(cells['8200'].value, v, '209G/' + exnr + ' a' + v + '\n', '')
+ forNonBlank(cells['8600'].value, v, '209O/' + exnr + ' a' + v + 'x00' + '\n', '')
+ forNonBlank(cells['8515'].value, v, '220B/' + exnr + ' a' + v + '\n', '')
), '')
}}

View File

@ -1,868 +0,0 @@
#!/bin/bash
# Alephino preprocessing
# - import the exports (titles and items) of one of the libraries
# - convert them into a tabular format
# - attach the item information to the titles
# - export as TSV
# =============================== ENVIRONMENT ================================ #
# source the main script
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
# read input: $1 = title export, $2 = item export
# the basename (without .txt) is used as project name, the resolved absolute
# path is stored in projects[<name>]
if [[ $1 && $2 ]]; then
  titel="$(basename "$1" .txt)"
  exemplare="$(basename "$2" .txt)"
  # readlink -e fails for non-existing paths; abort right away instead of
  # storing an empty path that would only surface as a failed import after
  # the OpenRefine server has been started
  projects[$titel]="$(readlink -e "$1")" \
    || { echo 1>&2 "Input file not found: $1"; exit 1; }
  projects[$exemplare]="$(readlink -e "$2")" \
    || { echo 1>&2 "Input file not found: $2"; exit 1; }
else
  echo 1>&2 "Please provide path to input files (1. Titel, 2. Exemplare)"; exit 1
fi
# check requirements, set trap, create workdir and tee to logfile
init
# ================================= STARTUP ================================== #
checkpoint "Startup"; echo
# print environment variables
printenv | grep REFINE; echo
# start OpenRefine server
refine_start; echo
# ================================== IMPORT ================================== #
checkpoint "Import"; echo
# The title export and the item export are uploaded with identical settings:
# - fixed-width text file, first column is 5 characters wide
# - character encoding: UTF-8
# - "store blank rows" disabled
# The redirect URL returned by OpenRefine is written to <name>.id and then
# handed to refine_store.
# NOTE(review): refine_store presumably replaces projects[<name>] with the new
# project id — confirm in bash-refine.sh
for imp in "${titel}" "${exemplare}"; do
  echo "import file" "${projects[$imp]}" "..."
  if curl -fs --write-out "%{redirect_url}\n" \
    --form project-file="@${projects[$imp]}" \
    --form project-name="${imp}" \
    --form format="text/line-based/fixed-width" \
    --form options='{
"encoding":"UTF-8",
"columnWidths":[5],
"ignoreLines":-1,
"headerLines":0,
"skipDataLines":0,
"limit":-1,
"guessCellValueTypes":false,
"storeBlankRows":false,
"storeBlankCellsAsNulls":true,
"includeFileSources":false
}' \
    "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
    > "${workdir}/${imp}.id"
  then
    log "imported ${projects[$imp]} as ${imp}"
  else
    error "import of ${projects[$imp]} failed!"
  fi
  refine_store "${imp}" "${workdir}/${imp}.id" || error "import of ${imp} failed!"
  echo
done
# ================================ TRANSFORM ================================= #
checkpoint "Transform"; echo
# ------------------------- Corrections for single cases --------------------- #
# Title project only: mass-edit on "Column 1" that replaces the field name
# "001st" with "001".
# The operations JSON is passed to curl on stdin (operations@-) through the
# quoted heredoc, so nothing inside it is expanded by the shell.
echo "Korrekturen Einzelfälle..."
if curl -fs \
--data project="${projects[$titel]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/mass-edit",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "value",
"edits": [
{
"from": [
"001st"
],
"fromBlank": false,
"fromError": false,
"to": "001"
}
],
"description": "Mass edit cells in column Column 1"
}
]
JSON
then
log "transformed ${titel} (${projects[$titel]})"
else
error "transform ${titel} (${projects[$titel]}) failed!"
fi
# ------------------ Prefix field names with M or E respectively ------------- #
# Title project: "Column 1" becomes 'M|' + field name with all spaces removed.
# Item project:  "Column 1" becomes 'E|' + field name with all spaces removed.
echo "Feldnamen um M bzw. E ergänzen..."
# title project (prefix M|)
if curl -fs \
--data project="${projects[$titel]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:'M|' + value.replace(' ','')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column Column 1 using expression grel:'M|' + value.replace(' ','')"
}
]
JSON
then
log "transformed ${titel} (${projects[$titel]})"
else
error "transform ${titel} (${projects[$titel]}) failed!"
fi
# item project (prefix E|)
if curl -fs \
--data project="${projects[$exemplare]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:'E|' + value.replace(' ','')",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column Column 1 using expression grel:'E|' + value.replace(' ','')"
}
]
JSON
then
log "transformed ${exemplare} (${projects[$exemplare]})"
else
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
fi
echo
# ---------------------------------- Sorting --------------------------------- #
# Sort records and field names. The same five operations run on both projects:
# 1. create a helper column "id" from "Column 2" on the rows whose field name
#    is M|IDN (titles) or E|IDN (items)
# 2. move "id" to position 0
# 3. fill "id" down
# 4. reorder rows by "id", then by "Column 1" (both as case-insensitive strings)
# 5. remove the helper column "id" again
echo "Datensätze und Feldnamen sortieren..."
# title project (key field M|IDN)
if curl -fs \
--data project="${projects[$titel]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "M|IDN",
"l": "M|IDN"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "Column 2",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 2,
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
},
{
"op": "core/column-move",
"columnName": "id",
"index": 0,
"description": "Move column id to position 0"
},
{
"op": "core/fill-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id",
"description": "Fill down cells in column id"
},
{
"op": "core/row-reorder",
"mode": "row-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "id",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
},
{
"valueType": "string",
"column": "Column 1",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
},
"description": "Reorder rows"
},
{
"op": "core/column-removal",
"columnName": "id",
"description": "Remove column id"
}
]
JSON
then
log "transformed ${titel} (${projects[$titel]})"
else
error "transform ${titel} (${projects[$titel]}) failed!"
fi
# item project (key field E|IDN)
if curl -fs \
--data project="${projects[$exemplare]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E|IDN",
"l": "E|IDN"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "Column 2",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 2,
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
},
{
"op": "core/column-move",
"columnName": "id",
"index": 0,
"description": "Move column id to position 0"
},
{
"op": "core/fill-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id",
"description": "Fill down cells in column id"
},
{
"op": "core/row-reorder",
"mode": "row-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "id",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
},
{
"valueType": "string",
"column": "Column 1",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
},
"description": "Reorder rows"
},
{
"op": "core/column-removal",
"columnName": "id",
"description": "Remove column id"
}
]
JSON
then
log "transformed ${exemplare} (${projects[$exemplare]})"
else
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
fi
echo
# ------------------------- Merge repeated fields ---------------------------- #
# Equivalent steps in the OpenRefine UI:
# - Column 1 > Edit cells > Blank down
# - Column 2 > Edit cells > join multi-valued cells... > ␟
# i.e. repeated field names are blanked and their values are joined into one
# cell, separated by ␟. The same operations run on both projects.
echo "Mehrfachbelegungen zusammenführen..."
# title project
if curl -fs \
--data project="${projects[$titel]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"description": "Blank down cells in column Column 1"
},
{
"op": "core/multivalued-cell-join",
"columnName": "Column 2",
"keyColumnName": "Column 1",
"separator": "␟",
"description": "Join multi-valued cells in column Column 2"
}
]
JSON
then
log "transformed ${titel} (${projects[$titel]})"
else
error "transform ${titel} (${projects[$titel]}) failed!"
fi
# item project
if curl -fs \
--data project="${projects[$exemplare]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"description": "Blank down cells in column Column 1"
},
{
"op": "core/multivalued-cell-join",
"columnName": "Column 2",
"keyColumnName": "Column 1",
"separator": "␟",
"description": "Join multi-valued cells in column Column 2"
}
]
JSON
then
log "transformed ${exemplare} (${projects[$exemplare]})"
else
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
fi
echo
# ------------------------- Remove fields not needed ------------------------- #
# Rows are removed by field name ("Column 1") before the data is transposed.
echo "Nicht benötigte Felder löschen..."
# title project:
# 1. remove the rows whose field name is one of the listed M|... values
# 2. remove every row whose field name matches the regex ^M\|9
#    (case-insensitive text filter)
if curl -fs \
--data project="${projects[$titel]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "M|025_",
"l": "M|025_"
}
},
{
"v": {
"v": "M|025e",
"l": "M|025e"
}
},
{
"v": {
"v": "M|004",
"l": "M|004"
}
},
{
"v": {
"v": "M|011",
"l": "M|011"
}
},
{
"v": {
"v": "M|026_",
"l": "M|026_"
}
},
{
"v": {
"v": "M|026a",
"l": "M|026a"
}
},
{
"v": {
"v": "M|026d",
"l": "M|026d"
}
},
{
"v": {
"v": "M|026g",
"l": "M|026g"
}
},
{
"v": {
"v": "M|030",
"l": "M|030"
}
},
{
"v": {
"v": "M|037z",
"l": "M|037z"
}
},
{
"v": {
"v": "M|038b",
"l": "M|038b"
}
},
{
"v": {
"v": "M|070",
"l": "M|070"
}
},
{
"v": {
"v": "M|073",
"l": "M|073"
}
},
{
"v": {
"v": "M|076z",
"l": "M|076z"
}
},
{
"v": {
"v": "M|080",
"l": "M|080"
}
},
{
"v": {
"v": "M|800s",
"l": "M|800s"
}
},
{
"v": {
"v": "M|802",
"l": "M|802"
}
},
{
"v": {
"v": "M|808b",
"l": "M|808b"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "^M\\|9",
"mode": "regex",
"caseSensitive": false,
"invert": false
}
],
"mode": "row-based"
}
}
]
JSON
then
log "transformed ${titel} (${projects[$titel]})"
else
error "transform ${titel} (${projects[$titel]}) failed!"
fi
# item project: remove the rows whose field name is one of the listed E|...
# values
if curl -fs \
--data project="${projects[$exemplare]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "value",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "E|A02",
"l": "E|A02"
}
},
{
"v": {
"v": "E|A86",
"l": "E|A86"
}
},
{
"v": {
"v": "E|SUB",
"l": "E|SUB"
}
},
{
"v": {
"v": "E|FMT",
"l": "E|FMT"
}
},
{
"v": {
"v": "E|CAT",
"l": "E|CAT"
}
},
{
"v": {
"v": "E|027",
"l": "E|027"
}
},
{
"v": {
"v": "E|123",
"l": "E|123"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
}
]
JSON
then
log "transformed ${exemplare} (${projects[$exemplare]})"
else
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
fi
echo
# --------------------------------- Transpose -------------------------------- #
# Equivalent step in the OpenRefine UI:
# - Column 1 > Transpose > Columnize by key/value columns... > OK
# "Column 1" (field name) is used as key column and "Column 2" as value
# column; no note column is set. The same operation runs on both projects.
echo "Transponieren..."
# title project
if curl -fs \
--data project="${projects[$titel]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/key-value-columnize",
"keyColumnName": "Column 1",
"valueColumnName": "Column 2",
"noteColumnName": "",
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
}
]
JSON
then
log "transformed ${titel} (${projects[$titel]})"
else
error "transform ${titel} (${projects[$titel]}) failed!"
fi
# item project
if curl -fs \
--data project="${projects[$exemplare]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/key-value-columnize",
"keyColumnName": "Column 1",
"valueColumnName": "Column 2",
"noteColumnName": "",
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
}
]
JSON
then
log "transformed ${exemplare} (${projects[$exemplare]})"
else
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
fi
echo
# ----------------------------- Extract title id ----------------------------- #
# Creates the columns that are later used to look up the items of a title.
echo "Titel-ID separieren..."
# title project: new column "id" = M|IDN with leading zeros removed
if curl -fs \
--data project="${projects[$titel]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|IDN",
"expression": "grel:value.replace(/^0+/,'')",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 12,
"description": "Create column id at index 12 based on column M|IDN using expression grel:value.replace(/^0+/,'')"
}
]
JSON
then
log "transformed ${titel} (${projects[$titel]})"
else
error "transform ${titel} (${projects[$titel]}) failed!"
fi
# item project: new column "titel_id" = first part of E|BIB (split at the
# U+001F character), without its first character and without leading zeros
if curl -fs \
--data project="${projects[$exemplare]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|BIB",
"expression": "grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')",
"onError": "set-to-blank",
"newColumnName": "titel_id",
"columnInsertIndex": 18,
"description": "Create column titel_id at index 18 based on column E|BIB using expression grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')"
}
]
JSON
then
log "transformed ${exemplare} (${projects[$exemplare]})"
else
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
fi
echo
# --------------------------- Enrich with item data -------------------------- #
echo "Exemplare anreichern..."
# item columns that are copied into the title project
columns=( "E|001" "E|002a" "E|003" "E|004" "E|027" "E|030" "E|050" "E|100" "E|115" "E|120" "E|123" "E|A02" "E|A72" "E|A73" "E|A87" "E|A91" "E|A95" "E|BIB" "E|CAT" "E|FMT" "E|IDN" "E|LDR" "E|STA" "E|SUB" "E|105" "E|107" "E|A94" "E|125" "E|072" "E|A98" "E|HOL" "E|A86" "E|A63" "E|A70" "E|A83" "E|A85" "E|ABO" "E|A97" "E|A82" "E|002" "E|ORD" )
# Generate one JSON array with two operations per item column:
# 1. add the column to the title project; the values of all items found via
#    cross() on the item project's "titel_id" are joined with ␞
# 2. split the joined values into multi-valued cells again (key column M|001)
# The heredoc is unquoted on purpose: ${exemplare} and ${column} are expanded
# by the shell.
# The operations file is written with ">" for the whole loop (instead of
# appending per iteration), so a file left over from an earlier failed run
# cannot lead to duplicated operations.
for column in "${columns[@]}"; do
  cat << JSON
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "id",
"expression": "grel:forEach(value.cross('${exemplare}','titel_id'),r,forNonBlank(r.cells['${column}'].value,v,v,'')).join('␞')",
"onError": "set-to-blank",
"newColumnName": "${column}",
"columnInsertIndex": 13
},
{
"op": "core/multivalued-cell-split",
"columnName": "${column}",
"keyColumnName": "M|001",
"mode": "separator",
"separator": "␞",
"regex": false
}
]
JSON
done > "${workdir}/${titel}.tmp"
# jq -s add concatenates the per-column arrays into a single operations array,
# which is piped to curl (operations@- reads stdin)
if "${jq}" -s add "${workdir}/${titel}.tmp" | curl -fs \
  --data project="${projects[$titel]}" \
  --data-urlencode operations@- \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
then
  log "transformed ${titel} (${projects[$titel]})"
  # the operations file is only removed on success
  rm "${workdir}/${titel}.tmp"
else
  error "transform ${titel} (${projects[$titel]}) failed!"
fi
echo
# ================================== EXPORT ================================== #
checkpoint "Export"
echo
format="tsv"
# project name without suffix (everything from the first "-" on is cut off)
p="${titel%%-*}"
echo "export ${titel} to ${format} file..."
# all rows of the title project (no facets, row-based) are written to
# <workdir>/<p>.<format>
if ! curl -fs \
  --data project="${projects[$titel]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
  > "${workdir}/${p}.${format}"
then
  error "export of ${titel} (${projects[$titel]}) failed!"
else
  log "exported ${titel} (${projects[$titel]}) to ${workdir}/${p}.${format}"
fi
echo
# ================================== FINISH ================================== #
checkpoint "Finish"
echo
# stop OpenRefine server
refine_stop
echo
# calculate run time based on checkpoints
checkpoint_stats
echo
# word count on all files in workdir
count_output

View File

@ -1,767 +0,0 @@
#!/bin/bash
# Bibliotheca preprocessing
# - import the export of one of the libraries
# - convert it into a tabular format
# - export as TSV
# =============================== ENVIRONMENT ================================ #
# source the main script
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
# read input: $1 = export file; the basename (without .imp) is used as project
# name, the resolved absolute path is stored in projects[<name>]
if [[ $1 ]]; then
  p="$(basename "$1" .imp)"
  # readlink -e fails for non-existing paths; abort right away instead of
  # storing an empty path that would only surface as a failed import after
  # the OpenRefine server has been started
  projects[$p]="$(readlink -e "$1")" \
    || { echo 1>&2 "Input file not found: $1"; exit 1; }
else
  echo 1>&2 "Please provide path to input file"; exit 1
fi
# check requirements, set trap, create workdir and tee to logfile
init
# ================================= STARTUP ================================== #
checkpoint "Startup"; echo
# print environment variables
printenv | grep REFINE; echo
# start OpenRefine server
refine_start; echo
# ================================== IMPORT ================================== #
checkpoint "Import"; echo
# Import settings:
# - line-based text file
# - character encoding: ISO-8859-1
# - "store blank rows" disabled
# - ignore the first line at the beginning of the file
# storeBlankRows is passed as a JSON boolean (not as the string "false") so
# that the option is typed unambiguously.
# NOTE(review): whether OpenRefine also honours the string form could not be
# confirmed from this file — the boolean yields the documented intent either way
echo "import file" "${projects[$p]}" "..."
if curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@${projects[$p]}" \
  --form project-name="${p}" \
  --form format="line-based" \
  --form options='{
"encoding": "ISO-8859-1",
"storeBlankRows": false,
"ignoreLines": 1
}' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workdir}/${p}.id"
then
  log "imported ${projects[$p]} as ${p}"
else
  error "import of ${projects[$p]} failed!"
fi
# NOTE(review): refine_store presumably replaces projects[$p] with the new
# project id taken from the .id file — confirm in bash-refine.sh
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
echo
# ================================ TRANSFORM ================================= #
checkpoint "Transform"; echo
# -------------------------- Remove discarded media -------------------------- #
# spec_Z_03
# deletes all titles (and their items) that contain only discarded items
# ("EXSTA M"), then deletes all remaining discarded items
# Operations in order:
# 1. add helper column "tmp" (copy of Column 1) for records matching the text
#    query "*********M"
# 2. move "tmp" to position 0
#    (presumably so that it becomes the record key column — TODO confirm)
# 3. remove the records whose unique "EXSTA ." matches, joined together, are
#    exactly "EXSTA M"
# 4. copy Column 1 into "tmp" on the rows matching the text query "*********E"
# 5. remove the records matching the text query "EXSTA M"
# 6. remove the helper column "tmp"
echo "Makulierte Medien löschen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "*********M",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "record-based"
},
"baseColumnName": "Column 1",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "tmp",
"columnInsertIndex": 1
},
{
"op": "core/column-move",
"columnName": "tmp",
"index": 0
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "grel:if(isNonBlank(cells['tmp'].value),with(row.record.cells[columnName].value.join('').find(/EXSTA ./).uniques().join(''),v,v),null)",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "EXSTA M",
"l": "EXSTA M"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
}
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "*********E",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "row-based"
},
"columnName": "tmp",
"expression": "grel:cells['Column 1'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "EXSTA M",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "record-based"
}
},
{
"op": "core/column-removal",
"columnName": "tmp"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ---------------------------- Remove ACQ records ---------------------------- #
# spec_Z_03
# deletes all titles (and their items) that carry the marker ACQ,
# then deletes all remaining items with the marker ACQ
# Operations in order:
# 1. add helper column "tmp" (copy of Column 1) for records matching the text
#    query "*********M"
# 2. move "tmp" to position 0
#    (presumably so that it becomes the record key column — TODO confirm)
# 3. remove the records for which the facet expression (unique "MEKZ ."
#    matches, joined together) yields the selected value "MEKZ ACQ"
# 4. copy Column 1 into "tmp" on the rows matching the text query "*********E"
# 5. remove the records matching the text query "MEKZ ACQ"
# 6. remove the helper column "tmp"
echo "ACQ Datensätze löschen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "*********M",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "record-based"
},
"baseColumnName": "Column 1",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "tmp",
"columnInsertIndex": 1
},
{
"op": "core/column-move",
"columnName": "tmp",
"index": 0
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Column 1",
"expression": "grel:if(isNonBlank(cells['tmp'].value),with(row.record.cells[columnName].value.join('').find(/MEKZ ./).uniques().join(''),v,v),null)",
"columnName": "Column 1",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "MEKZ ACQ",
"l": "MEKZ ACQ"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
}
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "*********E",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "row-based"
},
"columnName": "tmp",
"expression": "grel:cells['Column 1'].value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "MEKZ ACQ",
"mode": "text",
"caseSensitive": false,
"invert": false
}
],
"mode": "record-based"
}
},
{
"op": "core/column-removal",
"columnName": "tmp"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------- Extract multi-line content ----------------------- #
# Equivalent steps in the OpenRefine UI:
# - Column 1 > Text filter > enable regular expression > ^\* > invert
# -- Column 1 > Edit column > Add column based on this column...
#    > value > value.slice(1)
# -- Column 1 > Edit cells > Transform... > null
# i.e. for every row that does NOT start with "*", the content without its
# first character is copied into the new column "value" and Column 1 is set
# to null.
echo "Mehrzeilige Inhalte extrahieren..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "^\\*",
"mode": "regex",
"caseSensitive": false,
"invert": true
}
],
"mode": "row-based"
},
"baseColumnName": "Column 1",
"expression": "grel:value.slice(1)",
"onError": "set-to-blank",
"newColumnName": "value",
"columnInsertIndex": 1
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "text",
"name": "Column 1",
"columnName": "Column 1",
"query": "^\\*",
"mode": "regex",
"caseSensitive": false,
"invert": true
}
],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------------ Remove blank rows --------------------------- #
# Equivalent steps in the OpenRefine UI:
# - All > Facet > Facet by blank > true
# - All > Edit rows > Remove all matching rows
# The facet expression is "true" for rows in which no column holds a
# non-blank value; exactly those rows are removed.
echo "Leerzeilen löschen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "Blank Rows",
"expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
"columnName": "",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "true",
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# -------------------------- Split fields and values ------------------------- #
# Equivalent steps in the OpenRefine UI:
# - value > Facet > Customized facets > Facet by blank > true
# -- value > Edit cells > Transform... > cells['Column 1'].value.slice(9)
# - Column 1 > Edit cells > Transform > value[3,8]
# - Column 1 > Edit column > Rename this column > key
# i.e. where "value" is still blank it is filled with Column 1 from character
# 9 on; Column 1 is then reduced to characters 3-7 and renamed to "key".
echo "Felder und Werte aufteilen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "value",
"expression": "isBlank(value)",
"columnName": "value",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "value",
"expression": "grel:cells['Column 1'].value.slice(9)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "Column 1",
"expression": "grel:value[3,8]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-rename",
"oldColumnName": "Column 1",
"newColumnName": "key"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# -------------------- Join multi-line content (with #) ---------------------- #
# Equivalent step in the OpenRefine UI:
# - value > Edit cells > Join multi-valued cells... > ␟
#   (this is the Unicode character U+241F)
# Joins the cells of column "value" (key column "key") with the separator ␟.
echo "Mehrzeilige Inhalte (mit #) zusammenführen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/multivalued-cell-join",
"columnName": "value",
"keyColumnName": "key",
"separator": "␟"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ---------------------- Prefix field names with M or E ---------------------- #
# Equivalent steps in the OpenRefine UI:
# - key > Facet > Text facet > *****
# -- value > Edit column > Add column based on this column... > typ > value
# - typ > Edit cells > Fill down
# - key > Facet > Text facet > *****
# -- All > Edit rows > Remove all matching rows
# - key > Edit cells > Transform... > cells['typ'].value + '|' + value
# - typ > Edit column > Remove this column
# i.e. the value of each "*****" row is copied into the helper column "typ"
# and filled down, the "*****" rows themselves are removed, every key is
# prefixed with "<typ>|", and the helper column is dropped again.
echo "Feldnamen um M oder E ergänzen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "key",
"expression": "value",
"columnName": "key",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "*****",
"l": "*****"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "value",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "typ",
"columnInsertIndex": 2
},
{
"op": "core/fill-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "typ"
},
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "key",
"expression": "value",
"columnName": "key",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "*****",
"l": "*****"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "key",
"expression": "grel:cells['typ'].value + '|' + value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "typ"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------- Merge repeated fields ---------------------------- #
# Equivalent steps in the OpenRefine UI:
# - key > Edit cells > Blank down
# - value > Edit cells > join multi-valued cells... > ␟
# i.e. repeated keys are blanked and their values are joined into one cell,
# separated by ␟.
echo "Mehrfachbelegungen zusammenführen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "key"
},
{
"op": "core/multivalued-cell-join",
"columnName": "value",
"keyColumnName": "key",
"separator": "␟"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# --------------------- Remove numeric title data fields --------------------- #
# except 025z 026 026k 052 076b 076d
# A row is removed when characters 2-3 of the key (trimmed) are numeric and
# characters 2-5 are none of the exceptions listed above.
echo "Titeldaten-Felder mit Zahlen löschen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "key",
"expression": "grel:and(isNumeric(value[2,4].trim()), not(or(value[2,6] == '025z', value[2,6] == '026 ', value[2,6] == '026k', value[2,6] == '052 ', value[2,6] == '076b', value[2,6] == '076d')))",
"columnName": "key",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
}
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# --------------------------------- Transpose -------------------------------- #
# Equivalent step in the OpenRefine UI:
# - key > Transpose > Columnize by key/value columns... > OK
# "key" is used as key column and "value" as value column; no note column is
# set.
echo "Transponieren..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/key-value-columnize",
"keyColumnName": "key",
"valueColumnName": "value",
"noteColumnName": ""
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ================================== EXPORT ================================== #
checkpoint "Export"
echo
format="tsv"
echo "export ${p} to ${format} file..."
# all rows of the project (no facets, row-based) are written to
# <workdir>/<p>.<format>
if ! curl -fs \
  --data project="${projects[$p]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
  > "${workdir}/${p}.${format}"
then
  error "export of ${p} (${projects[$p]}) failed!"
else
  log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
fi
echo
# ================================== FINISH ================================== #
checkpoint "Finish"
echo
# stop OpenRefine server
refine_stop
echo
# calculate run time based on checkpoints
checkpoint_stats
echo
# word count on all files in workdir
count_output

View File

@ -1,198 +0,0 @@
#!/bin/bash
# Alephino main processing
# - data cleaning
# - mapping to PICA3
# - export PICA3 as CSV (via template)
# =============================== ENVIRONMENT ================================ #
# source the main script (presumably defines the helpers used below, such as
# init, checkpoint, refine_start, log and error -- they are not defined here)
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
# read input: $1 is the directory that holds the *.tsv files to import
if [[ $1 ]]; then
  # readlink -e prints nothing and returns non-zero when the path does not
  # exist. Without this check inputdir would be empty and the import glob
  # "${inputdir}"/*.tsv would silently turn into /*.tsv.
  if ! inputdir="$(readlink -e "$1")" || [[ ! -d "${inputdir}" ]]; then
    echo 1>&2 "Input directory '$1' does not exist"; exit 1
  fi
else
  echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
fi
# check requirements, set trap, create workdir and tee to logfile
init
# ================================= STARTUP ================================== #
checkpoint "Startup"; echo
# start OpenRefine server
refine_start; echo
# ================================== IMPORT ================================== #
checkpoint "Import"; echo
# Pack the TSV exports of all individual projects into one zip archive
# (-j stores the files without their directory paths)
# NOTE(review): the exit status of zip is not checked; a missing or empty
# input directory only surfaces when the upload below fails.
zip -j "${workdir}/alephino.zip" "${inputdir}"/*.tsv
projects["alephino"]="${workdir}/alephino.zip"
# Create a new project from the zip archive
p="alephino"
echo "import file" "${projects[$p]}" "..."
# Upload as tab-separated text with includeFileSources enabled; a later step
# reads a column named "File" that holds the source file names.
# curl prints the redirect URL of the response, which is saved to ${p}.id.
if curl -fs --write-out "%{redirect_url}\n" \
--form project-file="@${projects[$p]}" \
--form project-name="${p}" \
--form format="text/line-based/*sv" \
--form options='{
"encoding": "UTF-8",
"includeFileSources": "true",
"separator": "\t"
}' \
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
> "${workdir}/${p}.id"
then
log "imported ${projects[$p]} as ${p}"
else
error "import of ${projects[$p]} failed!"
fi
# presumably refine_store extracts the project id from the saved URL and puts
# it into projects[$p] (later requests send projects[$p] as the project
# parameter) -- confirm in bash-refine.sh
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
echo
# ================================ TRANSFORM ================================= #
checkpoint "Transform"; echo
# ------------------------------- Sort columns ------------------------------- #
# so that records mode is preserved
# Every operation below moves one column to index 0, so the column moved LAST
# ends up leftmost. Resulting order: M|IDN, M|026f, M|029, E|001, File.
# The progress message used to announce "M|001, E|001, File", which did not
# match the operations; it now states the order that is actually produced.
echo "Spalten sortieren: Beginnen mit 1. M|IDN, 2. M|026f, 3. M|029, 4. E|001, 5. File..."
# The operation list is read by curl from stdin (operations@-); the heredoc
# delimiter is quoted, so the shell does not expand anything inside the JSON.
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-move",
"columnName": "File",
"index": 0
},
{
"op": "core/column-move",
"columnName": "E|001",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|029",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|026f",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|IDN",
"index": 0
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------------------ File ---------------------------------- #
# Derives the library code from the import file name stored in column "File":
# 'leipzig.tsv' becomes 'LE', 'riesa.tsv' becomes 'RS'; every other value
# becomes an empty string (the non-matching branches yield '' and are joined).
echo "Bibliothekskürzel aus Import-Dateiname..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "File",
"expression": "grel:with([ ['leipzig.tsv','LE'], ['riesa.tsv','RS'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------------------ 7100a ---------------------------------- #
# spec_A_E_01
# Adds column "7100a" (inserted at index 5), computed from column "E|100":
# the value is split at the unit separator U+001F, the first part is taken
# and its first character is dropped (presumably a subfield code -- confirm).
# Cells for which the expression fails are left blank (set-to-blank).
echo "Signatur..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|100",
"expression": "grel:value.split('\u001f')[0].slice(1)",
"onError": "set-to-blank",
"newColumnName": "7100a",
"columnInsertIndex": 5
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ================================== EXPORT ================================== #
checkpoint "Export"
echo
# Save the whole OpenRefine project as an archive (used for tests)
format="openrefine.tar.gz"
echo "export ${p} to ${format} file..."
if curl --fail --silent \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/export-project" \
  > "${workdir}/${p}.${format}"
then
  log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
else
  error "export of ${p} (${projects[$p]}) failed!"
fi
echo
# ================================== FINISH ================================== #
checkpoint "Finish"
echo
# shut down the OpenRefine server
refine_stop
echo
# report the run time between the recorded checkpoints
checkpoint_stats
echo
# word count on all files in workdir
count_output

File diff suppressed because it is too large Load Diff

View File

@ -1,688 +0,0 @@
#!/bin/bash
# Generate PICA+
# - enrich PPNs and cluster items
# - export as PICA+
# =============================== ENVIRONMENT ================================ #
# source the main script (presumably defines the helpers used below, such as
# init, checkpoint, refine_start, log and error -- they are not defined here)
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
# read input: $1 is the directory that holds the *.csv files to import
if [[ $1 ]]; then
  # readlink -e prints nothing and returns non-zero when the path does not
  # exist. Without this check inputdir1 would be empty and the import glob
  # "${inputdir1}"/*.csv would silently turn into /*.csv.
  if ! inputdir1="$(readlink -e "$1")" || [[ ! -d "${inputdir1}" ]]; then
    echo 1>&2 "Input directory '$1' does not exist"; exit 1
  fi
else
  echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
fi
# a second input directory is currently disabled
#if [[ $2 ]]; then
# inputdir2="$(readlink -e "$2")"
#fi
# check requirements, set trap, create workdir and tee to logfile
init
# ================================= STARTUP ================================== #
checkpoint "Startup"; echo
# start OpenRefine server
refine_start; echo
# ================================== IMPORT ================================== #
checkpoint "Import"; echo
# TODO: merge with Alephino
# Pack all CSV files of the input directory into one zip archive
# (-j stores the files without their directory paths)
# NOTE(review): the exit status of zip is not checked; a missing or empty
# input directory only surfaces when the upload below fails.
zip -j "${workdir}/ba-sachsen.zip" "${inputdir1}"/*.csv
projects["ba-sachsen"]="${workdir}/ba-sachsen.zip"
# Create a new project from the zip archive
p="ba-sachsen"
echo "import file" "${projects[$p]}" "..."
# Upload as comma-separated text, with includeFileSources disabled.
# curl prints the redirect URL of the response, which is saved to ${p}.id.
if curl -fs --write-out "%{redirect_url}\n" \
--form project-file="@${projects[$p]}" \
--form project-name="${p}" \
--form format="text/line-based/*sv" \
--form options='{
"encoding": "UTF-8",
"includeFileSources": "false",
"separator": ","
}' \
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
> "${workdir}/${p}.id"
then
log "imported ${projects[$p]} as ${p}"
else
error "import of ${projects[$p]} failed!"
fi
# presumably refine_store extracts the project id from the saved URL and puts
# it into projects[$p] (later requests send projects[$p] as the project
# parameter) -- confirm in bash-refine.sh
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
echo
# ================================ TRANSFORM ================================= #
checkpoint "Transform"; echo
# ---------------------------- Enrich PPN via ISBN --------------------------- #
# spec_Z_04
# Operations sent below, in order:
# 1. Add helper column "tmp" from column "2000": hyphens are removed, the value
#    is split at '␟', every part of length 10 is rewritten as '978' + its
#    first nine characters + a recomputed check digit (weights 1 and 3
#    alternating, (10 - sum % 10) % 10); parts are de-duplicated and re-joined.
# 2. Split "tmp" at '␟' into "tmp 1", "tmp 2", ... and drop "tmp".
# 3. Eight text transforms, all limited to rows where "2199" is non-blank and
#    both "0100" and "0110" are blank: first "0100", then "0110" is filled
#    with the first non-blank value of that column found in rows of project
#    'ba-sachsen' whose "tmp 1"/"tmp 2" equals this row's "tmp 1"/"tmp 2"
#    (all four combinations). Because the facet is re-evaluated for every
#    operation, a row that has received a value is skipped by the later ones.
# 4. Remove "tmp 1" and "tmp 2".
# NOTE(review): this assumes the split produces exactly the columns "tmp 1"
# and "tmp 2" -- confirm against the data (more parts would leave "tmp 3"...
# behind, fewer would make the operations that reference "tmp 2" fail).
echo "PPN anreichern über ISBN..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "2000",
"expression": "grel:with(value.replace('-',''),x,forEach(x.split('␟'),v,if(v.length()==10,with('978'+v[0,9],z,z+((10-(sum(forRange(0,12,1,i,toNumber(z[i])*(1+(i%2*2)) )) %10)) %10).toString()[0] ),v))).uniques().join('␟')",
"onError": "set-to-blank",
"newColumnName": "tmp",
"columnInsertIndex": 3
},
{
"op": "core/column-split",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "tmp",
"guessCellType": false,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "␟",
"regex": false,
"maxColumns": 0
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 1'].value.cross('ba-sachsen','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 1'].value.cross('ba-sachsen','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 2'].value.cross('ba-sachsen','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 2'].value.cross('ba-sachsen','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 1'].value.cross('ba-sachsen','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 1'].value.cross('ba-sachsen','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 2'].value.cross('ba-sachsen','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 2'].value.cross('ba-sachsen','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "tmp 1"
},
{
"op": "core/column-removal",
"columnName": "tmp 2"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------------- Cluster items ------------------------------ #
# spec_Z_05
# Operations sent below, in order:
# 1. For rows with a non-blank "2199", add helper column "ppn" (index 1):
#    the value of "0100" if non-blank, otherwise the value of "0110".
# 2. For rows whose "ppn" is blank, set it to the first "ppn" value of the
#    record the row belongs to.
# 3. Reorder rows (record-based) by "ppn" as a case-insensitive string.
# 4. Add column "id" (index 0): "ppn" if non-blank, else "2199", else ''.
# 5. Blank down "id".
# 6. Record-based, for records matching the facet "id is blank": in column
#    "2199" the first row of the record receives all "2199" values of the
#    record joined with '␟'; the other rows of the record are set to null.
# 7. Remove the helper column "ppn".
echo "Exemplare clustern..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "isBlank(value)",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "2199",
"expression": "grel:forNonBlank(cells['0100'].value,v,v,cells['0110'].value)",
"onError": "set-to-blank",
"newColumnName": "ppn",
"columnInsertIndex": 1
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "ppn",
"expression": "isBlank(value)",
"columnName": "ppn",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "ppn",
"expression": "grel:row.record.cells[columnName].value[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-reorder",
"mode": "record-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "ppn",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
}
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "ppn",
"expression": "grel:forNonBlank(cells['ppn'].value,v,v,forNonBlank(cells['2199'].value,v,v,''))",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 0
},
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id"
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "id",
"expression": "isBlank(value)",
"columnName": "id",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
},
"columnName": "2199",
"expression": "grel:if(rowIndex - row.record.fromRowIndex == 0,row.record.cells[columnName].value.join('␟'),null)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "ppn"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ------------------------- Delete duplicate barcodes ------------------------ #
# spec_Z_06
# Step 1: write the values of column "8200" to ${workdir}/barcodes.txt, one
# per line (rows with a blank "8200" contribute nothing).
# NOTE(review): the message below says "duplicate barcodes", but the template
# exports every non-blank barcode, not only duplicated ones -- confirm intent.
format="txt"
echo "Dublette Barcodes exportieren"
# read the template verbatim (quoted delimiter, -r, no field splitting)
IFS= read -r -d '' template << "TEMPLATE"
{{
forNonBlank(cells['8200'].value, v, v + '\n', '')
}}
TEMPLATE
# head -c -2 drops the last two bytes: the newline that ends the heredoc and
# the newline appended by echo
if echo "${template}" | head -c -2 | curl -fs \
--data project="${projects[$p]}" \
--data format="template" \
--data prefix="" \
--data suffix="" \
--data separator="" \
--data engine='{"facets":[],"mode":"row-based"}' \
--data-urlencode template@- \
"${endpoint}/command/core/export-rows" \
> "${workdir}/barcodes.${format}"
then
log "exported ${p} (${projects[$p]}) to ${workdir}/barcodes.${format}"
else
error "export of ${p} (${projects[$p]}) failed!"
fi
echo
# Step 2: blank out duplicated barcodes. The facet selects every row whose
# "8200" value occurs more than once in column "8200"; the transform sets the
# cell to null, so ALL occurrences of a duplicated barcode are removed, not
# only the repeats.
echo "Dublette Barcodes löschen..."
if curl -fs \
--data project="${projects[$p]}" \
--data-urlencode "operations@-" \
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
<< "JSON"
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "8200",
"expression": "facetCount(value, 'value', '8200') > 1",
"columnName": "8200",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "8200",
"expression": "null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column 8200 using expression null"
}
]
JSON
then
log "transformed ${p} (${projects[$p]})"
else
error "transform ${p} (${projects[$p]}) failed!"
fi
echo
# ================================== EXPORT ================================== #
checkpoint "Export"
echo
# Save the whole OpenRefine project as an archive (used for tests)
format="openrefine.tar.gz"
echo "export ${p} to ${format} file..."
if curl --fail --silent \
  --data project="${projects[$p]}" \
  "${endpoint}/command/core/export-project" \
  > "${workdir}/${p}.${format}"
then
  log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
else
  error "export of ${p} (${projects[$p]}) failed!"
fi
echo
# Export as PICA+
# The template consists of two expressions evaluated for every row:
# - first expression: only for the first row of a record; starts with a line
#   break and then writes the title-level fields, one line per field
#   (0500 -> 002@, 0501a/b -> 002C, 0502a/b -> 002D, 0503a/b -> 002E,
#   0100 -> 003@, 0110 -> 003S, 2000 -> 004A, 2199 -> 006Y, 1500 -> 010@,
#   1100a/b/n -> 011@, 1131 -> 013D, 1140 -> 013H, 4000a/d -> 021A,
#   4020a -> 032@, 4030n/p -> 033A, 4060a -> 034D, 0999 -> 046W);
#   "2000" and "2199" are split at '␟' and repeated once per value.
# - second expression: for every row with a non-blank "E0XXb"; writes the
#   item-level fields 208@, 209A, 209B, 209C, 209G, 209O and 220B with the
#   occurrence suffix /exnr, where exnr is the 1-based position of the row
#   inside its record, left-padded with '0' to two digits.
# NOTE(review): for positions >= 100 the padding expression uses a negative
# substring end index -- behaviour not verified.
format="pic"
echo "export ${p} to pica+ file using template..."
# read the template verbatim (quoted delimiter, -r, no field splitting)
IFS= read -r -d '' template << "TEMPLATE"
{{
if(row.index - row.record.fromRowIndex == 0,
'' + '\n'
+ forNonBlank(cells['0500'].value, v, '002@ ' + '0' + v + '\n', '')
+ forNonBlank(cells['0501a'].value, v, '002C ' + 'a' + v + forNonBlank(cells['0501b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0502a'].value, v, '002D ' + 'a' + v + forNonBlank(cells['0502b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0503a'].value, v, '002E ' + 'a' + v + forNonBlank(cells['0503b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0100'].value, v, '003@ ' + '0' + v + '\n', '')
+ forNonBlank(cells['0110'].value, v, '003S ' + '0' + v + '\n', '')
+ forNonBlank(cells['2000'].value, v, forEach(v.split('␟'),x,'004A ' + '0' + x + '\n').join(''), '')
+ forNonBlank(cells['2199'].value, v, forEach(v.split('␟'),x,'006Y ' + '0' + x + '\n').join(''), '')
+ forNonBlank(cells['1500'].value, v, '010@ ' + forEach(v.split('␟'),x,'a' + x).join('') + '\n', '')
+ forNonBlank(cells['1100a'].value, v, '011@ ' + 'a' + v + forNonBlank(cells['1100b'].value, v, 'b' + v, '') + forNonBlank(cells['1100n'].value, v, 'n' + v, '') + '\n', '')
+ forNonBlank(cells['1131'].value, v, '013D ' + 'a' + v + '\n', '')
+ forNonBlank(cells['1140'].value, v, '013H ' + 'a' + v + '\n', '')
+ forNonBlank(cells['4000a'].value, v, '021A ' + 'a' + v + forNonBlank(cells['4000d'].value, v, 'd' + v, '') + '\n', '')
+ forNonBlank(cells['4020a'].value, v, '032@ ' + 'a' + v + '\n', '')
+ if(or(isNonBlank(cells['4030n'].value),isNonBlank(cells['4030p'].value)),'033A ' + forNonBlank(cells['4030n'].value, v, 'n' + v,'') + forNonBlank(cells['4030p'].value, v, 'p' + v, '') + '\n', '')
+ forNonBlank(cells['4060a'].value, v, '034D ' + 'a' + v + '\n', '')
+ forNonBlank(cells['0999'].value, v, '046W ' + 'a' + v + '\n', '')
,'')
}}{{
if(isNonBlank(cells['E0XXb'].value),
with(with(rowIndex - row.record.fromRowIndex + 1, i, '00'[0,2-i.length()] + i),exnr,
'208@/' + exnr + ' a' + cells['E0XX'].value + 'b' + cells['E0XXb'].value + '\n'
+ '209A/' + exnr + ' b4736' + 'j' + cells['7100j'].value + 'f' + cells['7100f'].value + forNonBlank(cells['7100a'].value, v, 'a' + v, '') + forNonBlank(cells['7100d'].value, v, 'd' + v, '') + 'x00' + '\n'
+ forNonBlank(cells['8011'].value, v, '209B/' + exnr + ' a' + v + 'x11' + '\n', '')
+ forNonBlank(cells['8100'].value, v, '209C/' + exnr + ' a' + v + 'x00' + '\n', '')
+ forNonBlank(cells['8200'].value, v, '209G/' + exnr + ' a' + v + '\n', '')
+ forNonBlank(cells['8600'].value, v, '209O/' + exnr + ' a' + v + 'x00' + '\n', '')
+ forNonBlank(cells['8515'].value, v, '220B/' + exnr + ' a' + v + '\n', '')
), '')
}}
TEMPLATE
# head -c -2 drops the last two bytes: the newline that ends the heredoc and
# the newline appended by echo
if echo "${template}" | head -c -2 | curl -fs \
--data project="${projects[$p]}" \
--data format="template" \
--data prefix="" \
--data suffix="" \
--data separator="" \
--data engine='{"facets":[],"mode":"row-based"}' \
--data-urlencode template@- \
"${endpoint}/command/core/export-rows" \
> "${workdir}/${p}.${format}"
then
log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
else
error "export of ${p} (${projects[$p]}) failed!"
fi
echo
# ================================== FINISH ================================== #
checkpoint "Finish"
echo
# shut down the OpenRefine server
refine_stop
echo
# report the run time between the recorded checkpoints
checkpoint_stats
echo
# word count on all files in workdir
count_output