refactoring mit openrefine-task runner
https://github.com/opencultureconsulting/openrefine-task-runner
This commit is contained in:
parent
b188267640
commit
9bad6aeb17
|
@ -1,5 +1,6 @@
|
|||
input
|
||||
lib
|
||||
log
|
||||
output
|
||||
.task
|
||||
.openrefine
|
||||
*/input
|
||||
*/output
|
||||
*/tmp
|
||||
*/log/*
|
||||
|
|
73
README.md
73
README.md
|
@ -1,29 +1,68 @@
|
|||
# Transformation von Bibliotheca und Alephino nach PICA+ für die Bibliotheken der Berufsakademie Sachsen
|
||||
|
||||
## Vorbereitung
|
||||
|
||||
1. Exporte bereitstellen mit folgenden Dateinamen:
|
||||
* alephino/input/leipzig-exemplare.txt
|
||||
* alephino/input/leipzig-titel.txt
|
||||
* alephino/input/riesa-exemplare.txt
|
||||
* alephino/input/riesa-titel.txt
|
||||
* bibliotheca/input/bautzen.imp
|
||||
* bibliotheca/input/breitenbrunn.imp
|
||||
* bibliotheca/input/dresden.imp
|
||||
* bibliotheca/input/glauchau.imp
|
||||
* bibliotheca/input/plauen.imp
|
||||
|
||||
2. Installation Task 3.2.2
|
||||
|
||||
a) RPM-based (Fedora, CentOS, SLES, etc.)
|
||||
|
||||
```sh
|
||||
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.rpm
|
||||
sudo dnf install ./task_linux_amd64.rpm && rm task_linux_amd64.rpm
|
||||
```
|
||||
|
||||
b) DEB-based (Debian, Ubuntu etc.)
|
||||
|
||||
```sh
|
||||
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.deb
|
||||
sudo apt install ./task_linux_amd64.deb && rm task_linux_amd64.deb
|
||||
```
|
||||
|
||||
3. Installation OpenRefine 3.4.1 und openrefine-client 0.3.10
|
||||
|
||||
```
|
||||
task install
|
||||
```
|
||||
|
||||
## Nutzung
|
||||
|
||||
1. Exporte bereitstellen mit folgenden Dateinamen:
|
||||
* input/bautzen.imp
|
||||
* input/breitenbrunn.imp
|
||||
* input/dresden.imp
|
||||
* input/glauchau.imp
|
||||
* input/leipzig-exemplare.txt
|
||||
* input/leipzig-titel.txt
|
||||
* input/plauen.imp
|
||||
* input/riesa-exemplare.txt
|
||||
* input/riesa-titel.txt
|
||||
2. Installation und initiale Datenverarbeitung: `./main.sh`
|
||||
3. Weitere Datenverarbeitungen:
|
||||
* `lib/task` um den gesamten Workflow zu starten
|
||||
* `lib/task --list` für eine Liste der verfügbaren Tasks
|
||||
Datenverarbeitung sequentiell
|
||||
|
||||
```
|
||||
task default
|
||||
```
|
||||
|
||||
Datenverarbeitung (teil)parallelisiert (benötigt bis zu 16 GB RAM)
|
||||
|
||||
```
|
||||
task pica+:main
|
||||
```
|
||||
|
||||
Analyse dubletter Barcodes
|
||||
|
||||
```
|
||||
task barcodes:main
|
||||
```
|
||||
|
||||
## Systemvoraussetzungen
|
||||
|
||||
* Linux mit Bash, cURL und JAVA (getestet auf Fedora 32)
|
||||
* 7 GB freien Arbeitsspeicher
|
||||
* GNU/Linux (getestet auf Fedora 32)
|
||||
* JAVA 8+ (für OpenRefine)
|
||||
* 8 GB freien Arbeitsspeicher
|
||||
|
||||
## Verwendete Tools
|
||||
|
||||
* [OpenRefine](https://openrefine.org/)
|
||||
* [bash-refine](https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d)
|
||||
* [openrefine-client](https://github.com/opencultureconsulting/openrefine-client)
|
||||
* [Task](https://github.com/go-task/task)
|
||||
|
|
293
Taskfile.yml
293
Taskfile.yml
|
@ -1,234 +1,99 @@
|
|||
# https://taskfile.dev
|
||||
# https://github.com/opencultureconsulting/openrefine-task-runner
|
||||
|
||||
version: '3'
|
||||
|
||||
output: 'group'
|
||||
includes:
|
||||
alephino: alephino
|
||||
barcodes: barcodes
|
||||
bibliotheca: bibliotheca
|
||||
pica+: pica+
|
||||
|
||||
vars:
|
||||
DATE: '{{ now | date "20060102_150405"}}'
|
||||
silent: true
|
||||
output: prefixed
|
||||
|
||||
env:
|
||||
REFINE_MEMORY: 8g
|
||||
REFINE_ENDPOINT: http://localhost:3334
|
||||
OPENREFINE:
|
||||
sh: readlink -m .openrefine/refine
|
||||
CLIENT:
|
||||
sh: readlink -m .openrefine/client
|
||||
|
||||
tasks:
|
||||
default:
|
||||
desc: Generierung PICA+
|
||||
# deps: [bibliotheca, alephino]
|
||||
desc: Datenverarbeitung sequentiell
|
||||
cmds:
|
||||
- task: alephino
|
||||
- task: bibliotheca
|
||||
- tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"
|
||||
sources:
|
||||
- tasks/03-ba-sachsen.sh
|
||||
# - output/02-alephino-main/alephino.csv
|
||||
- output/02-bibliotheca-main/bibliotheca.csv
|
||||
generates:
|
||||
- output/03-ba-sachsen/ba-sachsen.pic
|
||||
- output/03-ba-sachsen/ba-sachsen.openrefine.tar.gz
|
||||
env:
|
||||
REFINE_WORKDIR: output/03-ba-sachsen
|
||||
REFINE_LOGFILE: log/03-ba-sachsen/{{.DATE}}.log
|
||||
- task: alephino:main
|
||||
- task: bibliotheca:main
|
||||
- task: pica+:refine
|
||||
|
||||
alephino:
|
||||
desc: Alephino Hauptverarbeitung
|
||||
# deps: [leipzig, riesa]
|
||||
install:
|
||||
desc: (re)install OpenRefine and openrefine-client into subdirectory .openrefine
|
||||
cmds:
|
||||
- task: leipzig
|
||||
- task: riesa
|
||||
- tasks/02-alephino-main.sh "output/01-alephino-pre"
|
||||
sources:
|
||||
- tasks/02-alephino-main.sh
|
||||
- output/01-alephino-pre/*.tsv
|
||||
generates:
|
||||
# - output/02-alephino-main/alephino.csv
|
||||
- output/02-alephino-main/alephino.openrefine.tar.gz
|
||||
env:
|
||||
REFINE_ENDPOINT: http://localhost:3334
|
||||
REFINE_WORKDIR: output/02-alephino-main
|
||||
REFINE_LOGFILE: log/02-alephino-main/{{.DATE}}.log
|
||||
- | # delete existing install and recreate folder
|
||||
rm -rf .openrefine
|
||||
mkdir -p .openrefine
|
||||
- > # download OpenRefine archive
|
||||
wget --no-verbose -O openrefine.tar.gz
|
||||
https://github.com/OpenRefine/OpenRefine/releases/download/3.4.1/openrefine-linux-3.4.1.tar.gz
|
||||
- | # install OpenRefine into subdirectory .openrefine
|
||||
tar -xzf openrefine.tar.gz -C .openrefine --strip 1
|
||||
rm openrefine.tar.gz
|
||||
- | # optimize OpenRefine for batch processing
|
||||
sed -i 's/cd `dirname $0`/cd "$(dirname "$0")"/' ".openrefine/refine" # fix path issue in OpenRefine startup file
|
||||
sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' ".openrefine/refine.ini" # do not try to open OpenRefine in browser
|
||||
sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1440/' ".openrefine/refine.ini" # set autosave period from 5 minutes to 24 hours (1440 minutes)
|
||||
- > # download openrefine-client into subdirectory .openrefine
|
||||
wget --no-verbose -O .openrefine/client
|
||||
https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.10/openrefine-client_0-3-10_linux
|
||||
- chmod +x .openrefine/client # make client executable
|
||||
|
||||
bibliotheca:
|
||||
desc: Bibliotheca Hauptverarbeitung
|
||||
# deps: [bautzen, breitenbrunn, dresden, glauchau, plauen]
|
||||
start:
|
||||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- task: bautzen
|
||||
- task: breitenbrunn
|
||||
- task: dresden
|
||||
- task: glauchau
|
||||
# - task: plauen
|
||||
- tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- tasks/02-bibliotheca-main.sh
|
||||
- output/01-bibliotheca-pre/*.tsv
|
||||
generates:
|
||||
- output/02-bibliotheca-main/bibliotheca.csv
|
||||
- output/02-bibliotheca-main/bibliotheca.openrefine.tar.gz
|
||||
env:
|
||||
REFINE_ENDPOINT: http://localhost:3335
|
||||
REFINE_WORKDIR: output/02-bibliotheca-main
|
||||
REFINE_LOGFILE: log/02-bibliotheca-main/{{.DATE}}.log
|
||||
- | # verify that OpenRefine is installed
|
||||
if [ ! -f "$OPENREFINE" ]; then
|
||||
echo 1>&2 "OpenRefine missing; try task install"; exit 1
|
||||
fi
|
||||
- | # delete temporary files and log file of previous run
|
||||
rm -rf ./*.project* workspace.json
|
||||
rm -rf "{{.PROJECT}}.log"
|
||||
- > # launch OpenRefine with specific data directory and redirect its output to a log file
|
||||
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
|
||||
-d ../{{.DIR}}
|
||||
>> "{{.PROJECT}}.log" 2>&1 &
|
||||
- | # wait until OpenRefine API is available
|
||||
timeout 30s bash -c "until
|
||||
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
|
||||
do sleep 1
|
||||
done"
|
||||
|
||||
bautzen:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
stop:
|
||||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/bautzen.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/bautzen.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "6G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3334
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_bautzen.log
|
||||
- | # shut down OpenRefine gracefully
|
||||
PID=$(lsof -t -i:{{.PORT}})
|
||||
kill $PID
|
||||
while ps -p $PID > /dev/null; do sleep 1; done
|
||||
- > # archive the OpenRefine project
|
||||
tar cfz
|
||||
"{{.PROJECT}}.openrefine.tar.gz"
|
||||
-C $(grep -l "{{.PROJECT}}" *.project/metadata.json | cut -d '/' -f 1)
|
||||
.
|
||||
- rm -rf ./*.project* workspace.json # delete temporary files
|
||||
|
||||
breitenbrunn:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
kill:
|
||||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/breitenbrunn.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/breitenbrunn.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3335
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_breitenbrunn.log
|
||||
- | # shut down OpenRefine immediately to save time and disk space
|
||||
PID=$(lsof -t -i:{{.PORT}})
|
||||
kill -9 $PID
|
||||
while ps -p $PID > /dev/null; do sleep 1; done
|
||||
- rm -rf ./*.project* workspace.json # delete temporary files
|
||||
|
||||
dresden:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
check:
|
||||
desc: check OpenRefine log for any warnings and exit on error
|
||||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/dresden.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/dresden.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3336
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_dresden.log
|
||||
|
||||
leipzig:
|
||||
desc: Alephino Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"
|
||||
sources:
|
||||
- tasks/01-alephino-pre.sh
|
||||
- '{{.TITEL}}'
|
||||
- '{{.EXEMPLARE}}'
|
||||
generates:
|
||||
- output/01-alephino-pre/leipzig.tsv
|
||||
vars:
|
||||
TITEL: '{{.TITEL | default "input/leipzig-titel.txt"}}'
|
||||
EXEMPLARE: '{{.EXEMPLARE | default "input/leipzig-exemplare.txt"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3337
|
||||
REFINE_WORKDIR: output/01-alephino-pre
|
||||
REFINE_LOGFILE: log/01-alephino-pre/{{.DATE}}_leipzig.log
|
||||
|
||||
glauchau:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/glauchau.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/glauchau.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3338
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_glauchau.log
|
||||
|
||||
plauen:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "input/plauen.imp"
|
||||
sources:
|
||||
- tasks/01-bibliotheca-pre.sh
|
||||
- input/plauen.imp
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/plauen.tsv
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "2G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3339
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
|
||||
|
||||
riesa:
|
||||
desc: Alephino Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"
|
||||
sources:
|
||||
- tasks/01-alephino-pre.sh
|
||||
- '{{.TITEL}}'
|
||||
- '{{.EXEMPLARE}}'
|
||||
generates:
|
||||
- output/01-alephino-pre/riesa.tsv
|
||||
vars:
|
||||
TITEL: '{{.TITEL | default "input/riesa-titel.txt"}}'
|
||||
EXEMPLARE: '{{.EXEMPLARE | default "input/riesa-exemplare.txt"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3340
|
||||
REFINE_WORKDIR: output/01-alephino-pre
|
||||
REFINE_LOGFILE: log/01-alephino-pre/{{.DATE}}_riesa.log
|
||||
|
||||
clean:
|
||||
desc: Alle Daten löschen (reset auf Ausgangszustand)
|
||||
cmds:
|
||||
- rm -r lib log output
|
||||
|
||||
mkdir:
|
||||
desc: Ordner erstellen
|
||||
cmds:
|
||||
- mkdir -p output/01-alephino-pre log/01-alephino-pre
|
||||
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
|
||||
- mkdir -p output/02-alephino-main log/02-alephino-main
|
||||
- mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main
|
||||
- mkdir -p output/03-ba-sachsen log/03-ba-sachsen
|
||||
|
||||
barcodes:
|
||||
desc: Ermitteln von Dubletten
|
||||
deps: [default]
|
||||
cmds:
|
||||
- mkdir -p output/barcodes
|
||||
# Bibliotheca Barcodes extrahieren
|
||||
- for f in input/*.imp; do grep '^\*I BARCO ' "$f" | dos2unix | cut -c 10- | sort > "output/barcodes/$(f=${f##*/}; echo ${f%.*}).raw"; done
|
||||
# Alephino Barcodes extrahieren
|
||||
- for f in input/*-exemplare.txt; do grep '^120 ' "$f" | cut -c 6- | sort > "output/barcodes/$(f=${f##*/}; echo ${f%-*}).raw"; done
|
||||
# Extrahierte Barcodes gegen generiertes PICA+ abgleichen
|
||||
- for f in output/barcodes/*.raw; do comm -12 "$f" <(sort output/03-ba-sachsen/barcodes.txt) > "output/barcodes/$(f=${f##*/}; echo ${f%.*}).filtered"; done
|
||||
# Plauen, Leipzig, Riesa vorübergehend nicht filtern
|
||||
- for f in leipzig riesa plauen; do cp output/barcodes/$f.raw output/barcodes/$f.filtered; done
|
||||
# Dublette Barcodes Gesamtdubletten ermitteln
|
||||
- sort output/barcodes/*.filtered | uniq -d > output/barcodes/duplicates
|
||||
# Dubletten für jeden Teil ermitteln
|
||||
- (cd output/barcodes && for f in *.filtered ; do grep -FxH -f duplicates "$f" | sort | join -o 2.1 -t ':' -a1 -2 2 duplicates - | cut -d '.' -f 1 > "${f}".tmp; done)
|
||||
# Ergebnisse in Tabelle zusammenführen
|
||||
- paste output/barcodes/duplicates output/barcodes/*.tmp | awk -F $'\t' '{sub($1, "\"&\""); print}' > output/barcodes/duplicates.tsv && rm output/barcodes/*.tmp
|
||||
# Bearbeitungsstand
|
||||
- 'echo "Seit Juli 2019 neu hinzugekommene Dubletten: $(comm -13 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
|
||||
- 'echo "Seit Juli 2019 bearbeitete Dubletten: $(comm -23 input/duplicates-2019-07-10.txt output/barcodes/duplicates | wc -l)"'
|
||||
- 'echo "Noch zu bearbeitende Dubletten: $(wc -l < output/barcodes/duplicates)"'
|
||||
# sources:
|
||||
# - input/*
|
||||
# generates:
|
||||
# - output/barcodes/duplicates.tsv
|
||||
- | # find log file(s) and check for "exception" or "error"
|
||||
if grep -i 'exception\|error' $(find . -name '*.log'); then
|
||||
echo 1>&2 "log contains warnings!"; exit 1
|
||||
fi
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
version: '3'
|
||||
|
||||
tasks:
|
||||
main:
|
||||
desc: Konvertierung von Alephino nach PICA3/CSV
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name
|
||||
cmds:
|
||||
- task: refine-pre
|
||||
vars: {PROJECT: leipzig}
|
||||
- task: refine-pre
|
||||
vars: {PROJECT: riesa}
|
||||
- task: refine-main
|
||||
|
||||
refine-pre:
|
||||
dir: ./{{.DIR}}
|
||||
label: '{{.TASK}}-{{.PROJECT}}'
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}'
|
||||
PORT: 3335 # assign a different port for each project
|
||||
RAM: 8192M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
|
||||
cmds:
|
||||
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # Import Titel
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m input/{{.PROJECT}}-titel.txt)"
|
||||
--format fixed-width
|
||||
--encoding UTF-8
|
||||
--columnWidths 5
|
||||
--skipDataLines 0
|
||||
--storeBlankRows false
|
||||
--projectName titel
|
||||
> {{.LOG}}
|
||||
- > # Import Exemplare
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m input/{{.PROJECT}}-exemplare.txt)"
|
||||
--format fixed-width
|
||||
--encoding UTF-8
|
||||
--columnWidths 5
|
||||
--skipDataLines 0
|
||||
--storeBlankRows false
|
||||
--projectName exemplare
|
||||
> {{.LOG}}
|
||||
- | # Titel: Korrekturen Einzelfälle
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-einzelfaelle.json > {{.LOG}}
|
||||
- | # Prefix M bzw. E für Feldnamen
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-prefix.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-prefix.json > {{.LOG}}
|
||||
- | # Datensätze und Feldnamen sortieren
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-sortieren.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-sortieren.json > {{.LOG}}
|
||||
- | # Mehrfachbelegungen zusammenführen
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-mehrfachbelegungen.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-mehrfachbelegungen.json > {{.LOG}}
|
||||
- | # Felder löschen
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-loeschen.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-loeschen.json > {{.LOG}}
|
||||
- | # Transponieren
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-transponieren.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-transponieren.json > {{.LOG}}
|
||||
- | # Titel-ID separieren
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-id-separieren.json > {{.LOG}}
|
||||
"$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-id-separieren.json > {{.LOG}}
|
||||
- | # Titel: Exemplare anreichern
|
||||
"$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-anreichern.json > {{.LOG}}
|
||||
- mkdir -p output
|
||||
- > # Export
|
||||
"$CLIENT" -P {{.PORT}} titel
|
||||
--output "$(readlink -m output/{{.PROJECT}}.tsv)"
|
||||
> {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
PID="$(lsof -t -i:{{.PORT}})"
|
||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||
- task: :kill # shut down OpenRefine immediately to save time and disk space
|
||||
vars: {DIR: '{{.DIR}}/log', PORT: '{{.PORT}}'}
|
||||
- task: :check # check OpenRefine log for any warnings and exit on error
|
||||
vars: {DIR: '{{.DIR}}'}
|
||||
sources:
|
||||
- Taskfile.yml
|
||||
- input/{{.PROJECT}}.imp
|
||||
- config/pre/**
|
||||
generates:
|
||||
- output/{{.PROJECT}}.tsv
|
||||
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
|
||||
|
||||
refine-main:
|
||||
dir: ./{{.DIR}}
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}'
|
||||
PROJECT: alephino
|
||||
PORT: 3335 # assign a different port for each project
|
||||
RAM: 8192M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
|
||||
cmds:
|
||||
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # Zip-Archiv mit Output der Vorverarbeitung erstellen
|
||||
zip -j tmp.zip
|
||||
output/leipzig.tsv
|
||||
output/riesa.tsv
|
||||
- > # Import Zip-Archiv
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m tmp.zip)"
|
||||
--format tsv
|
||||
--includeFileSources true
|
||||
--projectName {{.PROJECT}}
|
||||
> {{.LOG}}
|
||||
&& rm tmp.zip
|
||||
- > # Spalten sortieren: Beginnen mit 1. M|001, 2. E|001, 3. File; damit Records-Mode erhalten bleibt
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/sortieren.json > {{.LOG}}
|
||||
- > # Bibliothekskürzel aus Import-Dateiname
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
|
||||
- > # spec_A_E_01: Signatur 7100a
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
|
||||
# - > # Export der PICA3-Spalten als CSV; Spalte 2199 muss vorne stehen, weil später für Sortierung benötigt
|
||||
# mkdir -p output &&
|
||||
# "$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
# --output "$(readlink -m output/{{.PROJECT}}.csv)"
|
||||
# --template "$(< config/main/template.txt)"
|
||||
# --rowSeparator ""
|
||||
# > {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
PID="$(lsof -t -i:{{.PORT}})"
|
||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||||
- task: :check # check OpenRefine log for any warnings and exit on error
|
||||
vars: {DIR: '{{.DIR}}'}
|
||||
sources:
|
||||
- Taskfile.yml
|
||||
- output/*.tsv
|
||||
- config/main/**
|
||||
generates:
|
||||
- log/{{.PROJECT}}.openrefine.tar.gz
|
||||
# - output/{{.PROJECT}}.csv
|
||||
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
|
||||
|
||||
default: # enable standalone execution (running `task` in project directory)
|
||||
cmds:
|
||||
- DIR="${PWD##*/}:main" && cd .. && task "$DIR"
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|100",
|
||||
"expression": "grel:value.split('\u001f')[0].slice(1)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "7100a",
|
||||
"columnInsertIndex": 5
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "File",
|
||||
"expression": "grel:with([ ['leipzig.tsv','LE'], ['riesa.tsv','RS'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
|
@ -0,0 +1,27 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "File",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "E|001",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|029",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|026f",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|IDN",
|
||||
"index": 0
|
||||
}
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|BIB",
|
||||
"expression": "grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "titel_id",
|
||||
"columnInsertIndex": 18,
|
||||
"description": "Create column titel_id at index 18 based on column E|BIB using expression grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,65 @@
|
|||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E|A02",
|
||||
"l": "E|A02"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|A86",
|
||||
"l": "E|A86"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|SUB",
|
||||
"l": "E|SUB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|FMT",
|
||||
"l": "E|FMT"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|CAT",
|
||||
"l": "E|CAT"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|027",
|
||||
"l": "E|027"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|123",
|
||||
"l": "E|123"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
|
@ -0,0 +1,18 @@
|
|||
[
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"description": "Blank down cells in column Column 1"
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "Column 2",
|
||||
"keyColumnName": "Column 1",
|
||||
"separator": "␟",
|
||||
"description": "Join multi-valued cells in column Column 2"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:'E|' + value.replace(' ','')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column Column 1 using expression grel:'E|' + value.replace(' ','')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,80 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E|IDN",
|
||||
"l": "E|IDN"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "Column 2",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "id",
|
||||
"index": 0,
|
||||
"description": "Move column id to position 0"
|
||||
},
|
||||
{
|
||||
"op": "core/fill-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id",
|
||||
"description": "Fill down cells in column id"
|
||||
},
|
||||
{
|
||||
"op": "core/row-reorder",
|
||||
"mode": "row-based",
|
||||
"sorting": {
|
||||
"criteria": [
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "id",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "Column 1",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Reorder rows"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "id",
|
||||
"description": "Remove column id"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,9 @@
|
|||
[
|
||||
{
|
||||
"op": "core/key-value-columnize",
|
||||
"keyColumnName": "Column 1",
|
||||
"valueColumnName": "Column 2",
|
||||
"noteColumnName": "",
|
||||
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
|
||||
}
|
||||
]
|
|
@ -0,0 +1,822 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|001'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|001",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|001",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|002a'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|002a",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|002a",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|003'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|003",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|003",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|004'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|004",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|004",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|027'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|027",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|027",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|030'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|030",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|030",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|050'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|050",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|050",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|100'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|100",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|100",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|115'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|115",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|115",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|120'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|120",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|120",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|123'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|123",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|123",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A02'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A02",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A02",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A72'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A72",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A72",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A73'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A73",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A73",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A87'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A87",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A87",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A91'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A91",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A91",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A95'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A95",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A95",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|BIB'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|BIB",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|BIB",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|CAT'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|CAT",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|CAT",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|FMT'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|FMT",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|FMT",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|IDN'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|IDN",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|IDN",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|LDR'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|LDR",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|LDR",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|STA'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|STA",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|STA",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|SUB'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|SUB",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|SUB",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|105'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|105",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|105",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|107'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|107",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|107",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A94'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A94",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A94",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|125'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|125",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|125",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|072'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|072",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|072",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A98'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A98",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A98",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|HOL'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|HOL",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|HOL",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A86'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A86",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A86",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A63'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A63",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A63",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A70'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A70",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A70",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A83'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A83",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A83",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A85'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A85",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A85",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|ABO'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|ABO",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|ABO",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A97'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A97",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A97",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|A82'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|A82",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|A82",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|002'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|002",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|002",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('exemplare','titel_id'),r,forNonBlank(r.cells['E|ORD'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E|ORD",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "E|ORD",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
}
|
||||
]
|
|
@ -0,0 +1,22 @@
|
|||
[
|
||||
{
|
||||
"op": "core/mass-edit",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "value",
|
||||
"edits": [
|
||||
{
|
||||
"from": [
|
||||
"001st"
|
||||
],
|
||||
"fromBlank": false,
|
||||
"fromError": false,
|
||||
"to": "001"
|
||||
}
|
||||
],
|
||||
"description": "Mass edit cells in column Column 1"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|IDN",
|
||||
"expression": "grel:value.replace(/^0+/,'')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 12,
|
||||
"description": "Create column id at index 12 based on column M|IDN using expression grel:value.replace(/^0+/,'')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,148 @@
|
|||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "M|025_",
|
||||
"l": "M|025_"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|025e",
|
||||
"l": "M|025e"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|004",
|
||||
"l": "M|004"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|011",
|
||||
"l": "M|011"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|026_",
|
||||
"l": "M|026_"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|026a",
|
||||
"l": "M|026a"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|026d",
|
||||
"l": "M|026d"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|026g",
|
||||
"l": "M|026g"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|030",
|
||||
"l": "M|030"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|037z",
|
||||
"l": "M|037z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|038b",
|
||||
"l": "M|038b"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|070",
|
||||
"l": "M|070"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|073",
|
||||
"l": "M|073"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|076z",
|
||||
"l": "M|076z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|080",
|
||||
"l": "M|080"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|800s",
|
||||
"l": "M|800s"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|802",
|
||||
"l": "M|802"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|808b",
|
||||
"l": "M|808b"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "^M\\|9",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
|
@ -0,0 +1,18 @@
|
|||
[
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"description": "Blank down cells in column Column 1"
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "Column 2",
|
||||
"keyColumnName": "Column 1",
|
||||
"separator": "␟",
|
||||
"description": "Join multi-valued cells in column Column 2"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:'M|' + value.replace(' ','')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column Column 1 using expression grel:'M|' + value.replace(' ','')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,80 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "M|IDN",
|
||||
"l": "M|IDN"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "Column 2",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "id",
|
||||
"index": 0,
|
||||
"description": "Move column id to position 0"
|
||||
},
|
||||
{
|
||||
"op": "core/fill-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id",
|
||||
"description": "Fill down cells in column id"
|
||||
},
|
||||
{
|
||||
"op": "core/row-reorder",
|
||||
"mode": "row-based",
|
||||
"sorting": {
|
||||
"criteria": [
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "id",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "Column 1",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Reorder rows"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "id",
|
||||
"description": "Remove column id"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,9 @@
|
|||
[
|
||||
{
|
||||
"op": "core/key-value-columnize",
|
||||
"keyColumnName": "Column 1",
|
||||
"valueColumnName": "Column 2",
|
||||
"noteColumnName": "",
|
||||
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
|
||||
}
|
||||
]
|
|
@ -0,0 +1,38 @@
|
|||
# Taskfile: determine barcodes that occur more than once across all library
# exports (Bibliotheca *.imp and Alephino *-exemplare.txt) and report progress
# against the reference list in input/duplicates-2019-07-10.txt.
version: '3'

tasks:
  main:
    desc: Ermitteln von dubletten Barcodes
    dir: ./{{.DIR}}
    vars:
      # first segment of the namespaced task name ("<dir>:main") is the directory
      DIR: '{{splitList ":" .TASK | first}}'
    cmds:
      - rm -rf tmp
      - mkdir -p tmp output
      # Extract Bibliotheca barcodes (one sorted .raw file per export)
      - for f in ../bibliotheca/input/*.imp; do grep '^\*I BARCO ' "$f" | dos2unix | cut -c 10- | sort > "tmp/$(f=${f##*/}; echo ${f%.*}).raw"; done
      # Extract Alephino barcodes (one sorted .raw file per site)
      - for f in ../alephino/input/*-exemplare.txt; do grep '^120 ' "$f" | cut -c 6- | sort > "tmp/$(f=${f##*/}; echo ${f%-*}).raw"; done
      # Keep only extracted barcodes that are also present in the generated PICA+
      - for f in tmp/*.raw; do comm -12 "$f" <(sort ../pica+/output/barcodes.txt) > "tmp/$(f=${f##*/}; echo ${f%.*}).filtered"; done
      # Temporarily do not filter Plauen, Leipzig, Riesa
      - for f in leipzig riesa plauen; do cp tmp/$f.raw tmp/$f.filtered; done
      # Determine the overall list of duplicate barcodes
      - sort tmp/*.filtered | uniq -d > tmp/duplicates
      # Determine duplicates for each site: one output line per entry in
      # "duplicates" (site name if the site holds that barcode, else empty).
      # sort -u is required: a barcode occurring twice within the same site
      # would otherwise yield two joined lines and shift all following rows
      # when the columns are pasted together.
      - (cd tmp && for f in *.filtered ; do grep -FxH -f duplicates "$f" | sort -u | join -o 2.1 -t ':' -a1 -2 2 duplicates - | cut -d '.' -f 1 > "${f}".tmp; done)
      # Merge results into one table; wrap the barcode (first column) in double
      # quotes. Assign to $1 instead of sub($1, ...): sub() would interpret the
      # barcode as a regular expression.
      - paste tmp/duplicates tmp/*.tmp | awk 'BEGIN { FS = OFS = "\t" } { $1 = "\"" $1 "\""; print }' > output/duplicates.tsv
      # Move the file with the overall duplicates to the output directory
      - mv tmp/duplicates output/duplicates
      # Print processing status
      - 'echo "Seit Juli 2019 neu hinzugekommene Dubletten: $(comm -13 input/duplicates-2019-07-10.txt output/duplicates | wc -l)"'
      - 'echo "Seit Juli 2019 bearbeitete Dubletten: $(comm -23 input/duplicates-2019-07-10.txt output/duplicates | wc -l)"'
      - 'echo "Noch zu bearbeitende Dubletten: $(wc -l < output/duplicates)"'
    preconditions:
      # the PICA+ barcode list must exist before this task can run
      - sh: test -f ../pica+/output/barcodes.txt
        msg: Barcode-Datei fehlt; versuche "task default"

  default: # enable standalone execution (running `task` in project directory)
    cmds:
      - DIR="${PWD##*/}:main" && cd .. && task "$DIR"
|
240
bash-refine.sh
240
bash-refine.sh
|
@ -1,240 +0,0 @@
|
|||
#!/bin/bash
|
||||
# bash-refine v1.3.4: bash-refine.sh, Felix Lohmeier, 2020-11-02
|
||||
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
|
||||
# license: MIT License https://choosealicense.com/licenses/mit/
|
||||
|
||||
# TODO: support for macOS
|
||||
|
||||
# ================================== CONFIG ================================== #
|
||||
|
||||
# Server endpoint, Java heap size and CSRF switch; each one can be overridden
# through the REFINE_* environment variable named in the expansion.
endpoint="${REFINE_ENDPOINT:-http://localhost:3333}"
memory="${REFINE_MEMORY:-1400M}"
csrf="${REFINE_CSRF:-true}"
# timestamp used in the default working directory and log file names
date="$(date +%Y%m%d_%H%M%S)"

# working directory: REFINE_WORKDIR when it resolves to an existing path,
# otherwise output/<timestamp> next to this script
workdir="$(readlink -e "${REFINE_WORKDIR}")"
if [[ -z "${workdir}" ]]; then
    workdir="$(readlink -m "${BASH_SOURCE%/*}/output/${date}")"
fi

# log file: REFINE_LOGFILE when readlink -f can resolve it,
# otherwise log/<timestamp>.log next to this script
logfile="$(readlink -f "${REFINE_LOGFILE}")"
if [[ -z "${logfile}" ]]; then
    logfile="$(readlink -m "${BASH_SOURCE%/*}/log/${date}.log")"
fi

# jq binary: REFINE_JQ when it exists, otherwise lib/jq next to this script
jq="$(readlink -e "${REFINE_JQ}")"
if [[ -z "${jq}" ]]; then
    jq="$(readlink -m "${BASH_SOURCE%/*}/lib/jq")"
fi

# OpenRefine launcher: REFINE_REFINE when it exists,
# otherwise lib/openrefine/refine next to this script
refine="$(readlink -e "${REFINE_REFINE}")"
if [[ -z "${refine}" ]]; then
    refine="$(readlink -m "${BASH_SOURCE%/*}/lib/openrefine/refine")"
fi

declare -A checkpoints # associative array for stats
declare -A pids # associative array for monitoring background jobs
declare -A projects # associative array for OpenRefine projects
|
||||
|
||||
# =============================== REQUIREMENTS =============================== #
|
||||
|
||||
function requirements {
    # Check that java and cURL are available, then download jq and OpenRefine
    # to the locations held in the globals ${jq} and ${refine} if nothing
    # exists there yet. Exits with status 1 when a requirement is missing or
    # a download/extraction fails.

    # check existence of java and cURL
    if [[ -z "$(command -v java 2> /dev/null)" ]] ; then
        echo 1>&2 "ERROR: OpenRefine requires JAVA runtime environment (jre)" \
            "https://openjdk.java.net/install/"
        exit 1
    fi
    if [[ -z "$(command -v curl 2> /dev/null)" ]] ; then
        echo 1>&2 "ERROR: This shell script requires cURL" \
            "https://curl.haxx.se/download.html"
        exit 1
    fi
    # download jq and OpenRefine if necessary
    if [[ -z "$(readlink -e "${jq}")" ]]; then
        echo "Download jq..."
        mkdir -p "$(dirname "${jq}")"
        # jq 1.4 has much faster startup time than 1.5 and 1.6
        # --fail: stop on HTTP errors instead of storing the error page as
        # the jq executable
        if ! curl -L --fail --output "${jq}" \
            "https://github.com/stedolan/jq/releases/download/jq-1.4/jq-linux-x86_64"
        then
            rm -f "${jq}"
            echo 1>&2 "ERROR: download of jq failed"
            exit 1
        fi
        chmod +x "${jq}"; echo
    fi
    if [[ -z "$(readlink -e "${refine}")" ]]; then
        echo "Download OpenRefine..."
        mkdir -p "$(dirname "${refine}")"
        if ! curl -L --fail --output openrefine.tar.gz \
            "https://github.com/OpenRefine/OpenRefine/releases/download/3.4/openrefine-linux-3.4.tar.gz"
        then
            rm -f openrefine.tar.gz
            echo 1>&2 "ERROR: download of OpenRefine failed"
            exit 1
        fi
        echo "Install OpenRefine in subdirectory $(dirname "${refine}")..."
        if ! tar -xzf openrefine.tar.gz -C "$(dirname "${refine}")" --strip 1 --totals
        then
            rm -f openrefine.tar.gz
            echo 1>&2 "ERROR: extracting OpenRefine failed"
            exit 1
        fi
        rm -f openrefine.tar.gz
        # do not try to open OpenRefine in browser
        sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' \
            "$(dirname "${refine}")"/refine.ini
        # set min java heap space to allocated memory
        sed -i 's/-Xms$REFINE_MIN_MEMORY/-Xms$REFINE_MEMORY/' \
            "$(dirname "${refine}")"/refine
        # enable REFINE_AUTOSAVE_PERIOD and set it to 1500
        # (original note: "from 5 minutes to 25 hours")
        sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1500/' \
            "$(dirname "${refine}")"/refine.ini
        echo
    fi
}
|
||||
|
||||
# ============================== OPENREFINE API ============================== #
|
||||
|
||||
function refine_start {
    # Launch the OpenRefine server in the background and wait until the
    # endpoint answers. Reads the globals ${refine}, ${memory}, ${endpoint}
    # and ${workdir}; stores the server's process id in the global pid_server.
    # Aborts via error() when the server does not answer within 30 seconds.
    echo "start OpenRefine server..."
    local dir
    dir="$(readlink -e "${workdir}")"
    # quoted so that an installation path containing spaces is not word-split
    "${refine}" -v warn -m "${memory}" -p "${endpoint##*:}" -d "${dir}" &
    pid_server=${!}
    # poll the endpoint once per second until the page mentions OpenRefine
    timeout 30s bash -c "until curl -s \"${endpoint}\" \
        | cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
        || error "starting OpenRefine server failed!"
}
|
||||
|
||||
function refine_stats {
    # report start time, elapsed time, memory share, CPU share and resident
    # set size of the server process stored in pid_server
    ps -p "${pid_server}" -o start,etime,%mem,%cpu,rss
}
|
||||
|
||||
function refine_kill {
    # SIGKILL (kill -9) stops the server at once, so OpenRefine gets no
    # chance to save its projects; messages from kill/wait are discarded
    {
        kill -9 "${pid_server}" && wait "${pid_server}"
    } 2> /dev/null
    # remove the temporary OpenRefine projects left in the working directory
    (
        cd "${workdir}" || exit
        rm -rf ./*.project* && rm -f workspace.json
    )
}
|
||||
|
||||
function refine_check {
    # search the log file case-insensitively for "exception" or "error";
    # matching lines are printed and the run is aborted through error()
    if ! grep -i -e 'exception' -e 'error' "${logfile}"; then
        log "checked log file, all good!"
    else
        error "log contains warnings!"
    fi
}
|
||||
|
||||
function refine_stop {
    # print the server load, kill the server, then inspect the log file
    printf '%s\n' "stop OpenRefine server and print server load..."
    refine_stats
    printf '\n'
    refine_kill
    printf '%s\n' "check log for any warnings..."
    refine_check
}
|
||||
|
||||
function refine_csrf {
    # Print the query string "?csrf_token=<token>" for the running server
    # (CSRF tokens were introduced in OpenRefine 3.3). Prints nothing when
    # the global csrf is not "true"; aborts via error() when the server's
    # answer does not start with {"token":"
    [[ "${csrf}" = true ]] || return 0
    local reply
    reply=$(curl -fs "${endpoint}/command/core/get-csrf-token")
    if [[ "${reply}" == '{"token":"'* ]]; then
        # the token is the fourth field when splitting at double quotes
        echo "?csrf_token=$(cut -d \" -f 4 <<< "${reply}")"
    else
        error "getting CSRF token failed!"
    fi
}
|
||||
|
||||
function refine_store {
    # Register an imported project: $1 is the key in the associative array
    # projects, $2 a file whose content is split at "=" to obtain the
    # project id (second field). The file is removed once the id has the
    # expected length of 13 characters; every failed check aborts via error().
    if [[ $# != 2 ]]; then
        error "invalid arguments supplied to import function!"
    else
        projects[$1]=$(cut -d '=' -f 2 "$2")
    fi
    if [[ "${#projects[$1]}" = 13 ]]; then
        rm "$2"
    else
        error "returned project id is not valid!"
    fi
    # check if project contains at least one row (may be skipped to gain ~40ms)
    local total
    total=$(curl -fs --get \
        --data project="${projects[$1]}" \
        --data limit=0 \
        "${endpoint}/command/core/get-rows" \
        | tr "," "\n" | grep total | cut -d ":" -f 2)
    if [[ "${total}" = "0" ]]; then
        error "imported project contains 0 rows!"
    fi
}
|
||||
|
||||
# ============================ SCRIPT ENVIRONMENT ============================ #
|
||||
|
||||
function log {
    # print status message $1 prefixed with the current time (millisecond
    # precision) and the "[ client]" marker
    printf '%s [ client] %s\n' "$(date +%H:%M:%S.%3N)" "$1"
}
|
||||
|
||||
function error {
    # write "ERROR: $1" to stderr, kill the OpenRefine server and all child
    # processes of this shell, then exit with status 1
    printf 'ERROR: %s\n' "$1" 1>&2
    refine_kill
    pkill -P $$
    exit 1
}
|
||||
|
||||
function monitor {
    # remember the process id of the most recent background job under key $1
    pids["$1"]=$!
}
|
||||
|
||||
function monitoring {
    # Wait for every background job recorded in the associative array pids.
    # A finished job is removed from the array; a failed job aborts the run
    # via error(), naming the key and the project id stored under it.
    # Afterwards the log file is checked with refine_check.
    for pid in "${!pids[@]}"; do
        if wait "${pids[$pid]}"; then
            # quoted as a whole so the subscript is not glob-expanded
            unset "pids[${pid}]"
        else
            error "${pid} (${projects[$pid]}) failed!"
        fi
    done
    refine_check
}
|
||||
|
||||
function checkpoint {
    # record the current time (seconds.milliseconds) under key $1 in the
    # associative array checkpoints and print "<count>. <name>" framed by
    # "=" rules
    checkpoints[$1]=$(date +%s.%3N)
    local rule left right
    # 40 "=" characters; the precision passed to printf below trims them
    rule="$(printf '%0.1s' ={1..40})"
    left=$(( (80 - 2 - ${#1}) / 2 ))
    right=$(( (80 - 1 - ${#1}) / 2 ))
    printf '%*.*s %s %*.*s\n' \
        0 "${left}" "${rule}" \
        "${#checkpoints[@]}. $1" \
        0 "${right}" "${rule}"
}
|
||||
|
||||
function checkpoint_stats {
    # print the start time and duration of every checkpoint stored in the
    # associative array checkpoints, followed by the total run time
    local name names stamps idx elapsed
    echo "starting time and run time (hh:mm:ss) of each step..."
    # checkpoint names ordered by their recorded timestamp
    readarray -t names < <(
        for name in "${!checkpoints[@]}"; do
            echo "${checkpoints[$name]}:::${name}"
        done | sort | awk -F::: '{print $2}')
    # matching timestamps with the millisecond part cut off
    stamps=()
    for name in "${names[@]}"; do
        stamps+=("${checkpoints[$name]%.*}")
    done
    # the current time closes the last step
    stamps+=("$(date +%s)")
    # one line per step: name, position, start time, duration
    for idx in "${!names[@]}"; do
        elapsed=$(( stamps[idx + 1] - stamps[idx] ))
        printf "%35s %s %s %s\n" "${names[$idx]}" "($((idx + 1)))" \
            "$(date -d @"${stamps[$idx]}")" \
            "($(date -d @${elapsed} -u +%H:%M:%S))"
    done
    # total: from the first checkpoint to now
    elapsed=$(( stamps[${#names[@]}] - stamps[0] ))
    printf "%80s\n%80s\n" "----------" "($(date -d @${elapsed} -u +%H:%M:%S))"
}
|
||||
|
||||
function count_output {
    # list line and byte counts of all files in the working directory
    echo "files (number of lines / size in bytes) in ${workdir}..."
    (
        cd "${workdir}" || exit
        wc -l -c ./*
    )
}
|
||||
|
||||
function init {
    # make sure the required software is present (downloads it if necessary)
    requirements
    # abort cleanly through error() when the script is interrupted
    trap 'error "script interrupted!"' HUP INT QUIT TERM
    # create the working directory and the directory of the log file
    mkdir -p "${workdir}" "$(dirname "${logfile}")"
    # from here on, copy stdout and stderr into the log file as well
    exec > >(tee -i -a "${logfile}") 2>&1
}
|
|
@ -0,0 +1,204 @@
|
|||
version: '3'
|
||||
|
||||
tasks:
|
||||
# Entry point of this Taskfile: runs refine-pre once per library export
# (bautzen, breitenbrunn, dresden, glauchau; plauen is commented out) and
# then refine-main.
main:
|
||||
desc: Konvertierung von BIBLIOTHECA nach PICA3/CSV
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name
|
||||
cmds:
|
||||
- task: refine-pre
|
||||
vars: {PROJECT: bautzen}
|
||||
- task: refine-pre
|
||||
vars: {PROJECT: breitenbrunn}
|
||||
- task: refine-pre
|
||||
vars: {PROJECT: dresden}
|
||||
- task: refine-pre
|
||||
vars: {PROJECT: glauchau}
|
||||
# - task: refine-pre
|
||||
# vars: {PROJECT: plauen}
|
||||
- task: refine-main
|
||||
|
||||
# Pre-processing of one export (variable PROJECT): starts OpenRefine, imports
# input/<PROJECT>.imp, applies the operation histories in config/pre/ in the
# order listed, exports output/<PROJECT>.tsv, prints the server's resource
# usage and finally calls the :kill and :check tasks.
refine-pre:
|
||||
dir: ./{{.DIR}}
|
||||
label: '{{.TASK}}-{{.PROJECT}}'
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}'
|
||||
PORT: 3334 # assign a different port for each project
|
||||
RAM: 8192M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
|
||||
cmds:
|
||||
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # Import
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m input/{{.PROJECT}}.imp)"
|
||||
--encoding ISO-8859-1
|
||||
--ignoreLines 1
|
||||
--storeBlankRows false
|
||||
--projectName {{.PROJECT}}
|
||||
> {{.LOG}}
|
||||
- > # spec_Z_03: delete discarded ("makuliert") media; deletes all titles and their items that contain only discarded items; then deletes all remaining discarded items
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/makuliert.json > {{.LOG}}
|
||||
- > # delete ACQ records; deletes all titles and their items that carry the ACQ flag; then deletes all remaining items with the ACQ flag
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/acq.json > {{.LOG}}
|
||||
- > # extract multi-line content
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/mehrzeiliges-extrahieren.json > {{.LOG}}
|
||||
- > # delete blank rows
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/leerzeilen.json > {{.LOG}}
|
||||
- > # split fields and values
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/feld-wert-separieren.json > {{.LOG}}
|
||||
- > # merge multi-line content (with #); separator: U+241F
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/mehrzeiliges-zusammen.json > {{.LOG}}
|
||||
- > # prefix field names with M| or E| because the same field names occur in media and items
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/feldname-prefix.json > {{.LOG}}
|
||||
- > # merge repeated fields
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/mehrfachbelegungen.json > {{.LOG}}
|
||||
- > # delete title-data fields with numbers (except 025z 026 026k 052 076b 076d)
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/reduzieren.json > {{.LOG}}
|
||||
- > # transpose
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/pre/transponieren.json > {{.LOG}}
|
||||
- mkdir -p output
|
||||
- > # Export
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
--output "$(readlink -m output/{{.PROJECT}}.tsv)"
|
||||
> {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
PID="$(lsof -t -i:{{.PORT}})"
|
||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||
- task: :kill # shut down OpenRefine immediately to save time and disk space
|
||||
vars: {DIR: '{{.DIR}}/log', PORT: '{{.PORT}}'}
|
||||
- task: :check # check OpenRefine log for any warnings and exit on error
|
||||
vars: {DIR: '{{.DIR}}'}
|
||||
sources:
|
||||
- Taskfile.yml
|
||||
- input/{{.PROJECT}}.imp
|
||||
- config/pre/**
|
||||
generates:
|
||||
- output/{{.PROJECT}}.tsv
|
||||
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
|
||||
|
||||
# Main transformation: starts OpenRefine, zips the TSV files produced by the
# pre-processing, imports the archive as project "bibliotheca", applies the
# operation histories in config/main/ in the order listed, exports
# output/bibliotheca.csv via config/main/template.txt, prints the server's
# resource usage and finally calls the :stop and :check tasks.
refine-main:
|
||||
dir: ./{{.DIR}}
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}'
|
||||
PROJECT: bibliotheca
|
||||
PORT: 3334 # assign a different port for each project
|
||||
RAM: 8192M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
|
||||
cmds:
|
||||
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # create zip archive from the output of the pre-processing
|
||||
zip -j tmp.zip
|
||||
output/bautzen.tsv
|
||||
output/breitenbrunn.tsv
|
||||
output/dresden.tsv
|
||||
output/glauchau.tsv
|
||||
# output/plauen.tsv
|
||||
- > # import zip archive
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m tmp.zip)"
|
||||
--format tsv
|
||||
--includeFileSources true
|
||||
--projectName {{.PROJECT}}
|
||||
> {{.LOG}}
|
||||
&& rm tmp.zip
|
||||
- > # reorder columns: start with 1. M|MEDNR, 2. E|EXNR, 3. File so that records mode is preserved
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/sortieren.json > {{.LOG}}
|
||||
- > # spec_Z_01: delete e-books (Bautzen)
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/ebooks.json > {{.LOG}}
|
||||
- > # spec_Z_02: delete journals and parts of MTM; see also the specification in "CBS-Titeldaten Bibliotheca"
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/zeitschriften.json > {{.LOG}}
|
||||
- > # library code taken from the import file name
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
|
||||
- > # spec_B_T_01: PPNs in 0100 (K10plus) and 0110 (SWB); 8-digit ones from Dresden are SWN without check digit, the check digit is appended there; assignment of 9-digit ones depends on the leading characters and M026 / M026k; assignment of 10-digit ones depends on the first character
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0100-0110.json > {{.LOG}}
|
||||
- > # spec_B_T_49: numbers from data conversion 2199
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/2199.json > {{.LOG}}
|
||||
- > # spec_B_E_15: department number 7100j
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100j.json > {{.LOG}}
|
||||
- > # spec_B_E_13, spec_Z_03 and spec_B_E_08: branch 7100f
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100f.json > {{.LOG}}
|
||||
- > # spec_B_E_07: location 7100a
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
|
||||
- > # spec_B_T_04, spec_B_T_05: ISBN 2000
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/2000.json > {{.LOG}}
|
||||
# TODO: ISMN in 2020
|
||||
- > # spec_B_E_10: accession date E0XX
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/e0xx.json > {{.LOG}}
|
||||
- > # spec_B_E_14, spec_Z_03, spec_B_E16: selection key E0XXb
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/e0xxb.json > {{.LOG}}
|
||||
# TODO: selection key for theses
|
||||
- > # spec_B_T_56_1: genre/status 0500 and publication type 1140
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0500-1140.json > {{.LOG}}
|
||||
# TODO: ART = S
|
||||
- > # spec_B_T_56_2: F/f for superordinate records 0500
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0500-ueber.json > {{.LOG}}
|
||||
- > # spec_B_T_56_3: Lax for theses 0500
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0500-lax.json > {{.LOG}}
|
||||
- > # spec_B_T_50, spec_B_T_51, spec_B_T_52, spec_B_T_56: IMD fields 0501a, 0501b, 0502a, 0502b, 0503a, 0503b, 0999
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/0501-0502-0503-0999.json > {{.LOG}}
|
||||
- > # spec_B_T_17: main title 4000a
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4000a.json > {{.LOG}}
|
||||
- > # spec_B_T_18: title addition 4000d
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4000d.json > {{.LOG}}
|
||||
- > # spec_B_T_20: edition statement 4020a
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4020a.json > {{.LOG}}
|
||||
- > # spec_B_T_16: publisher name 4030n
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4030n.json > {{.LOG}}
|
||||
- > # spec_B_T_21: place of publication 4030p
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4030p.json > {{.LOG}}
|
||||
- > # spec_B_T_22: extent 4060a
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/4060a.json > {{.LOG}}
|
||||
- > # spec_B_E_02: booking number ("Verbuchungsnummer") 8200
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8200.json > {{.LOG}}
|
||||
- > # spec_B_T_02: year statements 1100a and 1100n; 1100a normalized with numerous replacements
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/1100a-1100n.json > {{.LOG}}
|
||||
# TODO: year (end) in sort form in 1100b
|
||||
- > # spec_B_E_01: lending note 8515; Bautzen only
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8515.json > {{.LOG}}
|
||||
- > # spec_B_E_04, spec_B_E_05 and spec_B_E_08: item status 7100d
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100d.json > {{.LOG}}
|
||||
- > # spec_B_E_06: media group 8011
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8011.json > {{.LOG}}
|
||||
- > # spec_B_E_11 and spec_B_E_12: accession number 8100
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8100.json > {{.LOG}}
|
||||
- > # spec_B_T_03: language code 1500
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/1500.json > {{.LOG}}
|
||||
- > # spec_B_T_54: text for theses 1131
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/1131.json > {{.LOG}}
|
||||
- > # spec_B_T_55: text for theses 8600
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8600.json > {{.LOG}}
|
||||
# The following transformation rule must come directly before the export
|
||||
- > # finally delete titles without items; multi-part monographs are to be kept, therefore only: when BANDB and BANDN are not empty
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/abschluss.json > {{.LOG}}
|
||||
- > # export the PICA3 columns as CSV; column 2199 must come first because it is needed later for sorting
|
||||
mkdir -p output &&
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
--output "$(readlink -m output/{{.PROJECT}}.csv)"
|
||||
--template "$(< config/main/template.txt)"
|
||||
--rowSeparator ""
|
||||
> {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
PID="$(lsof -t -i:{{.PORT}})"
|
||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||||
- task: :check # check OpenRefine log for any warnings and exit on error
|
||||
vars: {DIR: '{{.DIR}}'}
|
||||
sources:
|
||||
- Taskfile.yml
|
||||
- output/*.tsv
|
||||
- config/main/**
|
||||
generates:
|
||||
- log/{{.PROJECT}}.openrefine.tar.gz
|
||||
- output/{{.PROJECT}}.csv
|
||||
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
|
||||
|
||||
# Convenience entry point: derives "<directory name>:main" from the current
# working directory, changes to the parent directory and runs that task there.
default: # enable standalone execution (running `task` in project directory)
|
||||
cmds:
|
||||
- DIR="${PWD##*/}:main" && cd .. && task "$DIR"
|
|
@ -0,0 +1,719 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 8,
|
||||
"l": "8"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|IDNR",
|
||||
"expression": "grel:value + with(11 - mod(sum(forRange(0,9,1,i,toNumber(value[i])*(9-i))),11),pz,if(pz == 11, '0', if(pz == 10, 'X', pz)))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "0110",
|
||||
"columnInsertIndex": 4
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 9,
|
||||
"l": "9"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value[0,2]",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "53",
|
||||
"l": "53"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "54",
|
||||
"l": "54"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "55",
|
||||
"l": "55"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "56",
|
||||
"l": "56"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "57",
|
||||
"l": "57"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "13",
|
||||
"l": "13"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "14",
|
||||
"l": "14"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "58",
|
||||
"l": "58"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "15",
|
||||
"l": "15"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "59",
|
||||
"l": "59"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "16",
|
||||
"l": "16"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "17",
|
||||
"l": "17"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "18",
|
||||
"l": "18"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "19",
|
||||
"l": "19"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "21",
|
||||
"l": "21"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "22",
|
||||
"l": "22"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "23",
|
||||
"l": "23"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "24",
|
||||
"l": "24"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|IDNR",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "0100",
|
||||
"columnInsertIndex": 4
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 9,
|
||||
"l": "9"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value[0,1]",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "6",
|
||||
"l": "6"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "7",
|
||||
"l": "7"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "8",
|
||||
"l": "8"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:cells['M|IDNR'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 9,
|
||||
"l": "9"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value[0,2]",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "00",
|
||||
"l": "00"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "10",
|
||||
"l": "10"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:cells['M|IDNR'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 9,
|
||||
"l": "9"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "0100",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "0100",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "0110",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "0110",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|026",
|
||||
"expression": "grel:value[0,3]",
|
||||
"columnName": "M|026",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "GBV",
|
||||
"l": "GBV"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|026k",
|
||||
"expression": "grel:value == cells['M|IDNR'].value",
|
||||
"columnName": "M|026k",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:cells['M|IDNR'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 9,
|
||||
"l": "9"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "0100",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "0100",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "0110",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "0110",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|026",
|
||||
"expression": "grel:value[0,3]",
|
||||
"columnName": "M|026",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "HBZ",
|
||||
"l": "HBZ"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "KXP",
|
||||
"l": "KXP"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "OBV",
|
||||
"l": "OBV"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "DNB",
|
||||
"l": "DNB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "BVB",
|
||||
"l": "BVB"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|026k",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "M|026k",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:cells['M|IDNR'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 9,
|
||||
"l": "9"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "0100",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "0100",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "0110",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "0110",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:cells['M|IDNR'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 10,
|
||||
"l": "10"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value[0]",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "1",
|
||||
"l": "1"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:cells['M|IDNR'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value.length()",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": 10,
|
||||
"l": "10"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|IDNR",
|
||||
"expression": "grel:value[0]",
|
||||
"columnName": "M|IDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "9",
|
||||
"l": "9"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:cells['M|IDNR'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,158 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDGR",
|
||||
"expression": "value",
|
||||
"columnName": "M|MEDGR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BAC",
|
||||
"l": "BAC"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "DIP",
|
||||
"l": "DIP"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "DI",
|
||||
"l": "DI"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "MA",
|
||||
"l": "MA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "BA",
|
||||
"l": "BA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "7100f",
|
||||
"expression": "value",
|
||||
"columnName": "7100f",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BB",
|
||||
"l": "BB"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "E0XXb",
|
||||
"expression": "grel:'d' + value[1,3]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDGR",
|
||||
"expression": "value",
|
||||
"columnName": "M|MEDGR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "DI",
|
||||
"l": "DI"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "MA",
|
||||
"l": "MA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "BA",
|
||||
"l": "BA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "BAC",
|
||||
"l": "BAC"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "DIP",
|
||||
"l": "DIP"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "7100f",
|
||||
"expression": "value",
|
||||
"columnName": "7100f",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "EH",
|
||||
"l": "EH"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "EH-Theke",
|
||||
"l": "EH-Theke"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "E0XXb",
|
||||
"expression": "grel:'n' + value[1,3]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
|
@ -0,0 +1,34 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|NRPRE",
|
||||
"expression": "grel:forEach(value.cross('bibliotheca','M|MEDNR'),r,if(and(r.cells['File'].value == cells['File'].value,or(isNonBlank(cells['M|BANDB'].value),isNonBlank(cells['M|BANDN'].value))),'vorhanden','fehlt')).inArray('vorhanden')",
|
||||
"columnName": "M|NRPRE",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0500",
|
||||
"expression": "grel:if(isNonBlank(cells['M|HST'].value), value[0] + 'F' + value[2] ,value[0] + 'f' + value[2])",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,139 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDNR",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "M|MEDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|JAHR",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "1100n",
|
||||
"columnInsertIndex": 3
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDNR",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "M|MEDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|JAHR",
|
||||
"expression": "grel:with(with(with(value.replace('[','').replace(']','').replace('(','').replace(')','').replace(' ','').replace('?','').replace('.','').replace('ca','').replace('c','').replace('ff',''),x,forNonBlank(x.split('/')[1],v,v,x)),y,y.split('-')[0]),z,if(and(z.length()==4,isNumeric(z)),z,if(z=='19XX','19XX',null)))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "1100a",
|
||||
"columnInsertIndex": 3
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDNR",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "M|MEDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "1100a",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "1100a",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "1100a",
|
||||
"expression": "grel:if(cells['M|JAHR'].value.contains('19'),'19XX','20XX')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "M|JAHR",
|
||||
"columnName": "M|JAHR",
|
||||
"query": "-",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|JAHR",
|
||||
"expression": "grel:value.split('-')[1].replace('[','').replace(']','').replace('(','').replace(')','').replace(' ','').replace('?','').replace('.','')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "1100b",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,89 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDGR",
|
||||
"expression": "value",
|
||||
"columnName": "M|MEDGR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BAC",
|
||||
"l": "BAC"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "DI",
|
||||
"l": "DI"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "DIP",
|
||||
"l": "DIP"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "MA",
|
||||
"l": "MA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "BA",
|
||||
"l": "BA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "7100f",
|
||||
"expression": "value",
|
||||
"columnName": "7100f",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BB",
|
||||
"l": "BB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "EH",
|
||||
"l": "EH"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "EH-Theke",
|
||||
"l": "EH-Theke"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "File",
|
||||
"expression": "grel:'Hochschulschrift'",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "1131",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,77 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|SPRA",
|
||||
"expression": "grel:forEach(value.split(/,|#|\\+|;/),v,forNonBlank(v.replace('.','').replace('-','').replace(' ','').\nreplace(/^arab$/,'ara').\nreplace(/^Arabisch$/,'ara').\nreplace(/^aram$/,'arc').\nreplace(/^daen$/,'dan').\nreplace(/^Deutsch$/,'ger').\nreplace(/^DEUTSCH$/,'ger').\nreplace(/^deutsch$/,'ger').\nreplace(/^dt$/,'ger').\nreplace(/^engl$/,'eng').\nreplace(/^Englisch$/,'eng').\nreplace(/^ENGLISCH$/,'eng').\nreplace(/^englisch$/,'eng').\nreplace(/^Finnisch$/,'fin').\nreplace(/^franz$/,'fre').\nreplace(/^Französisch$/,'fre').\nreplace(/^griech$/,'gre').\nreplace(/^hebr$/,'heb').\nreplace(/^hrv$/,'').\nreplace(/^ital$/,'ita').\nreplace(/^Italienisch$/,'ita').\nreplace(/^ITALIENISCH$/,'ita').\nreplace(/^Litauisch$/,'lit').\nreplace(/^n$/,'').\nreplace(/^Niederländisch$/,'dut').\nreplace(/^pers$/,'per').\nreplace(/^poln$/,'pol').\nreplace(/^Polnisch$/,'pol').\nreplace(/^polygl$/,'mul').\nreplace(/^portug$/,'por').\nreplace(/^Portugiesisch$/,'por').\nreplace(/^Portugisisch$/,'por').\nreplace(/^ru$/,'rus').\nreplace(/^Rumänisch$/,'rum').\nreplace(/^russ$/,'rus').\nreplace(/^Russisch$/,'rus').\nreplace(/^schwed$/,'swe').\nreplace(/^Schwedisch$/,'swe').\nreplace(/^slowak$/,'slo').\nreplace(/^sp$/,'spa').\nreplace(/^span$/,'spa').\nreplace(/^Spanisch$/,'spa').\nreplace(/^tschech$/,'cze').\nreplace(/^Tschechisch$/,'cze').\nreplace(/^tuerk$/,'tur').\nreplace(/^Türkisch$/,'tur').\nreplace(/^Ukrainisch$/,'ukr').\nreplace(/^ungar$/,'hun').\nreplace(/^Ungarisch$/,'hun')\n,x,x,null)).join('␟')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "1500",
|
||||
"columnInsertIndex": 3
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "1500",
|
||||
"expression": "grel:forEachIndex(value.split('␟'),i,v,if(i != 0, if(inArray(value.split('␟')[0,i],v),null,v), v)).join('␟')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDNR",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "M|MEDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "1500",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "1500",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "1500",
|
||||
"expression": "grel:'und'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"baseColumnName": "M|ISBN",
|
||||
"expression": "grel:[ forNonBlank(cells['M|ISBN'].value,v,if(isNumeric(v[0]),v,null),null), forNonBlank(cells['M|ISBN2'].value,v,if(isNumeric(v[0]),v,null),null) ].uniques().join('␟').replace('-','').toUppercase()",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "2000",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|MEDNR",
|
||||
"expression": "grel:'BA' + cells['File'].value + value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "2199",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,65 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"baseColumnName": "M|HST",
|
||||
"expression": "grel:if(value.contains('¬'),with(value.split('¬'), v, v[0].trim() + ' @' + v[1].trim()),value)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "4000a",
|
||||
"columnInsertIndex": 3
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "4000a",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "4000a",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "0500",
|
||||
"expression": "grel:value[1]",
|
||||
"columnName": "0500",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "a",
|
||||
"l": "a"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "4000a",
|
||||
"expression": "grel:'Titel fehlt'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|HSTZU",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "4000d",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|AUFL",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "4020a",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|VERL",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "4030n",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|VORT",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "4030p",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|UMF",
|
||||
"expression": "grel:value.split(' : ')[0]",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "4060a",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|STA1",
|
||||
"expression": "grel:value.replace('␟',' ').replace(/ +/,' ')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "7100a",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,878 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "H",
|
||||
"l": "H"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "I",
|
||||
"l": "I"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "T",
|
||||
"l": "T"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "U",
|
||||
"l": "U"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "V",
|
||||
"l": "V"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "v",
|
||||
"l": "v"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "Z",
|
||||
"l": "Z"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|EXSTA",
|
||||
"expression": "grel:'u'",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "7100d",
|
||||
"columnInsertIndex": 3
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "A",
|
||||
"l": "A"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "PL",
|
||||
"l": "PL"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'z'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "A",
|
||||
"l": "A"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "DD",
|
||||
"l": "DD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'a'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "B",
|
||||
"l": "B"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'a'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "G",
|
||||
"l": "G"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'g'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "K",
|
||||
"l": "K"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'i'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "N",
|
||||
"l": "N"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'u'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "P",
|
||||
"l": "P"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BB",
|
||||
"l": "BB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "GC",
|
||||
"l": "GC"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'s'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "P",
|
||||
"l": "P"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "DD",
|
||||
"l": "DD"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "BZ",
|
||||
"l": "BZ"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "PL",
|
||||
"l": "PL"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'i'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "W",
|
||||
"l": "W"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "PL",
|
||||
"l": "PL"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'c'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "W",
|
||||
"l": "W"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "DD",
|
||||
"l": "DD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'z'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "W",
|
||||
"l": "W"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BB",
|
||||
"l": "BB"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'z'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "w",
|
||||
"l": "w"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BB",
|
||||
"l": "BB"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'z'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E",
|
||||
"l": "E"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|ESORG",
|
||||
"expression": "value",
|
||||
"columnName": "E|ESORG",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "P",
|
||||
"l": "P"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'i'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E",
|
||||
"l": "E"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|ESORG",
|
||||
"expression": "value",
|
||||
"columnName": "E|ESORG",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "K",
|
||||
"l": "K"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'u'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E",
|
||||
"l": "E"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|ESORG",
|
||||
"expression": "value",
|
||||
"columnName": "E|ESORG",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "W",
|
||||
"l": "W"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'c'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXSTA",
|
||||
"expression": "value",
|
||||
"columnName": "E|EXSTA",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E",
|
||||
"l": "E"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "7100d",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "7100d",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'u'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|STA2",
|
||||
"expression": "value",
|
||||
"columnName": "E|STA2",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "MAV",
|
||||
"l": "MAV"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "eFlex",
|
||||
"l": "eFlex"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "Verwaltung",
|
||||
"l": "Verwaltung"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "Tonwerkstatt",
|
||||
"l": "Tonwerkstatt"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "7100d",
|
||||
"expression": "grel:'i'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "File",
|
||||
"expression": "grel:with(if(value=='DD',forNonBlank(cells['E|ZWGST'].value,v,v,value),value),x,x.replace('BB','0002').replace('BZ','0001').replace('DD','0003').replace('EH','0008').replace('GC','0004').replace('PL','0007'))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "7100j",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|MEDGR",
|
||||
"expression": "grel:'MEDGR: ' + value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "8011",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,36 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|ZUNR",
|
||||
"expression": "grel:cells['File'].value + ' ' + value.replace('-','/')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "8100",
|
||||
"columnInsertIndex": 3
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "E|ZUS",
|
||||
"columnName": "E|ZUS",
|
||||
"query": "Notation",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "8100",
|
||||
"expression": "grel:value + ' ' + cells['E|ZUS'].value.replace('Notation||','')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|BARCO",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "8200",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,34 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "File",
|
||||
"expression": "value",
|
||||
"columnName": "File",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BZ",
|
||||
"l": "BZ"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|AUHIN",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "8515",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,89 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDGR",
|
||||
"expression": "value",
|
||||
"columnName": "M|MEDGR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BAC",
|
||||
"l": "BAC"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "DI",
|
||||
"l": "DI"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "DIP",
|
||||
"l": "DIP"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "MA",
|
||||
"l": "MA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "BA",
|
||||
"l": "BA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "7100f",
|
||||
"expression": "value",
|
||||
"columnName": "7100f",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "BB",
|
||||
"l": "BB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "EH",
|
||||
"l": "EH"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "EH-Theke",
|
||||
"l": "EH-Theke"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "File",
|
||||
"expression": "grel:'LOKMAT: Lah'",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "8600",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,68 @@
|
|||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E0XX",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "E0XX",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|BANDB",
|
||||
"expression": "grel:or(isNonBlank(cells['M|BANDB'].value), isNonBlank(cells['M|BANDN'].value))",
|
||||
"columnName": "M|BANDB",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"description": "Remove rows"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,34 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXNR",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "E|EXNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|ZUDAT",
|
||||
"expression": "grel:forNonBlank(value,v,v[0,2] + '-' + v[3,5] + '-' + v[8,10],'22-07-20')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E0XX",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,34 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "E|EXNR",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "E|EXNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "File",
|
||||
"expression": "grel:with(if(value=='DD',forNonBlank(cells['E|ZWGST'].value,v,v,value),value),x,'n'+x.toLowercase())",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "E0XXb",
|
||||
"columnInsertIndex": 3
|
||||
}
|
||||
]
|
|
@ -0,0 +1,29 @@
|
|||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDGR",
|
||||
"expression": "value",
|
||||
"columnName": "M|MEDGR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "eBook",
|
||||
"l": "eBook"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
}
|
||||
]
|
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "File",
|
||||
"expression": "grel:with([ ['bautzen.tsv','BZ'], ['breitenbrunn.tsv','BB'], ['dresden.tsv','DD'], ['glauchau.tsv','GC'], ['plauen.tsv','PL'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
|
@ -0,0 +1,17 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "File",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "E|EXNR",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|MEDNR",
|
||||
"index": 0
|
||||
}
|
||||
]
|
|
@ -0,0 +1,87 @@
|
|||
{{
|
||||
with(
|
||||
[
|
||||
'2199',
|
||||
'0100',
|
||||
'0110',
|
||||
'0500',
|
||||
'0501a',
|
||||
'0501b',
|
||||
'0502a',
|
||||
'0502b',
|
||||
'0503a',
|
||||
'0503b',
|
||||
'0999',
|
||||
'1100a',
|
||||
'1100b',
|
||||
'1100n',
|
||||
'1131',
|
||||
'1140',
|
||||
'1500',
|
||||
'2000',
|
||||
'4000a',
|
||||
'4000d',
|
||||
'4020a',
|
||||
'4030n',
|
||||
'4030p',
|
||||
'4060a',
|
||||
'7100j',
|
||||
'7100f',
|
||||
'7100a',
|
||||
'7100d',
|
||||
'8011',
|
||||
'8100',
|
||||
'8200',
|
||||
'8515',
|
||||
'8600',
|
||||
'E0XX',
|
||||
'E0XXb'
|
||||
],
|
||||
columns,
|
||||
if(
|
||||
row.index == 0,
|
||||
forEach(
|
||||
columns,
|
||||
cn,
|
||||
cn.escape('csv')
|
||||
).join(',')
|
||||
+ '\n'
|
||||
+ with(
|
||||
forEach(
|
||||
columns,
|
||||
cn,
|
||||
forNonBlank(
|
||||
cells[cn].value,
|
||||
v,
|
||||
v.escape('csv'),
|
||||
'␀'
|
||||
)
|
||||
).join(',').replace('␀',''),
|
||||
r,
|
||||
if(
|
||||
isNonBlank(r.split(',').join(',')),
|
||||
r + '\n',
|
||||
''
|
||||
)
|
||||
),
|
||||
with(
|
||||
forEach(
|
||||
columns,
|
||||
cn,
|
||||
forNonBlank(
|
||||
cells[cn].value,
|
||||
v,
|
||||
v.escape('csv'),
|
||||
'␀'
|
||||
)
|
||||
).join(',').replace('␀',''),
|
||||
r,
|
||||
if(
|
||||
isNonBlank(r.split(',').join(',')),
|
||||
r + '\n',
|
||||
''
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
}}
|
|
@ -0,0 +1,165 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|BANDN",
|
||||
"expression": "value",
|
||||
"columnName": "M|BANDN",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "0",
|
||||
"l": "0"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "M|BANDN",
|
||||
"expression": "grel:null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|ART",
|
||||
"expression": "value",
|
||||
"columnName": "M|ART",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "GH",
|
||||
"l": "GH"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "Z",
|
||||
"l": "Z"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|ART",
|
||||
"expression": "value",
|
||||
"columnName": "M|ART",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "G",
|
||||
"l": "G"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|UART",
|
||||
"expression": "value",
|
||||
"columnName": "M|UART",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "R",
|
||||
"l": "R"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "Z",
|
||||
"l": "Z"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|ART",
|
||||
"expression": "value",
|
||||
"columnName": "M|ART",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "G",
|
||||
"l": "G"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "M|MEDNR",
|
||||
"expression": "grel:forEach(value.cross('bibliotheca','M|NRPRE'),r,if(and(r.cells['File'].value == cells['File'].value, or(isNonBlank(r.cells['M|BANDB'].value),isNonBlank(r.cells['M|BANDN'].value))),'vorhanden','fehlt')).inArray('vorhanden')",
|
||||
"columnName": "M|MEDNR",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": true
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
|
@ -0,0 +1,99 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "*********M",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"baseColumnName": "Column 1",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "tmp",
|
||||
"columnInsertIndex": 1
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "tmp",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "grel:if(isNonBlank(cells['tmp'].value),with(row.record.cells[columnName].value.join('').find(/MEKZ ./).uniques().join(''),v,v),null)",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "MEKZ ACQ",
|
||||
"l": "MEKZ ACQ"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "*********E",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "tmp",
|
||||
"expression": "grel:cells['Column 1'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "MEKZ ACQ",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "tmp"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,51 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "value",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "value",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "value",
|
||||
"expression": "grel:cells['Column 1'].value.slice(9)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:value[3,8]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Column 1",
|
||||
"newColumnName": "key"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,85 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "key",
|
||||
"expression": "value",
|
||||
"columnName": "key",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "*****",
|
||||
"l": "*****"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "value",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "typ",
|
||||
"columnInsertIndex": 2
|
||||
},
|
||||
{
|
||||
"op": "core/fill-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "typ"
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "key",
|
||||
"expression": "value",
|
||||
"columnName": "key",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "*****",
|
||||
"l": "*****"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "key",
|
||||
"expression": "grel:cells['typ'].value + '|' + value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "typ"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,29 @@
|
|||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Blank Rows",
|
||||
"expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
|
||||
"columnName": "",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "true",
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
|
@ -0,0 +1,99 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "*********M",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"baseColumnName": "Column 1",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "tmp",
|
||||
"columnInsertIndex": 1
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "tmp",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "grel:if(isNonBlank(cells['tmp'].value),with(row.record.cells[columnName].value.join('').find(/EXSTA ./).uniques().join(''),v,v),null)",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "EXSTA M",
|
||||
"l": "EXSTA M"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "*********E",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "tmp",
|
||||
"expression": "grel:cells['Column 1'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "EXSTA M",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "tmp"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,16 @@
|
|||
[
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "key"
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "value",
|
||||
"keyColumnName": "key",
|
||||
"separator": "␟"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,46 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "^\\*",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": true
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "Column 1",
|
||||
"expression": "grel:value.slice(1)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "value",
|
||||
"columnInsertIndex": 1
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "^\\*",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": true
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
|
@ -0,0 +1,8 @@
|
|||
[
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "value",
|
||||
"keyColumnName": "key",
|
||||
"separator": "␟"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,29 @@
|
|||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "key",
|
||||
"expression": "grel:and(isNumeric(value[2,4].trim()), not(or(value[2,6] == '025z', value[2,6] == '026 ', value[2,6] == '026k', value[2,6] == '052 ', value[2,6] == '076b', value[2,6] == '076d')))",
|
||||
"columnName": "key",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
|
@ -0,0 +1,8 @@
|
|||
[
|
||||
{
|
||||
"op": "core/key-value-columnize",
|
||||
"keyColumnName": "key",
|
||||
"valueColumnName": "value",
|
||||
"noteColumnName": ""
|
||||
}
|
||||
]
|
26
main.sh
26
main.sh
|
@ -1,26 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Scripts for transforming Bibliotheca and Alephino data to PICA+
# Flow: source bash-refine.sh and call `requirements`, download the task
# binary to lib/task next to this script if it is missing, change into the
# script directory, then run `task mkdir` followed by the default task.
|
||||
|
||||
# check and install requirements for bash-refine
|
||||
source "${BASH_SOURCE%/*}/bash-refine.sh" || exit 1
|
||||
requirements
|
||||
|
||||
# download task runner
|
||||
task="$(readlink -m "${BASH_SOURCE%/*}/lib/task")"
|
||||
# readlink -e prints nothing when the path does not exist, which triggers the download
if [[ -z "$(readlink -e "${task}")" ]]; then
|
||||
echo "Download task..."
|
||||
mkdir -p "$(dirname "${task}")"
|
||||
# NOTE(review): task.tar.gz is written relative to the caller's working
# directory (the cd into the script directory only happens further below),
# and curl's exit status is not checked before tar runs.
curl -L --output task.tar.gz \
|
||||
"https://github.com/go-task/task/releases/download/v3.0.0/task_linux_amd64.tar.gz"
|
||||
# extract only the archive member named "task" into the lib directory
tar -xzf task.tar.gz -C "$(dirname "${task}")" task --totals
|
||||
rm -f task.tar.gz
|
||||
fi
|
||||
|
||||
# make script executable from another directory
|
||||
cd "${BASH_SOURCE%/*}/" || exit 1
|
||||
|
||||
# create folders
|
||||
"${task}" mkdir
|
||||
|
||||
# execute default task (cf. Taskfile.yml)
|
||||
"${task}"
|
|
@ -0,0 +1,78 @@
|
|||
version: '3'
|
||||
|
||||
tasks:
|
||||
# main: runs :alephino:main and :bibliotheca:main as dependencies, then the refine task.
# The German desc reads: merge PICA3/CSV from Bibliotheca and Alephino, cluster
# items, enrich and convert to PICA+.
main:
|
||||
desc: PICA3/CSV aus Bibliotheca und Alephino zusammenführen, Exemplare clustern, anreichern und in PICA+ konvertieren
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}' # first ":"-separated segment of the task name
|
||||
deps:
|
||||
- task: :alephino:main
|
||||
- task: :bibliotheca:main
|
||||
cmds:
|
||||
- task: refine
|
||||
|
||||
# refine: starts OpenRefine (:start), imports the zipped CSV output as project
# "pica+", applies config/ppn.json and config/clustern.json, exports the barcodes,
# applies config/dedup.json, exports via config/template.txt, prints resource
# usage and finally calls :stop and :check.
refine:
|
||||
dir: ./{{.DIR}}
|
||||
vars:
|
||||
DIR: '{{splitList ":" .TASK | first}}' # first ":"-separated segment of the task name
|
||||
PROJECT: pica+
|
||||
PORT: 3334 # assign a different port for each project
|
||||
RAM: 8192M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1' # used as `> {{.LOG}}`: output goes through tee, which appends to the project log; stderr is merged into stdout
|
||||
cmds:
|
||||
- echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # create a ZIP archive with the output of the previous tasks
|
||||
zip -j tmp.zip
|
||||
../bibliotheca/output/bibliotheca.csv
|
||||
# ../alephino/output/alephino.csv
|
||||
- > # import the ZIP archive
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m tmp.zip)"
|
||||
--format csv
|
||||
--includeFileSources false
|
||||
--projectName {{.PROJECT}}
|
||||
> {{.LOG}}
|
||||
&& rm tmp.zip
|
||||
- > # spec_Z_04: enrich PPN via ISBN
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/ppn.json > {{.LOG}}
|
||||
- > # spec_Z_05: cluster items
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/clustern.json > {{.LOG}}
|
||||
- mkdir -p output
|
||||
- > # export of duplicate barcodes (the template writes each non-blank value of column 8200 on its own line); golang requires strange escaping https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
--output "$(readlink -m output/barcodes.txt)"
|
||||
--template "{{"{{"}}forNonBlank(cells['8200'].value, v, v + '\n', ''){{"}}"}}"
|
||||
--rowSeparator ""
|
||||
> {{.LOG}}
|
||||
- > # spec_Z_06: delete duplicate barcodes
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/dedup.json > {{.LOG}}
|
||||
- > # export as PICA+
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
--output "$(readlink -m output/{{.PROJECT}}.txt)"
|
||||
--template "$(< config/template.txt)"
|
||||
--rowSeparator ""
|
||||
> {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
PID="$(lsof -t -i:{{.PORT}})"
|
||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
||||
vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||||
- task: :check # check OpenRefine log for any warnings and exit on error
|
||||
vars: {DIR: '{{.DIR}}'}
|
||||
sources:
|
||||
- Taskfile.yml
|
||||
# - ../alephino/output/alephino.csv
|
||||
- ../bibliotheca/output/bibliotheca.csv
|
||||
- config/**
|
||||
generates:
|
||||
- log/{{.PROJECT}}.openrefine.tar.gz
|
||||
- output/**
|
||||
ignore_error: true # workaround to avoid an orphaned Java process on error
|
||||
# https://github.com/go-task/task/issues/141
|
||||
|
||||
default: # enable standalone execution (running `task` in project directory)
|
||||
# builds "<name of the current directory>:main", changes to the parent
# directory and invokes `task` with that task name
cmds:
|
||||
- DIR="${PWD##*/}:main" && cd .. && task "$DIR"
|
|
@ -0,0 +1,138 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "2199",
|
||||
"expression": "grel:forNonBlank(cells['0100'].value,v,v,cells['0110'].value)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "ppn",
|
||||
"columnInsertIndex": 1
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "ppn",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "ppn",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ppn",
|
||||
"expression": "grel:row.record.cells[columnName].value[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/row-reorder",
|
||||
"mode": "record-based",
|
||||
"sorting": {
|
||||
"criteria": [
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "ppn",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "ppn",
|
||||
"expression": "grel:forNonBlank(cells['ppn'].value,v,v,forNonBlank(cells['2199'].value,v,v,''))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 0
|
||||
},
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"columnName": "2199",
|
||||
"expression": "grel:if(rowIndex - row.record.fromRowIndex == 0,row.record.cells[columnName].value.join('␟'),null)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "ppn"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,35 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "8200",
|
||||
"expression": "facetCount(value, 'value', '8200') > 1",
|
||||
"columnName": "8200",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "8200",
|
||||
"expression": "null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column 8200 using expression null"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,292 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "2000",
|
||||
"expression": "grel:with(value.replace('-',''),x,forEach(x.split('␟'),v,if(v.length()==10,with('978'+v[0,9],z,z+((10-(sum(forRange(0,12,1,i,toNumber(z[i])*(1+(i%2*2)) )) %10)) %10).toString()[0] ),v))).uniques().join('␟')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "tmp",
|
||||
"columnInsertIndex": 3
|
||||
},
|
||||
{
|
||||
"op": "core/column-split",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "tmp",
|
||||
"guessCellType": false,
|
||||
"removeOriginalColumn": true,
|
||||
"mode": "separator",
|
||||
"separator": "␟",
|
||||
"regex": false,
|
||||
"maxColumns": 0
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "tmp 1"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "tmp 2"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,33 @@
|
|||
{{
|
||||
if(row.index - row.record.fromRowIndex == 0,
|
||||
'' + '\n'
|
||||
+ forNonBlank(cells['0500'].value, v, '002@ ' + '0' + v + '\n', '')
|
||||
+ forNonBlank(cells['0501a'].value, v, '002C ' + 'a' + v + forNonBlank(cells['0501b'].value, v, 'b' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['0502a'].value, v, '002D ' + 'a' + v + forNonBlank(cells['0502b'].value, v, 'b' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['0503a'].value, v, '002E ' + 'a' + v + forNonBlank(cells['0503b'].value, v, 'b' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['0100'].value, v, '003@ ' + '0' + v + '\n', '')
|
||||
+ forNonBlank(cells['0110'].value, v, '003S ' + '0' + v + '\n', '')
|
||||
+ forNonBlank(cells['2000'].value, v, forEach(v.split('␟'),x,'004A ' + '0' + x + '\n').join(''), '')
|
||||
+ forNonBlank(cells['2199'].value, v, forEach(v.split('␟'),x,'006Y ' + '0' + x + '\n').join(''), '')
|
||||
+ forNonBlank(cells['1500'].value, v, '010@ ' + forEach(v.split('␟'),x,'a' + x).join('') + '\n', '')
|
||||
+ forNonBlank(cells['1100a'].value, v, '011@ ' + 'a' + v + forNonBlank(cells['1100b'].value, v, 'b' + v, '') + forNonBlank(cells['1100n'].value, v, 'n' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['1131'].value, v, '013D ' + 'a' + v + '\n', '')
|
||||
+ forNonBlank(cells['1140'].value, v, '013H ' + 'a' + v + '\n', '')
|
||||
+ forNonBlank(cells['4000a'].value, v, '021A ' + 'a' + v + forNonBlank(cells['4000d'].value, v, 'd' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['4020a'].value, v, '032@ ' + 'a' + v + '\n', '')
|
||||
+ if(or(isNonBlank(cells['4030n'].value),isNonBlank(cells['4030p'].value)),'033A ' + forNonBlank(cells['4030n'].value, v, 'n' + v,'') + forNonBlank(cells['4030p'].value, v, 'p' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['4060a'].value, v, '034D ' + 'a' + v + '\n', '')
|
||||
+ forNonBlank(cells['0999'].value, v, '046W ' + 'a' + v + '\n', '')
|
||||
,'')
|
||||
}}{{
|
||||
if(isNonBlank(cells['E0XXb'].value),
|
||||
with(with(rowIndex - row.record.fromRowIndex + 1, i, '00'[0,2-i.length()] + i),exnr,
|
||||
'208@/' + exnr + ' a' + cells['E0XX'].value + 'b' + cells['E0XXb'].value + '\n'
|
||||
+ '209A/' + exnr + ' b4736' + 'j' + cells['7100j'].value + 'f' + cells['7100f'].value + forNonBlank(cells['7100a'].value, v, 'a' + v, '') + forNonBlank(cells['7100d'].value, v, 'd' + v, '') + 'x00' + '\n'
|
||||
+ forNonBlank(cells['8011'].value, v, '209B/' + exnr + ' a' + v + 'x11' + '\n', '')
|
||||
+ forNonBlank(cells['8100'].value, v, '209C/' + exnr + ' a' + v + 'x00' + '\n', '')
|
||||
+ forNonBlank(cells['8200'].value, v, '209G/' + exnr + ' a' + v + '\n', '')
|
||||
+ forNonBlank(cells['8600'].value, v, '209O/' + exnr + ' a' + v + 'x00' + '\n', '')
|
||||
+ forNonBlank(cells['8515'].value, v, '220B/' + exnr + ' a' + v + '\n', '')
|
||||
), '')
|
||||
}}
|
|
@ -1,868 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Alephino Vorverarbeitung
|
||||
# - Exporte (Titel und Exemplare) von einer der Bibliotheken importieren
|
||||
# - in Tabellenformat umwandeln
|
||||
# - Exemplarinformationen an Titel anhängen
|
||||
# - als TSV exportieren
|
||||
|
||||
# =============================== ENVIRONMENT ================================ #
|
||||
|
||||
# source the main script
|
||||
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
|
||||
|
||||
# Read input: two export files are required (1. titles, 2. items).
# The project names are derived from the file names without the .txt suffix;
# the resolved absolute paths are stored in the projects array
# (presumably declared by the sourced bash-refine.sh -- confirm there).
if [[ $1 && $2 ]]; then
  titel="$(basename "$1" .txt)"
  exemplare="$(basename "$2" .txt)"
  # readlink -e prints nothing and returns non-zero if the path does not
  # exist, so fail here with a clear message instead of during the import
  projects[$titel]="$(readlink -e "$1")" \
    || { echo 1>&2 "Input file not found: $1"; exit 1; }
  projects[$exemplare]="$(readlink -e "$2")" \
    || { echo 1>&2 "Input file not found: $2"; exit 1; }
else
  echo 1>&2 "Please provide path to input files (1. Titel, 2. Exemplare)"; exit 1
fi
|
||||
|
||||
# check requirements, set trap, create workdir and tee to logfile
|
||||
init
|
||||
|
||||
# ================================= STARTUP ================================== #
|
||||
|
||||
checkpoint "Startup"; echo
|
||||
|
||||
# print environment variables
|
||||
printenv | grep REFINE; echo
|
||||
|
||||
# start OpenRefine server
|
||||
refine_start; echo
|
||||
|
||||
# ================================== IMPORT ================================== #
|
||||
|
||||
checkpoint "Import"; echo
|
||||
|
||||
# Fixed-width text files
|
||||
# Columns: 5
|
||||
# Character encoding: UTF-8
|
||||
# Store blank rows deaktivieren
|
||||
|
||||
echo "import file" "${projects[$titel]}" "..."
|
||||
if curl -fs --write-out "%{redirect_url}\n" \
|
||||
--form project-file="@${projects[$titel]}" \
|
||||
--form project-name="${titel}" \
|
||||
--form format="text/line-based/fixed-width" \
|
||||
--form options='{
|
||||
"encoding":"UTF-8",
|
||||
"columnWidths":[5],
|
||||
"ignoreLines":-1,
|
||||
"headerLines":0,
|
||||
"skipDataLines":0,
|
||||
"limit":-1,
|
||||
"guessCellValueTypes":false,
|
||||
"storeBlankRows":false,
|
||||
"storeBlankCellsAsNulls":true,
|
||||
"includeFileSources":false
|
||||
}' \
|
||||
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
|
||||
> "${workdir}/${titel}.id"
|
||||
then
|
||||
log "imported ${projects[$titel]} as ${titel}"
|
||||
else
|
||||
error "import of ${projects[$titel]} failed!"
|
||||
fi
|
||||
refine_store "${titel}" "${workdir}/${titel}.id" || error "import of ${titel} failed!"
|
||||
echo
|
||||
|
||||
echo "import file" "${projects[$exemplare]}" "..."
|
||||
if curl -fs --write-out "%{redirect_url}\n" \
|
||||
--form project-file="@${projects[$exemplare]}" \
|
||||
--form project-name="${exemplare}" \
|
||||
--form format="text/line-based/fixed-width" \
|
||||
--form options='{
|
||||
"encoding":"UTF-8",
|
||||
"columnWidths":[5],
|
||||
"ignoreLines":-1,
|
||||
"headerLines":0,
|
||||
"skipDataLines":0,
|
||||
"limit":-1,
|
||||
"guessCellValueTypes":false,
|
||||
"storeBlankRows":false,
|
||||
"storeBlankCellsAsNulls":true,
|
||||
"includeFileSources":false
|
||||
}' \
|
||||
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
|
||||
> "${workdir}/${exemplare}.id"
|
||||
then
|
||||
log "imported ${projects[$exemplare]} as ${exemplare}"
|
||||
else
|
||||
error "import of ${projects[$exemplare]} failed!"
|
||||
fi
|
||||
refine_store "${exemplare}" "${workdir}/${exemplare}.id" || error "import of ${exemplare} failed!"
|
||||
echo
|
||||
|
||||
# ================================ TRANSFORM ================================= #
|
||||
|
||||
checkpoint "Transform"; echo
|
||||
|
||||
# --------------------------- Korrekturen Einzelfälle ------------------------ #
|
||||
|
||||
echo "Korrekturen Einzelfälle..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/mass-edit",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "value",
|
||||
"edits": [
|
||||
{
|
||||
"from": [
|
||||
"001st"
|
||||
],
|
||||
"fromBlank": false,
|
||||
"fromError": false,
|
||||
"to": "001"
|
||||
}
|
||||
],
|
||||
"description": "Mass edit cells in column Column 1"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${titel} (${projects[$titel]})"
|
||||
else
|
||||
error "transform ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
|
||||
# ----------------------- Feldnamen um M bzw. E ergänzen --------------------- #
|
||||
|
||||
echo "Feldnamen um M bzw. E ergänzen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:'M|' + value.replace(' ','')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column Column 1 using expression grel:'M|' + value.replace(' ','')"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${titel} (${projects[$titel]})"
|
||||
else
|
||||
error "transform ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
if curl -fs \
|
||||
--data project="${projects[$exemplare]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:'E|' + value.replace(' ','')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column Column 1 using expression grel:'E|' + value.replace(' ','')"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${exemplare} (${projects[$exemplare]})"
|
||||
else
|
||||
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# -------------------------------- Sortieren --------------------------------- #
|
||||
|
||||
echo "Datensätze und Feldnamen sortieren..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "M|IDN",
|
||||
"l": "M|IDN"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "Column 2",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "id",
|
||||
"index": 0,
|
||||
"description": "Move column id to position 0"
|
||||
},
|
||||
{
|
||||
"op": "core/fill-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id",
|
||||
"description": "Fill down cells in column id"
|
||||
},
|
||||
{
|
||||
"op": "core/row-reorder",
|
||||
"mode": "row-based",
|
||||
"sorting": {
|
||||
"criteria": [
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "id",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "Column 1",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Reorder rows"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "id",
|
||||
"description": "Remove column id"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${titel} (${projects[$titel]})"
|
||||
else
|
||||
error "transform ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
if curl -fs \
|
||||
--data project="${projects[$exemplare]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E|IDN",
|
||||
"l": "E|IDN"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "Column 2",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column id at index 2 based on column Column 2 using expression grel:value"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "id",
|
||||
"index": 0,
|
||||
"description": "Move column id to position 0"
|
||||
},
|
||||
{
|
||||
"op": "core/fill-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id",
|
||||
"description": "Fill down cells in column id"
|
||||
},
|
||||
{
|
||||
"op": "core/row-reorder",
|
||||
"mode": "row-based",
|
||||
"sorting": {
|
||||
"criteria": [
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "id",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
},
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "Column 1",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Reorder rows"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "id",
|
||||
"description": "Remove column id"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${exemplare} (${projects[$exemplare]})"
|
||||
else
|
||||
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# --------------------- Mehrfachbelegungen zusammenführen -------------------- #
|
||||
|
||||
# - Column 1 > Edit cells > Blank down
|
||||
# - Column 2 > Edit cells > join multi-valued cells... > ␟
|
||||
|
||||
echo "Mehrfachbelegungen zusammenführen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"description": "Blank down cells in column Column 1"
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "Column 2",
|
||||
"keyColumnName": "Column 1",
|
||||
"separator": "␟",
|
||||
"description": "Join multi-valued cells in column Column 2"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${titel} (${projects[$titel]})"
|
||||
else
|
||||
error "transform ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
if curl -fs \
|
||||
--data project="${projects[$exemplare]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"description": "Blank down cells in column Column 1"
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "Column 2",
|
||||
"keyColumnName": "Column 1",
|
||||
"separator": "␟",
|
||||
"description": "Join multi-valued cells in column Column 2"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${exemplare} (${projects[$exemplare]})"
|
||||
else
|
||||
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ---------------------- Nicht benötigte Felder löschen ---------------------- #
|
||||
|
||||
echo "Nicht benötigte Felder löschen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "M|025_",
|
||||
"l": "M|025_"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|025e",
|
||||
"l": "M|025e"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|004",
|
||||
"l": "M|004"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|011",
|
||||
"l": "M|011"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|026_",
|
||||
"l": "M|026_"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|026a",
|
||||
"l": "M|026a"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|026d",
|
||||
"l": "M|026d"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|026g",
|
||||
"l": "M|026g"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|030",
|
||||
"l": "M|030"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|037z",
|
||||
"l": "M|037z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|038b",
|
||||
"l": "M|038b"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|070",
|
||||
"l": "M|070"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|073",
|
||||
"l": "M|073"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|076z",
|
||||
"l": "M|076z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|080",
|
||||
"l": "M|080"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|800s",
|
||||
"l": "M|800s"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|802",
|
||||
"l": "M|802"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "M|808b",
|
||||
"l": "M|808b"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "^M\\|9",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${titel} (${projects[$titel]})"
|
||||
else
|
||||
error "transform ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
if curl -fs \
|
||||
--data project="${projects[$exemplare]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "value",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "E|A02",
|
||||
"l": "E|A02"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|A86",
|
||||
"l": "E|A86"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|SUB",
|
||||
"l": "E|SUB"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|FMT",
|
||||
"l": "E|FMT"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|CAT",
|
||||
"l": "E|CAT"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|027",
|
||||
"l": "E|027"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "E|123",
|
||||
"l": "E|123"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${exemplare} (${projects[$exemplare]})"
|
||||
else
|
||||
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
|
||||
# ------------------------------- Transponieren ------------------------------ #
|
||||
|
||||
# - Column 1 > Transpose > Columnize by key/value columns... > OK
|
||||
|
||||
echo "Transponieren..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/key-value-columnize",
|
||||
"keyColumnName": "Column 1",
|
||||
"valueColumnName": "Column 2",
|
||||
"noteColumnName": "",
|
||||
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${titel} (${projects[$titel]})"
|
||||
else
|
||||
error "transform ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
if curl -fs \
|
||||
--data project="${projects[$exemplare]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/key-value-columnize",
|
||||
"keyColumnName": "Column 1",
|
||||
"valueColumnName": "Column 2",
|
||||
"noteColumnName": "",
|
||||
"description": "Columnize by key column Column 1 and value column Column 2 with note column "
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${exemplare} (${projects[$exemplare]})"
|
||||
else
|
||||
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ---------------------------- Titel-ID separieren --------------------------- #
|
||||
|
||||
echo "Titel-ID separieren..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "M|IDN",
|
||||
"expression": "grel:value.replace(/^0+/,'')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 12,
|
||||
"description": "Create column id at index 12 based on column M|IDN using expression grel:value.replace(/^0+/,'')"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${titel} (${projects[$titel]})"
|
||||
else
|
||||
error "transform ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
if curl -fs \
|
||||
--data project="${projects[$exemplare]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|BIB",
|
||||
"expression": "grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "titel_id",
|
||||
"columnInsertIndex": 18,
|
||||
"description": "Create column titel_id at index 18 based on column E|BIB using expression grel:value.split('\u001f')[0].slice(1).replace(/^0+/,'')"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${exemplare} (${projects[$exemplare]})"
|
||||
else
|
||||
error "transform ${exemplare} (${projects[$exemplare]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ---------------------------- Exemplare anreichern -------------------------- #
|
||||
|
||||
echo "Exemplare anreichern..."
|
||||
columns=( "E|001" "E|002a" "E|003" "E|004" "E|027" "E|030" "E|050" "E|100" "E|115" "E|120" "E|123" "E|A02" "E|A72" "E|A73" "E|A87" "E|A91" "E|A95" "E|BIB" "E|CAT" "E|FMT" "E|IDN" "E|LDR" "E|STA" "E|SUB" "E|105" "E|107" "E|A94" "E|125" "E|072" "E|A98" "E|HOL" "E|A86" "E|A63" "E|A70" "E|A83" "E|A85" "E|ABO" "E|A97" "E|A82" "E|002" "E|ORD" )
|
||||
for column in "${columns[@]}"; do
|
||||
cat << JSON >> "${workdir}/${titel}.tmp"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"expression": "grel:forEach(value.cross('${exemplare}','titel_id'),r,forNonBlank(r.cells['${column}'].value,v,v,'')).join('␞')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "${column}",
|
||||
"columnInsertIndex": 13
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-split",
|
||||
"columnName": "${column}",
|
||||
"keyColumnName": "M|001",
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false
|
||||
}
|
||||
]
|
||||
JSON
|
||||
done
|
||||
if "${jq}" -s add "${workdir}/${titel}.tmp" | curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data-urlencode operations@- \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
|
||||
then
|
||||
log "transformed ${titel} (${projects[$titel]})"
|
||||
rm "${workdir}/${titel}.tmp"
|
||||
else
|
||||
error "transform ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ================================== EXPORT ================================== #
|
||||
|
||||
checkpoint "Export"; echo
|
||||
|
||||
format="tsv"
|
||||
p="${titel%%-*}" # Projektname ohne Zusatz
|
||||
echo "export ${titel} to ${format} file..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$titel]}" \
|
||||
--data format="${format}" \
|
||||
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||
"${endpoint}/command/core/export-rows" \
|
||||
> "${workdir}/${p}.${format}"
|
||||
then
|
||||
log "exported ${titel} (${projects[$titel]}) to ${workdir}/${p}.${format}"
|
||||
else
|
||||
error "export of ${titel} (${projects[$titel]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ================================== FINISH ================================== #
|
||||
|
||||
checkpoint "Finish"; echo
|
||||
|
||||
# stop OpenRefine server
|
||||
refine_stop; echo
|
||||
|
||||
# calculate run time based on checkpoints
|
||||
checkpoint_stats; echo
|
||||
|
||||
# word count on all files in workdir
|
||||
count_output
|
|
@ -1,767 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Bibliotheca Vorverarbeitung
|
||||
# - Export von einer der Bibliotheken importieren
|
||||
# - in Tabellenformat umwandeln
|
||||
# - als TSV exportieren
|
||||
|
||||
# =============================== ENVIRONMENT ================================ #
|
||||
|
||||
# source the main script
|
||||
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
|
||||
|
||||
# read input
|
||||
if [[ $1 ]]; then
|
||||
p="$(basename "$1" .imp)"
|
||||
projects[$p]="$(readlink -e "$1")"
|
||||
else
|
||||
echo 1>&2 "Please provide path to input file"; exit 1
|
||||
fi
|
||||
|
||||
# check requirements, set trap, create workdir and tee to logfile
|
||||
init
|
||||
|
||||
# ================================= STARTUP ================================== #
|
||||
|
||||
checkpoint "Startup"; echo
|
||||
|
||||
# print environment variables
|
||||
printenv | grep REFINE; echo
|
||||
|
||||
# start OpenRefine server
|
||||
refine_start; echo
|
||||
|
||||
# ================================== IMPORT ================================== #
|
||||
|
||||
checkpoint "Import"; echo
|
||||
|
||||
# Line-based text files
|
||||
# Character encoding: ISO-8859-1
|
||||
# Store blank rows: disabled
|
||||
# ignore first 1 line(s) at the beginning of file
|
||||
|
||||
echo "import file" "${projects[$p]}" "..."
|
||||
if curl -fs --write-out "%{redirect_url}\n" \
|
||||
--form project-file="@${projects[$p]}" \
|
||||
--form project-name="${p}" \
|
||||
--form format="line-based" \
|
||||
--form options='{
|
||||
"encoding": "ISO-8859-1",
|
||||
"storeBlankRows": "false",
|
||||
"ignoreLines": 1
|
||||
}' \
|
||||
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
|
||||
> "${workdir}/${p}.id"
|
||||
then
|
||||
log "imported ${projects[$p]} as ${p}"
|
||||
else
|
||||
error "import of ${projects[$p]} failed!"
|
||||
fi
|
||||
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
|
||||
echo
|
||||
|
||||
# ================================ TRANSFORM ================================= #
|
||||
|
||||
checkpoint "Transform"; echo
|
||||
|
||||
# ------------------------- Makulierte Medien löschen ------------------------ #
|
||||
|
||||
# spec_Z_03
|
||||
# löscht alle Titel und deren Exemplare, die nur makulierte Ex. enthalten
|
||||
# löscht dann alle verbliebenen makulierten Ex.
|
||||
|
||||
echo "Makulierte Medien löschen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "*********M",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"baseColumnName": "Column 1",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "tmp",
|
||||
"columnInsertIndex": 1
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "tmp",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "grel:if(isNonBlank(cells['tmp'].value),with(row.record.cells[columnName].value.join('').find(/EXSTA ./).uniques().join(''),v,v),null)",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "EXSTA M",
|
||||
"l": "EXSTA M"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "*********E",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "tmp",
|
||||
"expression": "grel:cells['Column 1'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "EXSTA M",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "tmp"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# -------------------------- ACQ Datensätze löschen -------------------------- #
|
||||
|
||||
# spec_Z_03
|
||||
# löscht alle Titel und deren Exemplare, die das Kennzeichen ACQ enthalten
|
||||
# löscht dann alle verbliebenen Exemplare mit Kennzeichen ACQ
|
||||
|
||||
echo "ACQ Datensätze löschen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "*********M",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"baseColumnName": "Column 1",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "tmp",
|
||||
"columnInsertIndex": 1
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "tmp",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Column 1",
|
||||
"expression": "grel:if(isNonBlank(cells['tmp'].value),with(row.record.cells[columnName].value.join('').find(/MEKZ ./).uniques().join(''),v,v),null)",
|
||||
"columnName": "Column 1",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "MEKZ ACQ",
|
||||
"l": "MEKZ ACQ"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "*********E",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "tmp",
|
||||
"expression": "grel:cells['Column 1'].value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "MEKZ ACQ",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "tmp"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ---------------------- Mehrzeilige Inhalte extrahieren --------------------- #
|
||||
|
||||
# - Column 1 > Text filter > regular expression aktivieren > ^\* > invert
|
||||
# -- Column 1 > Edit column > Add column based on this column...
|
||||
# > value > value.slice(1)
|
||||
# -- Column 1 > Edit cells > Transform... > null
|
||||
|
||||
echo "Mehrzeilige Inhalte extrahieren..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "^\\*",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": true
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "Column 1",
|
||||
"expression": "grel:value.slice(1)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "value",
|
||||
"columnInsertIndex": 1
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "Column 1",
|
||||
"columnName": "Column 1",
|
||||
"query": "^\\*",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": true
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ---------------------------- Leerzeilen löschen ---------------------------- #
|
||||
|
||||
# - All > Facet > Facet by blank > true
|
||||
# - All > Edit rows > Remove all matching rows
|
||||
|
||||
echo "Leerzeilen löschen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Blank Rows",
|
||||
"expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
|
||||
"columnName": "",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "true",
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
|
||||
# ------------------------ Felder und Werte aufteilen ------------------------ #
|
||||
|
||||
# - value > Facet > Customized facets > Facet by blank > true
|
||||
# -- value > Edit cells > Transform... > cells['Column 1'].value.slice(9)
|
||||
# - Column 1 > Edit cells > Transform... > value[3,8]
|
||||
# - Column 1 > Edit column > Rename this column > key
|
||||
|
||||
echo "Felder und Werte aufteilen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "value",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "value",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "value",
|
||||
"expression": "grel:cells['Column 1'].value.slice(9)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "Column 1",
|
||||
"expression": "grel:value[3,8]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Column 1",
|
||||
"newColumnName": "key"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
|
||||
# ----------------- Mehrzeilige Inhalte (mit #) zusammenführen --------------- #
|
||||
|
||||
# - value > Edit cells > Join multi-valued cells... > ␟
|
||||
# (das ist das Unicode-Zeichen U+241F)
|
||||
|
||||
echo "Mehrzeilige Inhalte (mit #) zusammenführen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "value",
|
||||
"keyColumnName": "key",
|
||||
"separator": "␟"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
|
||||
# ----------------------- Feldnamen um M oder E ergänzen --------------------- #
|
||||
|
||||
# - key > Facet > Text facet > *****
|
||||
# -- value > Edit column > Add column based on this column... > typ > value
|
||||
# - typ > Edit cells > Fill down
|
||||
# - key > Facet > Text facet > *****
|
||||
# -- All > Edit rows > Remove all matching rows
|
||||
# - key > Edit cells > Transform... > cells['typ'].value + '|' + value
|
||||
# - typ > Edit column > Remove this column
|
||||
|
||||
echo "Feldnamen um M oder E ergänzen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "key",
|
||||
"expression": "value",
|
||||
"columnName": "key",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "*****",
|
||||
"l": "*****"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "value",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "typ",
|
||||
"columnInsertIndex": 2
|
||||
},
|
||||
{
|
||||
"op": "core/fill-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "typ"
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "key",
|
||||
"expression": "value",
|
||||
"columnName": "key",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "*****",
|
||||
"l": "*****"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "key",
|
||||
"expression": "grel:cells['typ'].value + '|' + value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "typ"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# --------------------- Mehrfachbelegungen zusammenführen -------------------- #
|
||||
|
||||
# - key > Edit cells > Blank down
|
||||
# - value > Edit cells > join multi-valued cells... > ␟
|
||||
|
||||
echo "Mehrfachbelegungen zusammenführen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "key"
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"columnName": "value",
|
||||
"keyColumnName": "key",
|
||||
"separator": "␟"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# -------------------- Titeldaten-Felder mit Zahlen löschen ------------------ #
|
||||
|
||||
# außer 025z 026 026k 052 076b 076d
|
||||
echo "Titeldaten-Felder mit Zahlen löschen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "key",
|
||||
"expression": "grel:and(isNumeric(value[2,4].trim()), not(or(value[2,6] == '025z', value[2,6] == '026 ', value[2,6] == '026k', value[2,6] == '052 ', value[2,6] == '076b', value[2,6] == '076d')))",
|
||||
"columnName": "key",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
}
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ------------------------------- Transponieren ------------------------------ #
|
||||
|
||||
# - key > Transpose > Columnize by key/value columns... > OK
|
||||
|
||||
echo "Transponieren..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/key-value-columnize",
|
||||
"keyColumnName": "key",
|
||||
"valueColumnName": "value",
|
||||
"noteColumnName": ""
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ================================== EXPORT ================================== #
|
||||
|
||||
checkpoint "Export"; echo
|
||||
|
||||
format="tsv"
|
||||
echo "export ${p} to ${format} file..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data format="${format}" \
|
||||
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||
"${endpoint}/command/core/export-rows" \
|
||||
> "${workdir}/${p}.${format}"
|
||||
then
|
||||
log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
|
||||
else
|
||||
error "export of ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ================================== FINISH ================================== #
|
||||
|
||||
checkpoint "Finish"; echo
|
||||
|
||||
# stop OpenRefine server
|
||||
refine_stop; echo
|
||||
|
||||
# calculate run time based on checkpoints
|
||||
checkpoint_stats; echo
|
||||
|
||||
# word count on all files in workdir
|
||||
count_output
|
|
@ -1,198 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Alephino Hauptverarbeitung
|
||||
# - Datenbereinigungen
|
||||
# - Mapping auf PICA3
|
||||
# - PICA3 als CSV (via Template) exportieren
|
||||
|
||||
# =============================== ENVIRONMENT ================================ #
|
||||
|
||||
# source the main script
|
||||
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
|
||||
|
||||
# read input
|
||||
if [[ $1 ]]; then
|
||||
inputdir="$(readlink -e "$1")"
|
||||
else
|
||||
echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
|
||||
fi
|
||||
|
||||
# check requirements, set trap, create workdir and tee to logfile
|
||||
init
|
||||
|
||||
# ================================= STARTUP ================================== #
|
||||
|
||||
checkpoint "Startup"; echo
|
||||
|
||||
# start OpenRefine server
|
||||
refine_start; echo
|
||||
|
||||
# ================================== IMPORT ================================== #
|
||||
|
||||
checkpoint "Import"; echo
|
||||
|
||||
# TSV-Exporte aller Einzelprojekte in ein Zip-Archiv packen
|
||||
zip -j "${workdir}/alephino.zip" "${inputdir}"/*.tsv
|
||||
projects["alephino"]="${workdir}/alephino.zip"
|
||||
|
||||
# Neues Projekt erstellen aus Zip-Archiv
|
||||
p="alephino"
|
||||
echo "import file" "${projects[$p]}" "..."
|
||||
if curl -fs --write-out "%{redirect_url}\n" \
|
||||
--form project-file="@${projects[$p]}" \
|
||||
--form project-name="${p}" \
|
||||
--form format="text/line-based/*sv" \
|
||||
--form options='{
|
||||
"encoding": "UTF-8",
|
||||
"includeFileSources": "true",
|
||||
"separator": "\t"
|
||||
}' \
|
||||
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
|
||||
> "${workdir}/${p}.id"
|
||||
then
|
||||
log "imported ${projects[$p]} as ${p}"
|
||||
else
|
||||
error "import of ${projects[$p]} failed!"
|
||||
fi
|
||||
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
|
||||
echo
|
||||
|
||||
# ================================ TRANSFORM ================================= #
|
||||
|
||||
checkpoint "Transform"; echo
|
||||
|
||||
# ----------------------------- Spalten sortieren ---------------------------- #
|
||||
|
||||
# damit Records-Mode erhalten bleibt
|
||||
|
||||
# Progress message: lists the leading columns in the order produced by the
# successive "move to index 0" operations below (the last move ends up first).
echo "Spalten sortieren: Beginnen mit 1. M|IDN, 2. M|026f, 3. M|029, 4. E|001, 5. File..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "File",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "E|001",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|029",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|026f",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "M|IDN",
|
||||
"index": 0
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ------------------------------------ File ---------------------------------- #
|
||||
|
||||
echo "Bibliothekskürzel aus Import-Dateiname..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "File",
|
||||
"expression": "grel:with([ ['leipzig.tsv','LE'], ['riesa.tsv','RS'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ------------------------------------ 7100a ---------------------------------- #
|
||||
|
||||
# spec_A_E_01
|
||||
echo "Signatur..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "E|100",
|
||||
"expression": "grel:value.split('\u001f')[0].slice(1)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "7100a",
|
||||
"columnInsertIndex": 5
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ================================== EXPORT ================================== #
|
||||
|
||||
checkpoint "Export"; echo
|
||||
|
||||
# Export des OpenRefine-Projekts für Tests
|
||||
format="openrefine.tar.gz"
|
||||
echo "export ${p} to ${format} file..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
"${endpoint}/command/core/export-project" \
|
||||
> "${workdir}/${p}.${format}"
|
||||
then
|
||||
log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
|
||||
else
|
||||
error "export of ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ================================== FINISH ================================== #
|
||||
|
||||
checkpoint "Finish"; echo
|
||||
|
||||
# stop OpenRefine server
|
||||
refine_stop; echo
|
||||
|
||||
# calculate run time based on checkpoints
|
||||
checkpoint_stats; echo
|
||||
|
||||
# word count on all files in workdir
|
||||
count_output
|
File diff suppressed because it is too large
Load Diff
|
@ -1,688 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Generierung PICA+
|
||||
# - PPNs anreichern und Exemplare clustern
|
||||
# - als PICA+ exportieren
|
||||
|
||||
# =============================== ENVIRONMENT ================================ #
|
||||
|
||||
# source the main script
|
||||
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
|
||||
|
||||
# read input
|
||||
if [[ $1 ]]; then
|
||||
inputdir1="$(readlink -e "$1")"
|
||||
else
|
||||
echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
|
||||
fi
|
||||
#if [[ $2 ]]; then
|
||||
# inputdir2="$(readlink -e "$2")"
|
||||
#fi
|
||||
|
||||
# check requirements, set trap, create workdir and tee to logfile
|
||||
init
|
||||
|
||||
# ================================= STARTUP ================================== #
|
||||
|
||||
checkpoint "Startup"; echo
|
||||
|
||||
# start OpenRefine server
|
||||
refine_start; echo
|
||||
|
||||
# ================================== IMPORT ================================== #
|
||||
|
||||
checkpoint "Import"; echo
|
||||
|
||||
# TODO: merge with Alephino data
# Bundle all CSV files from the input directory into one archive; -j stores
# the files without their directory paths. Report a failed zip run right away
# (e.g. no *.csv present) rather than letting it surface later as an import
# failure. NOTE(review): error is provided by the sourced bash-refine.sh and
# presumably aborts the run -- confirm.
zip -j "${workdir}/ba-sachsen.zip" "${inputdir1}"/*.csv \
  || error "could not create ${workdir}/ba-sachsen.zip from ${inputdir1}/*.csv"
projects["ba-sachsen"]="${workdir}/ba-sachsen.zip"
|
||||
|
||||
# Neues Projekt erstellen aus Zip-Archiv
|
||||
p="ba-sachsen"
|
||||
echo "import file" "${projects[$p]}" "..."
|
||||
if curl -fs --write-out "%{redirect_url}\n" \
|
||||
--form project-file="@${projects[$p]}" \
|
||||
--form project-name="${p}" \
|
||||
--form format="text/line-based/*sv" \
|
||||
--form options='{
|
||||
"encoding": "UTF-8",
|
||||
"includeFileSources": "false",
|
||||
"separator": ","
|
||||
}' \
|
||||
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
|
||||
> "${workdir}/${p}.id"
|
||||
then
|
||||
log "imported ${projects[$p]} as ${p}"
|
||||
else
|
||||
error "import of ${projects[$p]} failed!"
|
||||
fi
|
||||
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
|
||||
echo
|
||||
|
||||
# ================================ TRANSFORM ================================= #
|
||||
|
||||
checkpoint "Transform"; echo
|
||||
|
||||
# -------------------------- PPN anreichern über ISBN ------------------------ #
|
||||
|
||||
# spec_Z_04
|
||||
echo "PPN anreichern über ISBN..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "2000",
|
||||
"expression": "grel:with(value.replace('-',''),x,forEach(x.split('␟'),v,if(v.length()==10,with('978'+v[0,9],z,z+((10-(sum(forRange(0,12,1,i,toNumber(z[i])*(1+(i%2*2)) )) %10)) %10).toString()[0] ),v))).uniques().join('␟')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "tmp",
|
||||
"columnInsertIndex": 3
|
||||
},
|
||||
{
|
||||
"op": "core/column-split",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "tmp",
|
||||
"guessCellType": false,
|
||||
"removeOriginalColumn": true,
|
||||
"mode": "separator",
|
||||
"separator": "␟",
|
||||
"regex": false,
|
||||
"maxColumns": 0
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:forEach(cells['tmp 1'].value.cross('ba-sachsen','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:forEach(cells['tmp 1'].value.cross('ba-sachsen','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:forEach(cells['tmp 2'].value.cross('ba-sachsen','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0100",
|
||||
"expression": "grel:forEach(cells['tmp 2'].value.cross('ba-sachsen','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:forEach(cells['tmp 1'].value.cross('ba-sachsen','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:forEach(cells['tmp 1'].value.cross('ba-sachsen','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:forEach(cells['tmp 2'].value.cross('ba-sachsen','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "0110",
|
||||
"expression": "grel:forEach(cells['tmp 2'].value.cross('ba-sachsen','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "tmp 1"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "tmp 2"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ----------------------------- Exemplare clustern --------------------------- #
|
||||
|
||||
# spec_Z_05
|
||||
echo "Exemplare clustern..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "2199",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "2199",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "2199",
|
||||
"expression": "grel:forNonBlank(cells['0100'].value,v,v,cells['0110'].value)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "ppn",
|
||||
"columnInsertIndex": 1
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "ppn",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "ppn",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ppn",
|
||||
"expression": "grel:row.record.cells[columnName].value[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/row-reorder",
|
||||
"mode": "record-based",
|
||||
"sorting": {
|
||||
"criteria": [
|
||||
{
|
||||
"valueType": "string",
|
||||
"column": "ppn",
|
||||
"blankPosition": 2,
|
||||
"errorPosition": 1,
|
||||
"reverse": false,
|
||||
"caseSensitive": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "ppn",
|
||||
"expression": "grel:forNonBlank(cells['ppn'].value,v,v,forNonBlank(cells['2199'].value,v,v,''))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "id",
|
||||
"columnInsertIndex": 0
|
||||
},
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"columnName": "2199",
|
||||
"expression": "grel:if(rowIndex - row.record.fromRowIndex == 0,row.record.cells[columnName].value.join('␟'),null)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "ppn"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ------------------------- Dublette Barcodes löschen ------------------------ #
|
||||
|
||||
# spec_Z_06
|
||||
format="txt"
|
||||
echo "Dublette Barcodes exportieren"
|
||||
IFS= read -r -d '' template << "TEMPLATE"
|
||||
{{
|
||||
forNonBlank(cells['8200'].value, v, v + '\n', '')
|
||||
}}
|
||||
TEMPLATE
|
||||
if echo "${template}" | head -c -2 | curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data format="template" \
|
||||
--data prefix="" \
|
||||
--data suffix="" \
|
||||
--data separator="" \
|
||||
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||
--data-urlencode template@- \
|
||||
"${endpoint}/command/core/export-rows" \
|
||||
> "${workdir}/barcodes.${format}"
|
||||
then
|
||||
log "exported ${p} (${projects[$p]}) to ${workdir}/barcodes.${format}"
|
||||
else
|
||||
error "export of ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
echo "Dublette Barcodes löschen..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data-urlencode "operations@-" \
|
||||
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
||||
<< "JSON"
|
||||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "8200",
|
||||
"expression": "facetCount(value, 'value', '8200') > 1",
|
||||
"columnName": "8200",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "8200",
|
||||
"expression": "null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column 8200 using expression null"
|
||||
}
|
||||
]
|
||||
JSON
|
||||
then
|
||||
log "transformed ${p} (${projects[$p]})"
|
||||
else
|
||||
error "transform ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ================================== EXPORT ================================== #
|
||||
|
||||
checkpoint "Export"; echo
|
||||
|
||||
# Export des OpenRefine-Projekts für Tests
|
||||
format="openrefine.tar.gz"
|
||||
echo "export ${p} to ${format} file..."
|
||||
if curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
"${endpoint}/command/core/export-project" \
|
||||
> "${workdir}/${p}.${format}"
|
||||
then
|
||||
log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
|
||||
else
|
||||
error "export of ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# Export in PICA+
|
||||
format="pic"
|
||||
echo "export ${p} to pica+ file using template..."
|
||||
IFS= read -r -d '' template << "TEMPLATE"
|
||||
{{
|
||||
if(row.index - row.record.fromRowIndex == 0,
|
||||
'' + '\n'
|
||||
+ forNonBlank(cells['0500'].value, v, '002@ ' + '0' + v + '\n', '')
|
||||
+ forNonBlank(cells['0501a'].value, v, '002C ' + 'a' + v + forNonBlank(cells['0501b'].value, v, 'b' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['0502a'].value, v, '002D ' + 'a' + v + forNonBlank(cells['0502b'].value, v, 'b' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['0503a'].value, v, '002E ' + 'a' + v + forNonBlank(cells['0503b'].value, v, 'b' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['0100'].value, v, '003@ ' + '0' + v + '\n', '')
|
||||
+ forNonBlank(cells['0110'].value, v, '003S ' + '0' + v + '\n', '')
|
||||
+ forNonBlank(cells['2000'].value, v, forEach(v.split('␟'),x,'004A ' + '0' + x + '\n').join(''), '')
|
||||
+ forNonBlank(cells['2199'].value, v, forEach(v.split('␟'),x,'006Y ' + '0' + x + '\n').join(''), '')
|
||||
+ forNonBlank(cells['1500'].value, v, '010@ ' + forEach(v.split('␟'),x,'a' + x).join('') + '\n', '')
|
||||
+ forNonBlank(cells['1100a'].value, v, '011@ ' + 'a' + v + forNonBlank(cells['1100b'].value, v, 'b' + v, '') + forNonBlank(cells['1100n'].value, v, 'n' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['1131'].value, v, '013D ' + 'a' + v + '\n', '')
|
||||
+ forNonBlank(cells['1140'].value, v, '013H ' + 'a' + v + '\n', '')
|
||||
+ forNonBlank(cells['4000a'].value, v, '021A ' + 'a' + v + forNonBlank(cells['4000d'].value, v, 'd' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['4020a'].value, v, '032@ ' + 'a' + v + '\n', '')
|
||||
+ if(or(isNonBlank(cells['4030n'].value),isNonBlank(cells['4030p'].value)),'033A ' + forNonBlank(cells['4030n'].value, v, 'n' + v,'') + forNonBlank(cells['4030p'].value, v, 'p' + v, '') + '\n', '')
|
||||
+ forNonBlank(cells['4060a'].value, v, '034D ' + 'a' + v + '\n', '')
|
||||
+ forNonBlank(cells['0999'].value, v, '046W ' + 'a' + v + '\n', '')
|
||||
,'')
|
||||
}}{{
|
||||
if(isNonBlank(cells['E0XXb'].value),
|
||||
with(with(rowIndex - row.record.fromRowIndex + 1, i, '00'[0,2-i.length()] + i),exnr,
|
||||
'208@/' + exnr + ' a' + cells['E0XX'].value + 'b' + cells['E0XXb'].value + '\n'
|
||||
+ '209A/' + exnr + ' b4736' + 'j' + cells['7100j'].value + 'f' + cells['7100f'].value + forNonBlank(cells['7100a'].value, v, 'a' + v, '') + forNonBlank(cells['7100d'].value, v, 'd' + v, '') + 'x00' + '\n'
|
||||
+ forNonBlank(cells['8011'].value, v, '209B/' + exnr + ' a' + v + 'x11' + '\n', '')
|
||||
+ forNonBlank(cells['8100'].value, v, '209C/' + exnr + ' a' + v + 'x00' + '\n', '')
|
||||
+ forNonBlank(cells['8200'].value, v, '209G/' + exnr + ' a' + v + '\n', '')
|
||||
+ forNonBlank(cells['8600'].value, v, '209O/' + exnr + ' a' + v + 'x00' + '\n', '')
|
||||
+ forNonBlank(cells['8515'].value, v, '220B/' + exnr + ' a' + v + '\n', '')
|
||||
), '')
|
||||
}}
|
||||
TEMPLATE
|
||||
if echo "${template}" | head -c -2 | curl -fs \
|
||||
--data project="${projects[$p]}" \
|
||||
--data format="template" \
|
||||
--data prefix="" \
|
||||
--data suffix="" \
|
||||
--data separator="" \
|
||||
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||
--data-urlencode template@- \
|
||||
"${endpoint}/command/core/export-rows" \
|
||||
> "${workdir}/${p}.${format}"
|
||||
then
|
||||
log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
|
||||
else
|
||||
error "export of ${p} (${projects[$p]}) failed!"
|
||||
fi
|
||||
echo
|
||||
|
||||
# ================================== FINISH ================================== #
|
||||
|
||||
checkpoint "Finish"; echo
|
||||
|
||||
# stop OpenRefine server
|
||||
refine_stop; echo
|
||||
|
||||
# calculate run time based on checkpoints
|
||||
checkpoint_stats; echo
|
||||
|
||||
# word count on all files in workdir
|
||||
count_output
|
Loading…
Reference in New Issue