2020-08-01 02:04:39 +02:00
|
|
|
# Taskfile for https://taskfile.dev (schema version 3).
version: "3"

# Emit the output of each command as one contiguous group.
output: group

vars:
  # Timestamp in Go reference layout (yyyymmdd_HHMMSS); the tasks below
  # embed it in their log file names.
  DATE: '{{ now | date "20060102_150405"}}'

# Environment defaults; several tasks below set their own values for these.
env:
  REFINE_MEMORY: "8g"
  REFINE_ENDPOINT: "http://localhost:3334"

tasks:
|
|
|
|
default:
  desc: Generierung PICA+
  # Both main-processing tasks are dependencies, so they complete before
  # the command below is executed.
  deps:
    - bibliotheca
    - alephino
  env:
    REFINE_WORKDIR: "output/03-ba-sachsen"
    REFINE_LOGFILE: "log/03-ba-sachsen/{{.DATE}}.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/03-ba-sachsen.sh"
    # - output/02-alephino-main/alephino.csv
    - "output/02-bibliotheca-main/bibliotheca.csv"
  generates:
    - "output/03-ba-sachsen/ba-sachsen.pic"
    - "output/03-ba-sachsen/ba-sachsen.openrefine.tar.gz"
  cmds:
    # The script receives the Bibliotheca main output directory as its
    # only argument.
    - 'tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"'
|
|
|
|
|
2020-11-09 16:12:35 +01:00
|
|
|
alephino:
  desc: Alephino Hauptverarbeitung
  env:
    REFINE_ENDPOINT: "http://localhost:3334"
    REFINE_WORKDIR: "output/02-alephino-main"
    REFINE_LOGFILE: "log/02-alephino-main/{{.DATE}}.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/02-alephino-main.sh"
    - "output/01-alephino-pre/*.tsv"
  generates:
    # - output/02-alephino-main/alephino.csv
    - "output/02-alephino-main/alephino.openrefine.tar.gz"
  cmds:
    # The two pre-processing tasks are listed as commands, so they run
    # one after the other before the main script.
    - task: leipzig
    - task: riesa
    - 'tasks/02-alephino-main.sh "output/01-alephino-pre"'
|
|
|
|
|
2020-08-01 11:48:36 +02:00
|
|
|
bibliotheca:
  desc: Bibliotheca Hauptverarbeitung
  env:
    REFINE_ENDPOINT: "http://localhost:3335"
    REFINE_WORKDIR: "output/02-bibliotheca-main"
    REFINE_LOGFILE: "log/02-bibliotheca-main/{{.DATE}}.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/02-bibliotheca-main.sh"
    - "output/01-bibliotheca-pre/*.tsv"
  generates:
    - "output/02-bibliotheca-main/bibliotheca.csv"
    - "output/02-bibliotheca-main/bibliotheca.openrefine.tar.gz"
  # deps: [bautzen, breitenbrunn, dresden, glauchau, plauen]
  cmds:
    # The pre-processing tasks are listed as commands (not deps), so they
    # run one after the other before the main script.
    - task: bautzen
    - task: breitenbrunn
    - task: dresden
    - task: glauchau
    # - task: plauen
    - 'tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"'
|
|
|
|
|
|
|
|
bautzen:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/bautzen.imp when INPUT is not set.
    INPUT: '{{.INPUT | default "input/bautzen.imp"}}'
  env:
    # 6G unless a REFINE_MEMORY template variable is already defined.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "6G"}}'
    REFINE_ENDPOINT: "http://localhost:3334"
    REFINE_WORKDIR: "output/01-bibliotheca-pre"
    REFINE_LOGFILE: "log/01-bibliotheca-pre/{{.DATE}}_bautzen.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/01-bibliotheca-pre.sh"
    - "{{.INPUT}}"
  generates:
    - "output/01-bibliotheca-pre/bautzen.tsv"
  cmds:
    - 'tasks/01-bibliotheca-pre.sh "{{.INPUT}}"'
|
2020-08-01 11:48:36 +02:00
|
|
|
|
|
|
|
breitenbrunn:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/breitenbrunn.imp when INPUT is not set.
    INPUT: '{{.INPUT | default "input/breitenbrunn.imp"}}'
  env:
    # 4G unless a REFINE_MEMORY template variable is already defined.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
    REFINE_ENDPOINT: "http://localhost:3335"
    REFINE_WORKDIR: "output/01-bibliotheca-pre"
    REFINE_LOGFILE: "log/01-bibliotheca-pre/{{.DATE}}_breitenbrunn.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/01-bibliotheca-pre.sh"
    - "{{.INPUT}}"
  generates:
    - "output/01-bibliotheca-pre/breitenbrunn.tsv"
  cmds:
    - 'tasks/01-bibliotheca-pre.sh "{{.INPUT}}"'
|
|
|
|
|
|
|
|
dresden:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/dresden.imp when INPUT is not set.
    INPUT: '{{.INPUT | default "input/dresden.imp"}}'
  env:
    # 7G unless a REFINE_MEMORY template variable is already defined.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
    REFINE_ENDPOINT: "http://localhost:3336"
    REFINE_WORKDIR: "output/01-bibliotheca-pre"
    REFINE_LOGFILE: "log/01-bibliotheca-pre/{{.DATE}}_dresden.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/01-bibliotheca-pre.sh"
    - "{{.INPUT}}"
  generates:
    - "output/01-bibliotheca-pre/dresden.tsv"
  cmds:
    - 'tasks/01-bibliotheca-pre.sh "{{.INPUT}}"'
|
|
|
|
|
2020-11-09 16:12:35 +01:00
|
|
|
leipzig:
  desc: Alephino Vorverarbeitung
  vars:
    # Both input files fall back to the input/leipzig-*.txt defaults when
    # the corresponding variable is not set.
    TITEL: '{{.TITEL | default "input/leipzig-titel.txt"}}'
    EXEMPLARE: '{{.EXEMPLARE | default "input/leipzig-exemplare.txt"}}'
  env:
    # 7G unless a REFINE_MEMORY template variable is already defined.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
    REFINE_ENDPOINT: "http://localhost:3337"
    REFINE_WORKDIR: "output/01-alephino-pre"
    REFINE_LOGFILE: "log/01-alephino-pre/{{.DATE}}_leipzig.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/01-alephino-pre.sh"
    - "{{.TITEL}}"
    - "{{.EXEMPLARE}}"
  generates:
    - "output/01-alephino-pre/leipzig.tsv"
  cmds:
    # Argument order: title file first, then the item ("Exemplare") file.
    - 'tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"'
|
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
glauchau:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/glauchau.imp when INPUT is not set.
    INPUT: '{{.INPUT | default "input/glauchau.imp"}}'
  env:
    # 4G unless a REFINE_MEMORY template variable is already defined.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
    REFINE_ENDPOINT: "http://localhost:3338"
    REFINE_WORKDIR: "output/01-bibliotheca-pre"
    REFINE_LOGFILE: "log/01-bibliotheca-pre/{{.DATE}}_glauchau.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/01-bibliotheca-pre.sh"
    - "{{.INPUT}}"
  generates:
    - "output/01-bibliotheca-pre/glauchau.tsv"
  cmds:
    - 'tasks/01-bibliotheca-pre.sh "{{.INPUT}}"'
|
|
|
|
|
|
|
|
plauen:
  desc: Bibliotheca Vorverarbeitung
  cmds:
    - tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - tasks/01-bibliotheca-pre.sh
    - '{{.INPUT}}'
  generates:
    - output/01-bibliotheca-pre/plauen.tsv
  vars:
    # Input file; falls back to input/plauen.imp when INPUT is not set.
    # Parameterised like the other Bibliotheca pre-processing tasks so the
    # path used by cmds and sources is defined in one place.
    INPUT: '{{.INPUT | default "input/plauen.imp"}}'
  env:
    # 2G unless a REFINE_MEMORY template variable is already defined.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "2G"}}'
    REFINE_ENDPOINT: http://localhost:3339
    REFINE_WORKDIR: output/01-bibliotheca-pre
    REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
|
|
|
|
|
2020-11-09 16:12:35 +01:00
|
|
|
riesa:
  desc: Alephino Vorverarbeitung
  vars:
    # Both input files fall back to the input/riesa-*.txt defaults when
    # the corresponding variable is not set.
    TITEL: '{{.TITEL | default "input/riesa-titel.txt"}}'
    EXEMPLARE: '{{.EXEMPLARE | default "input/riesa-exemplare.txt"}}'
  env:
    # 7G unless a REFINE_MEMORY template variable is already defined.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
    REFINE_ENDPOINT: "http://localhost:3340"
    REFINE_WORKDIR: "output/01-alephino-pre"
    REFINE_LOGFILE: "log/01-alephino-pre/{{.DATE}}_riesa.log"
  # Files Task checks to decide whether this task is already up to date.
  sources:
    - "tasks/01-alephino-pre.sh"
    - "{{.TITEL}}"
    - "{{.EXEMPLARE}}"
  generates:
    - "output/01-alephino-pre/riesa.tsv"
  cmds:
    # Argument order: title file first, then the item ("Exemplare") file.
    - 'tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"'
|
|
|
|
|
2020-08-01 11:48:36 +02:00
|
|
|
clean:
  desc: Alle Daten löschen (reset auf Ausgangszustand)
  cmds:
    # Removes the lib, log and output directories recursively.
    # -f keeps the task from failing when one of them does not exist
    # (fresh checkout, or clean run twice in a row).
    - rm -rf lib log output
|
2020-08-01 02:04:39 +02:00
|
|
|
|
|
|
|
mkdir:
  desc: Ordner erstellen
  # One command per processing stage; each creates the stage's output
  # directory together with its log directory. -p makes every call a no-op
  # for directories that already exist.
  cmds:
    - "mkdir -p output/01-alephino-pre log/01-alephino-pre"
    - "mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre"
    - "mkdir -p output/02-alephino-main log/02-alephino-main"
    - "mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main"
    - "mkdir -p output/03-ba-sachsen log/03-ba-sachsen"
|