ba-sachsen-pica/Taskfile.yml

191 lines
5.7 KiB
YAML
Raw Normal View History

2020-08-01 02:04:39 +02:00
# https://taskfile.dev
# Taskfile schema version.
version: '3'

# Output mode used for all tasks.
output: group

vars:
  # Timestamp of the current run, formatted with the Go reference layout
  # 20060102_150405 (YYYYMMDD_hhmmss). Tasks use it in their log file names.
  DATE: '{{ now | date "20060102_150405"}}'

# File-level environment defaults; individual tasks set their own values
# for some of these variables in their own env sections.
env:
  REFINE_MEMORY: 8g
  REFINE_ENDPOINT: 'http://localhost:3334'

# Task definitions follow.
tasks:
default:
  desc: Generierung PICA+
  # Both main-processing tasks are run before tasks/03-ba-sachsen.sh.
  # NOTE(review): Task starts deps concurrently, and neither dependency
  # overrides the file-level REFINE_ENDPOINT - confirm that the two main
  # scripts can safely share it.
  deps:
    - bibliotheca
    - alephino
  env:
    REFINE_WORKDIR: output/03-ba-sachsen
    REFINE_LOGFILE: 'log/03-ba-sachsen/{{.DATE}}.log'
  sources:
    # The alephino CSV is currently disabled as a source; the matching
    # "generates" entry of the alephino task is commented out as well.
    # - output/02-alephino-main/alephino.csv
    - output/02-bibliotheca-main/bibliotheca.csv
  generates:
    - output/03-ba-sachsen/ba-sachsen.pic
    - output/03-ba-sachsen/ba-sachsen.openrefine.tar.gz
  cmds:
    - 'tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"'
2020-11-09 16:12:35 +01:00
alephino:
  desc: Alephino Hauptverarbeitung
  env:
    REFINE_WORKDIR: output/02-alephino-main
    REFINE_LOGFILE: 'log/02-alephino-main/{{.DATE}}.log'
  sources:
    - 'output/01-alephino-pre/*.tsv'
  generates:
    # CSV output is currently disabled.
    # - output/02-alephino-main/alephino.csv
    - output/02-alephino-main/alephino.openrefine.tar.gz
  cmds:
    # Run the pre-processing tasks one after another, then the main script.
    # NOTE(review): these calls live in cmds, so they are skipped whenever
    # this task itself is considered up to date - confirm that is intended.
    - task: leipzig
    - task: riesa
    - 'tasks/02-alephino-main.sh "output/01-alephino-pre"'
2020-08-01 11:48:36 +02:00
bibliotheca:
  desc: Bibliotheca Hauptverarbeitung
  # The pre-processing tasks are called sequentially from cmds instead of
  # being declared as deps; the deps variant is kept here for reference.
  # deps: [bautzen, breitenbrunn, dresden, glauchau, plauen]
  env:
    REFINE_WORKDIR: output/02-bibliotheca-main
    REFINE_LOGFILE: 'log/02-bibliotheca-main/{{.DATE}}.log'
  sources:
    - 'output/01-bibliotheca-pre/*.tsv'
  generates:
    - output/02-bibliotheca-main/bibliotheca.csv
    - output/02-bibliotheca-main/bibliotheca.openrefine.tar.gz
  cmds:
    # NOTE(review): these calls live in cmds, so they are skipped whenever
    # this task itself is considered up to date - confirm that is intended.
    - task: bautzen
    - task: breitenbrunn
    - task: dresden
    - task: glauchau
    - task: plauen
    - 'tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"'
bautzen:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/bautzen.imp when INPUT is not supplied.
    INPUT: '{{.INPUT | default "input/bautzen.imp"}}'
  env:
    # Falls back to 6G when the REFINE_MEMORY template variable is empty.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "6G"}}'
    REFINE_ENDPOINT: 'http://localhost:3335'
    REFINE_WORKDIR: output/01-bibliotheca-pre
    REFINE_LOGFILE: 'log/01-bibliotheca-pre/{{.DATE}}_bautzen.log'
  sources:
    - '{{.INPUT}}'
  generates:
    - output/01-bibliotheca-pre/bautzen.tsv
  cmds:
    - 'tasks/01-bibliotheca-pre.sh "{{.INPUT}}"'
2020-08-01 11:48:36 +02:00
breitenbrunn:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/breitenbrunn.imp when INPUT is not
    # supplied.
    INPUT: '{{.INPUT | default "input/breitenbrunn.imp"}}'
  env:
    # Falls back to 4G when the REFINE_MEMORY template variable is empty.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
    REFINE_ENDPOINT: 'http://localhost:3335'
    REFINE_WORKDIR: output/01-bibliotheca-pre
    REFINE_LOGFILE: 'log/01-bibliotheca-pre/{{.DATE}}_breitenbrunn.log'
  sources:
    - '{{.INPUT}}'
  generates:
    - output/01-bibliotheca-pre/breitenbrunn.tsv
  cmds:
    - 'tasks/01-bibliotheca-pre.sh "{{.INPUT}}"'
dresden:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/dresden.imp when INPUT is not supplied.
    INPUT: '{{.INPUT | default "input/dresden.imp"}}'
  env:
    # Falls back to 7G when the REFINE_MEMORY template variable is empty.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
    REFINE_ENDPOINT: 'http://localhost:3336'
    REFINE_WORKDIR: output/01-bibliotheca-pre
    REFINE_LOGFILE: 'log/01-bibliotheca-pre/{{.DATE}}_dresden.log'
  sources:
    - '{{.INPUT}}'
  generates:
    - output/01-bibliotheca-pre/dresden.tsv
  cmds:
    - 'tasks/01-bibliotheca-pre.sh "{{.INPUT}}"'
2020-11-09 16:12:35 +01:00
leipzig:
  desc: Alephino Vorverarbeitung
  vars:
    # The two input files passed to the script; each falls back to its
    # default path when the variable is not supplied.
    TITEL: '{{.TITEL | default "input/leipzig-titel.txt"}}'
    EXEMPLARE: '{{.EXEMPLARE | default "input/leipzig-exemplare.txt"}}'
  env:
    # Falls back to 7G when the REFINE_MEMORY template variable is empty.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
    REFINE_ENDPOINT: 'http://localhost:3339'
    REFINE_WORKDIR: output/01-alephino-pre
    REFINE_LOGFILE: 'log/01-alephino-pre/{{.DATE}}_leipzig.log'
  sources:
    - '{{.TITEL}}'
    - '{{.EXEMPLARE}}'
  generates:
    - output/01-alephino-pre/leipzig.tsv
  cmds:
    - 'tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"'
2020-08-01 02:04:39 +02:00
glauchau:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/glauchau.imp when INPUT is not supplied.
    INPUT: '{{.INPUT | default "input/glauchau.imp"}}'
  env:
    # Falls back to 4G when the REFINE_MEMORY template variable is empty.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
    REFINE_ENDPOINT: 'http://localhost:3337'
    REFINE_WORKDIR: output/01-bibliotheca-pre
    REFINE_LOGFILE: 'log/01-bibliotheca-pre/{{.DATE}}_glauchau.log'
  sources:
    - '{{.INPUT}}'
  generates:
    - output/01-bibliotheca-pre/glauchau.tsv
  cmds:
    - 'tasks/01-bibliotheca-pre.sh "{{.INPUT}}"'
plauen:
  desc: Bibliotheca Vorverarbeitung
  vars:
    # Input file; falls back to input/plauen.imp when INPUT is not supplied.
    # This follows the INPUT pattern of the other Bibliotheca pre-processing
    # tasks instead of hard-coding the path in cmds and sources.
    INPUT: '{{.INPUT | default "input/plauen.imp"}}'
  env:
    # Falls back to 2G when the REFINE_MEMORY template variable is empty.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "2G"}}'
    REFINE_ENDPOINT: http://localhost:3338
    REFINE_WORKDIR: output/01-bibliotheca-pre
    REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
  sources:
    - '{{.INPUT}}'
  generates:
    - output/01-bibliotheca-pre/plauen.tsv
  cmds:
    - tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
2020-11-09 16:12:35 +01:00
riesa:
  desc: Alephino Vorverarbeitung
  vars:
    # The two input files passed to the script; each falls back to its
    # default path when the variable is not supplied.
    TITEL: '{{.TITEL | default "input/riesa-titel.txt"}}'
    EXEMPLARE: '{{.EXEMPLARE | default "input/riesa-exemplare.txt"}}'
  env:
    # Falls back to 7G when the REFINE_MEMORY template variable is empty.
    REFINE_MEMORY: '{{.REFINE_MEMORY | default "7G"}}'
    REFINE_ENDPOINT: 'http://localhost:3339'
    REFINE_WORKDIR: output/01-alephino-pre
    REFINE_LOGFILE: 'log/01-alephino-pre/{{.DATE}}_riesa.log'
  sources:
    - '{{.TITEL}}'
    - '{{.EXEMPLARE}}'
  generates:
    - output/01-alephino-pre/riesa.tsv
  cmds:
    - 'tasks/01-alephino-pre.sh "{{.TITEL}}" "{{.EXEMPLARE}}"'
2020-08-01 11:48:36 +02:00
clean:
  desc: Alle Daten löschen (reset auf Ausgangszustand)
  cmds:
    # Removes the lib, log and output directories. -f keeps the task from
    # failing when one of them does not exist (e.g. when clean is run twice
    # or before the first processing run).
    - rm -rf lib log output
2020-08-01 02:04:39 +02:00
mkdir:
  desc: Ordner erstellen
  cmds:
    # One output directory and one log directory per processing step;
    # -p creates missing parents and accepts directories that already exist.
    - 'mkdir -p output/01-alephino-pre log/01-alephino-pre'
    - 'mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre'
    - 'mkdir -p output/02-alephino-main log/02-alephino-main'
    - 'mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main'
    - 'mkdir -p output/03-ba-sachsen log/03-ba-sachsen'