Workflow optimiert
This commit is contained in:
parent
8005060d10
commit
583ad56582
|
@ -10,5 +10,7 @@
|
|||
* input/plauen.imp
|
||||
* input/riesa-exemplare.txt
|
||||
* input/riesa-titel.txt
|
||||
2. Datenverarbeitung: `./main.sh`
|
||||
3. Ergebnisse prüfen: `wc -l output/*/*.tsv`
|
||||
2. Installation und initiale Datenverarbeitung: `./main.sh`
|
||||
3. Weitere Datenverarbeitungen:
|
||||
* `lib/task` für gesamten Workflow
|
||||
* `lib/task --list` für eine Liste der verfügbaren Tasks
|
||||
|
|
137
Taskfile.yml
137
Taskfile.yml
|
@ -5,8 +5,7 @@ version: '3'
|
|||
output: 'group'
|
||||
|
||||
vars:
|
||||
DATE:
|
||||
sh: date +%Y%m%d_%H%M%S
|
||||
DATE: '{{ now | date "20060102_150405"}}'
|
||||
|
||||
env:
|
||||
REFINE_MEMORY: 8g
|
||||
|
@ -14,8 +13,8 @@ env:
|
|||
|
||||
tasks:
|
||||
default:
|
||||
desc: Workflow
|
||||
deps: [bibliotheca, mkdir]
|
||||
desc: Generierung PICA+
|
||||
deps: [bibliotheca]
|
||||
cmds:
|
||||
- tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"
|
||||
sources:
|
||||
|
@ -26,41 +25,15 @@ tasks:
|
|||
REFINE_WORKDIR: output/03-ba-sachsen
|
||||
REFINE_LOGFILE: log/03-ba-sachsen/{{.DATE}}.log
|
||||
|
||||
glauchau:
|
||||
desc: Glauchau
|
||||
deps: [mkdir]
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "input/glauchau.imp"
|
||||
sources:
|
||||
- input/glauchau.imp
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/glauchau.tsv
|
||||
env:
|
||||
REFINE_MEMORY: 6G
|
||||
REFINE_ENDPOINT: http://localhost:3334
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_glauchau.log
|
||||
|
||||
plauen:
|
||||
desc: Plauen
|
||||
deps: [mkdir]
|
||||
cmds:
|
||||
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
|
||||
- tasks/01-bibliotheca-pre.sh "input/plauen.imp"
|
||||
sources:
|
||||
- input/plauen.imp
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/plauen.tsv
|
||||
env:
|
||||
REFINE_MEMORY: 4G
|
||||
REFINE_ENDPOINT: http://localhost:3335
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
|
||||
|
||||
bibliotheca:
|
||||
desc: Hauptverarbeitung
|
||||
deps: [glauchau, plauen, mkdir]
|
||||
desc: Bibliotheca Hauptverarbeitung
|
||||
# deps: [bautzen, breitenbrunn, dresden, glauchau, plauen]
|
||||
cmds:
|
||||
- task: bautzen
|
||||
- task: breitenbrunn
|
||||
- task: dresden
|
||||
- task: glauchau
|
||||
- task: plauen
|
||||
- tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"
|
||||
sources:
|
||||
- output/01-bibliotheca-pre/*.tsv
|
||||
|
@ -70,16 +43,92 @@ tasks:
|
|||
REFINE_WORKDIR: output/02-bibliotheca-main
|
||||
REFINE_LOGFILE: log/02-bibliotheca-main/{{.DATE}}.log
|
||||
|
||||
bautzen:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/bautzen.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/bautzen.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "8G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3334
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_bautzen.log
|
||||
|
||||
breitenbrunn:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/breitenbrunn.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/breitenbrunn.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "8G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3335
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_breitenbrunn.log
|
||||
|
||||
dresden:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/dresden.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/dresden.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "8G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3336
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_dresden.log
|
||||
|
||||
glauchau:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||
sources:
|
||||
- '{{.INPUT}}'
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/glauchau.tsv
|
||||
vars:
|
||||
INPUT: '{{.INPUT | default "input/glauchau.imp"}}'
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3337
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_glauchau.log
|
||||
|
||||
plauen:
|
||||
desc: Bibliotheca Vorverarbeitung
|
||||
cmds:
|
||||
- tasks/01-bibliotheca-pre.sh "input/plauen.imp"
|
||||
sources:
|
||||
- input/plauen.imp
|
||||
generates:
|
||||
- output/01-bibliotheca-pre/plauen.tsv
|
||||
env:
|
||||
REFINE_MEMORY: '{{.REFINE_MEMORY | default "2G"}}'
|
||||
REFINE_ENDPOINT: http://localhost:3338
|
||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
|
||||
|
||||
clean:
|
||||
desc: Alle Daten löschen (reset auf Ausgangszustand)
|
||||
cmds:
|
||||
- rm -r lib log output
|
||||
|
||||
mkdir:
|
||||
desc: Ordner erstellen
|
||||
cmds:
|
||||
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
|
||||
- mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main
|
||||
- mkdir -p output/03-ba-sachsen log/03-ba-sachsen
|
||||
status:
|
||||
- test -d output/01-bibliotheca-pre
|
||||
- test -d log/01-bibliotheca-pre
|
||||
- test -d output/02-bibliotheca-main
|
||||
- test -d log/02-bibliotheca-main
|
||||
- test -d output/03-ba-sachsen
|
||||
- test -d log/03-ba-sachsen
|
||||
|
|
12
main.sh
12
main.sh
|
@ -1,7 +1,11 @@
|
|||
#!/bin/bash
|
||||
# Scripte zur Transformation von Bibliotheca und Alephino nach PICA+
|
||||
|
||||
# download task if necessary
|
||||
# check and install requirements for bash-refine
|
||||
source "${BASH_SOURCE%/*}/bash-refine.sh" || exit 1
|
||||
requirements
|
||||
|
||||
# download task runner
|
||||
task="$(readlink -m "${BASH_SOURCE%/*}/lib/task")"
|
||||
if [[ -z "$(readlink -e "${task}")" ]]; then
|
||||
echo "Download task..."
|
||||
|
@ -12,5 +16,11 @@ if [[ -z "$(readlink -e "${task}")" ]]; then
|
|||
rm -f task.tar.gz
|
||||
fi
|
||||
|
||||
# make script executable from another directory
|
||||
cd "${BASH_SOURCE%/*}/" || exit 1
|
||||
|
||||
# create folders
|
||||
"${task}" mkdir
|
||||
|
||||
# execute default task (cf. Taskfile.yml)
|
||||
"${task}"
|
||||
|
|
|
@ -17,9 +17,6 @@ else
|
|||
echo 1>&2 "Please provide path to input file"; exit 1
|
||||
fi
|
||||
|
||||
# make script executable from another directory
|
||||
cd "${BASH_SOURCE%/*}/" || exit 1
|
||||
|
||||
# check requirements, set trap, create workdir and tee to logfile
|
||||
init
|
||||
|
||||
|
@ -27,6 +24,9 @@ init
|
|||
|
||||
checkpoint "Startup"; echo
|
||||
|
||||
# print environment variables
|
||||
printenv | grep REFINE; echo
|
||||
|
||||
# start OpenRefine server
|
||||
refine_start; echo
|
||||
|
||||
|
|
|
@ -16,9 +16,6 @@ else
|
|||
echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
|
||||
fi
|
||||
|
||||
# make script executable from another directory
|
||||
cd "${BASH_SOURCE%/*}/" || exit 1
|
||||
|
||||
# check requirements, set trap, create workdir and tee to logfile
|
||||
init
|
||||
|
||||
|
|
|
@ -18,9 +18,6 @@ if [[ $2 ]]; then
|
|||
inputdir2="$(readlink -e "$2")"
|
||||
fi
|
||||
|
||||
# make script executable from another directory
|
||||
cd "${BASH_SOURCE%/*}/" || exit 1
|
||||
|
||||
# check requirements, set trap, create workdir and tee to logfile
|
||||
init
|
||||
|
||||
|
|
Loading…
Reference in New Issue