Workflow optimiert
This commit is contained in:
parent
8005060d10
commit
583ad56582
|
@ -10,5 +10,7 @@
|
||||||
* input/plauen.imp
|
* input/plauen.imp
|
||||||
* input/riesa-exemplare.txt
|
* input/riesa-exemplare.txt
|
||||||
* input/riesa-titel.txt
|
* input/riesa-titel.txt
|
||||||
2. Datenverarbeitung: `./main.sh`
|
2. Installation und initiale Datenverarbeitung: `./main.sh`
|
||||||
3. Ergebnisse prüfen: `wc -l output/*/*.tsv`
|
3. Weitere Datenverarbeitungen:
|
||||||
|
* `lib/task` für gesamten Workflow
|
||||||
|
* `lib/task --list` für eine Liste der verfügbaren Tasks
|
||||||
|
|
137
Taskfile.yml
137
Taskfile.yml
|
@ -5,8 +5,7 @@ version: '3'
|
||||||
output: 'group'
|
output: 'group'
|
||||||
|
|
||||||
vars:
|
vars:
|
||||||
DATE:
|
DATE: '{{ now | date "20060102_150405"}}'
|
||||||
sh: date +%Y%m%d_%H%M%S
|
|
||||||
|
|
||||||
env:
|
env:
|
||||||
REFINE_MEMORY: 8g
|
REFINE_MEMORY: 8g
|
||||||
|
@ -14,8 +13,8 @@ env:
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
default:
|
default:
|
||||||
desc: Workflow
|
desc: Generierung PICA+
|
||||||
deps: [bibliotheca, mkdir]
|
deps: [bibliotheca]
|
||||||
cmds:
|
cmds:
|
||||||
- tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"
|
- tasks/03-ba-sachsen.sh "output/02-bibliotheca-main"
|
||||||
sources:
|
sources:
|
||||||
|
@ -26,41 +25,15 @@ tasks:
|
||||||
REFINE_WORKDIR: output/03-ba-sachsen
|
REFINE_WORKDIR: output/03-ba-sachsen
|
||||||
REFINE_LOGFILE: log/03-ba-sachsen/{{.DATE}}.log
|
REFINE_LOGFILE: log/03-ba-sachsen/{{.DATE}}.log
|
||||||
|
|
||||||
glauchau:
|
|
||||||
desc: Glauchau
|
|
||||||
deps: [mkdir]
|
|
||||||
cmds:
|
|
||||||
- tasks/01-bibliotheca-pre.sh "input/glauchau.imp"
|
|
||||||
sources:
|
|
||||||
- input/glauchau.imp
|
|
||||||
generates:
|
|
||||||
- output/01-bibliotheca-pre/glauchau.tsv
|
|
||||||
env:
|
|
||||||
REFINE_MEMORY: 6G
|
|
||||||
REFINE_ENDPOINT: http://localhost:3334
|
|
||||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
|
||||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_glauchau.log
|
|
||||||
|
|
||||||
plauen:
|
|
||||||
desc: Plauen
|
|
||||||
deps: [mkdir]
|
|
||||||
cmds:
|
|
||||||
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
|
|
||||||
- tasks/01-bibliotheca-pre.sh "input/plauen.imp"
|
|
||||||
sources:
|
|
||||||
- input/plauen.imp
|
|
||||||
generates:
|
|
||||||
- output/01-bibliotheca-pre/plauen.tsv
|
|
||||||
env:
|
|
||||||
REFINE_MEMORY: 4G
|
|
||||||
REFINE_ENDPOINT: http://localhost:3335
|
|
||||||
REFINE_WORKDIR: output/01-bibliotheca-pre
|
|
||||||
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
|
|
||||||
|
|
||||||
bibliotheca:
|
bibliotheca:
|
||||||
desc: Hauptverarbeitung
|
desc: Bibliotheca Hauptverarbeitung
|
||||||
deps: [glauchau, plauen, mkdir]
|
# deps: [bautzen, breitenbrunn, dresden, glauchau, plauen]
|
||||||
cmds:
|
cmds:
|
||||||
|
- task: bautzen
|
||||||
|
- task: breitenbrunn
|
||||||
|
- task: dresden
|
||||||
|
- task: glauchau
|
||||||
|
- task: plauen
|
||||||
- tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"
|
- tasks/02-bibliotheca-main.sh "output/01-bibliotheca-pre"
|
||||||
sources:
|
sources:
|
||||||
- output/01-bibliotheca-pre/*.tsv
|
- output/01-bibliotheca-pre/*.tsv
|
||||||
|
@ -70,16 +43,92 @@ tasks:
|
||||||
REFINE_WORKDIR: output/02-bibliotheca-main
|
REFINE_WORKDIR: output/02-bibliotheca-main
|
||||||
REFINE_LOGFILE: log/02-bibliotheca-main/{{.DATE}}.log
|
REFINE_LOGFILE: log/02-bibliotheca-main/{{.DATE}}.log
|
||||||
|
|
||||||
|
bautzen:
|
||||||
|
desc: Bibliotheca Vorverarbeitung
|
||||||
|
cmds:
|
||||||
|
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||||
|
sources:
|
||||||
|
- '{{.INPUT}}'
|
||||||
|
generates:
|
||||||
|
- output/01-bibliotheca-pre/bautzen.tsv
|
||||||
|
vars:
|
||||||
|
INPUT: '{{.INPUT | default "input/bautzen.imp"}}'
|
||||||
|
env:
|
||||||
|
REFINE_MEMORY: '{{.REFINE_MEMORY | default "8G"}}'
|
||||||
|
REFINE_ENDPOINT: http://localhost:3335
|
||||||
|
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||||
|
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_bautzen.log
|
||||||
|
|
||||||
|
breitenbrunn:
|
||||||
|
desc: Bibliotheca Vorverarbeitung
|
||||||
|
cmds:
|
||||||
|
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||||
|
sources:
|
||||||
|
- '{{.INPUT}}'
|
||||||
|
generates:
|
||||||
|
- output/01-bibliotheca-pre/breitenbrunn.tsv
|
||||||
|
vars:
|
||||||
|
INPUT: '{{.INPUT | default "input/breitenbrunn.imp"}}'
|
||||||
|
env:
|
||||||
|
REFINE_MEMORY: '{{.REFINE_MEMORY | default "8G"}}'
|
||||||
|
REFINE_ENDPOINT: http://localhost:3335
|
||||||
|
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||||
|
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_breitenbrunn.log
|
||||||
|
|
||||||
|
dresden:
|
||||||
|
desc: Bibliotheca Vorverarbeitung
|
||||||
|
cmds:
|
||||||
|
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||||
|
sources:
|
||||||
|
- '{{.INPUT}}'
|
||||||
|
generates:
|
||||||
|
- output/01-bibliotheca-pre/dresden.tsv
|
||||||
|
vars:
|
||||||
|
INPUT: '{{.INPUT | default "input/dresden.imp"}}'
|
||||||
|
env:
|
||||||
|
REFINE_MEMORY: '{{.REFINE_MEMORY | default "8G"}}'
|
||||||
|
REFINE_ENDPOINT: http://localhost:3336
|
||||||
|
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||||
|
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_dresden.log
|
||||||
|
|
||||||
|
glauchau:
|
||||||
|
desc: Bibliotheca Vorverarbeitung
|
||||||
|
cmds:
|
||||||
|
- tasks/01-bibliotheca-pre.sh "{{.INPUT}}"
|
||||||
|
sources:
|
||||||
|
- '{{.INPUT}}'
|
||||||
|
generates:
|
||||||
|
- output/01-bibliotheca-pre/glauchau.tsv
|
||||||
|
vars:
|
||||||
|
INPUT: '{{.INPUT | default "input/glauchau.imp"}}'
|
||||||
|
env:
|
||||||
|
REFINE_MEMORY: '{{.REFINE_MEMORY | default "4G"}}'
|
||||||
|
REFINE_ENDPOINT: http://localhost:3337
|
||||||
|
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||||
|
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_glauchau.log
|
||||||
|
|
||||||
|
plauen:
|
||||||
|
desc: Bibliotheca Vorverarbeitung
|
||||||
|
cmds:
|
||||||
|
- tasks/01-bibliotheca-pre.sh "input/plauen.imp"
|
||||||
|
sources:
|
||||||
|
- input/plauen.imp
|
||||||
|
generates:
|
||||||
|
- output/01-bibliotheca-pre/plauen.tsv
|
||||||
|
env:
|
||||||
|
REFINE_MEMORY: '{{.REFINE_MEMORY | default "2G"}}'
|
||||||
|
REFINE_ENDPOINT: http://localhost:3338
|
||||||
|
REFINE_WORKDIR: output/01-bibliotheca-pre
|
||||||
|
REFINE_LOGFILE: log/01-bibliotheca-pre/{{.DATE}}_plauen.log
|
||||||
|
|
||||||
|
clean:
|
||||||
|
desc: Alle Daten löschen (reset auf Ausgangszustand)
|
||||||
|
cmds:
|
||||||
|
- rm -r lib log output
|
||||||
|
|
||||||
mkdir:
|
mkdir:
|
||||||
desc: Ordner erstellen
|
desc: Ordner erstellen
|
||||||
cmds:
|
cmds:
|
||||||
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
|
- mkdir -p output/01-bibliotheca-pre log/01-bibliotheca-pre
|
||||||
- mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main
|
- mkdir -p output/02-bibliotheca-main log/02-bibliotheca-main
|
||||||
- mkdir -p output/03-ba-sachsen log/03-ba-sachsen
|
- mkdir -p output/03-ba-sachsen log/03-ba-sachsen
|
||||||
status:
|
|
||||||
- test -d output/01-bibliotheca-pre
|
|
||||||
- test -d log/01-bibliotheca-pre
|
|
||||||
- test -d output/02-bibliotheca-main
|
|
||||||
- test -d log/02-bibliotheca-main
|
|
||||||
- test -d output/03-ba-sachsen
|
|
||||||
- test -d log/03-ba-sachsen
|
|
||||||
|
|
12
main.sh
12
main.sh
|
@ -1,7 +1,11 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# Scripte zur Transformation von Bibliotheca und Alephino nach PICA+
|
# Scripte zur Transformation von Bibliotheca und Alephino nach PICA+
|
||||||
|
|
||||||
# download task if necessary
|
# check and install requirements for bash-refine
|
||||||
|
source "${BASH_SOURCE%/*}/bash-refine.sh" || exit 1
|
||||||
|
requirements
|
||||||
|
|
||||||
|
# download task runner
|
||||||
task="$(readlink -m "${BASH_SOURCE%/*}/lib/task")"
|
task="$(readlink -m "${BASH_SOURCE%/*}/lib/task")"
|
||||||
if [[ -z "$(readlink -e "${task}")" ]]; then
|
if [[ -z "$(readlink -e "${task}")" ]]; then
|
||||||
echo "Download task..."
|
echo "Download task..."
|
||||||
|
@ -12,5 +16,11 @@ if [[ -z "$(readlink -e "${task}")" ]]; then
|
||||||
rm -f task.tar.gz
|
rm -f task.tar.gz
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# make script executable from another directory
|
||||||
|
cd "${BASH_SOURCE%/*}/" || exit 1
|
||||||
|
|
||||||
|
# create folders
|
||||||
|
"${task}" mkdir
|
||||||
|
|
||||||
# execute default task (cf. Taskfile.yml)
|
# execute default task (cf. Taskfile.yml)
|
||||||
"${task}"
|
"${task}"
|
||||||
|
|
|
@ -17,9 +17,6 @@ else
|
||||||
echo 1>&2 "Please provide path to input file"; exit 1
|
echo 1>&2 "Please provide path to input file"; exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# make script executable from another directory
|
|
||||||
cd "${BASH_SOURCE%/*}/" || exit 1
|
|
||||||
|
|
||||||
# check requirements, set trap, create workdir and tee to logfile
|
# check requirements, set trap, create workdir and tee to logfile
|
||||||
init
|
init
|
||||||
|
|
||||||
|
@ -27,6 +24,9 @@ init
|
||||||
|
|
||||||
checkpoint "Startup"; echo
|
checkpoint "Startup"; echo
|
||||||
|
|
||||||
|
# print environment variables
|
||||||
|
printenv | grep REFINE; echo
|
||||||
|
|
||||||
# start OpenRefine server
|
# start OpenRefine server
|
||||||
refine_start; echo
|
refine_start; echo
|
||||||
|
|
||||||
|
|
|
@ -16,9 +16,6 @@ else
|
||||||
echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
|
echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# make script executable from another directory
|
|
||||||
cd "${BASH_SOURCE%/*}/" || exit 1
|
|
||||||
|
|
||||||
# check requirements, set trap, create workdir and tee to logfile
|
# check requirements, set trap, create workdir and tee to logfile
|
||||||
init
|
init
|
||||||
|
|
||||||
|
|
|
@ -18,9 +18,6 @@ if [[ $2 ]]; then
|
||||||
inputdir2="$(readlink -e "$2")"
|
inputdir2="$(readlink -e "$2")"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# make script executable from another directory
|
|
||||||
cd "${BASH_SOURCE%/*}/" || exit 1
|
|
||||||
|
|
||||||
# check requirements, set trap, create workdir and tee to logfile
|
# check requirements, set trap, create workdir and tee to logfile
|
||||||
init
|
init
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue