Statusprüfungen in Taskfiles der Datenquelle #9
This commit is contained in:
parent
278bb59ace
commit
1286c8177b
20
Taskfile.yml
20
Taskfile.yml
|
@ -57,10 +57,6 @@ tasks:
|
||||||
- rm -f *.xml
|
- rm -f *.xml
|
||||||
# Identifier als Dateinamen
|
# Identifier als Dateinamen
|
||||||
- for f in xx*; do mv "$f" "$(xmllint --xpath "//*[local-name(.) = 'recordIdentifier']/text()" "$f").xml"; done
|
- for f in xx*; do mv "$f" "$(xmllint --xpath "//*[local-name(.) = 'recordIdentifier']/text()" "$f").xml"; done
|
||||||
sources:
|
|
||||||
- ../refine/{{.PROJECT}}.txt
|
|
||||||
generates:
|
|
||||||
- ./*.xml
|
|
||||||
preconditions:
|
preconditions:
|
||||||
- test -n "{{.PROJECT}}"
|
- test -n "{{.PROJECT}}"
|
||||||
|
|
||||||
|
@ -70,10 +66,6 @@ tasks:
|
||||||
# Validierung gegen METS Schema
|
# Validierung gegen METS Schema
|
||||||
- wget -q -nc https://www.loc.gov/standards/mets/mets.xsd
|
- wget -q -nc https://www.loc.gov/standards/mets/mets.xsd
|
||||||
- xmllint --schema mets.xsd --noout split/*.xml > validate.log 2>&1
|
- xmllint --schema mets.xsd --noout split/*.xml > validate.log 2>&1
|
||||||
sources:
|
|
||||||
- split/*.xml
|
|
||||||
generates:
|
|
||||||
- validate.log
|
|
||||||
preconditions:
|
preconditions:
|
||||||
- test -n "{{.PROJECT}}"
|
- test -n "{{.PROJECT}}"
|
||||||
|
|
||||||
|
@ -82,10 +74,6 @@ tasks:
|
||||||
cmds:
|
cmds:
|
||||||
# ZIP-Archiv mit Zeitstempel erstellen
|
# ZIP-Archiv mit Zeitstempel erstellen
|
||||||
- zip -q -FS -j {{.PROJECT}}_{{.DATE}}.zip split/*.xml
|
- zip -q -FS -j {{.PROJECT}}_{{.DATE}}.zip split/*.xml
|
||||||
sources:
|
|
||||||
- split/*.xml
|
|
||||||
generates:
|
|
||||||
- "{{.PROJECT}}_{{.DATE}}.zip"
|
|
||||||
preconditions:
|
preconditions:
|
||||||
- test -n "{{.PROJECT}}"
|
- test -n "{{.PROJECT}}"
|
||||||
|
|
||||||
|
@ -99,10 +87,6 @@ tasks:
|
||||||
- rm -rf old new
|
- rm -rf old new
|
||||||
# Diff prüfen, ob es weniger als 500 Zeilen enthält
|
# Diff prüfen, ob es weniger als 500 Zeilen enthält
|
||||||
- if (( 500 < $(wc -l <diff.log) )); then echo 1>&2 "Unerwartet große Änderungen in $PWD/diff.log!" && exit 1; fi
|
- if (( 500 < $(wc -l <diff.log) )); then echo 1>&2 "Unerwartet große Änderungen in $PWD/diff.log!" && exit 1; fi
|
||||||
sources:
|
|
||||||
- split/*.xml
|
|
||||||
generates:
|
|
||||||
- diff.log
|
|
||||||
status:
|
status:
|
||||||
# Task nicht ausführen, wenn weniger als zwei ZIP-Archive vorhanden
|
# Task nicht ausführen, wenn weniger als zwei ZIP-Archive vorhanden
|
||||||
- test -z $(ls -t *.zip | sed -n 2p)
|
- test -z $(ls -t *.zip | sed -n 2p)
|
||||||
|
@ -119,10 +103,6 @@ tasks:
|
||||||
- rm -rf links.txt
|
- rm -rf links.txt
|
||||||
# Logdatei auf status code != 2XX prüfen
|
# Logdatei auf status code != 2XX prüfen
|
||||||
- if grep '^[^2]' linkcheck.log; then echo 1>&2 "Logdatei $PWD/linkcheck.log enthält problematische status codes!" && exit 1; fi
|
- if grep '^[^2]' linkcheck.log; then echo 1>&2 "Logdatei $PWD/linkcheck.log enthält problematische status codes!" && exit 1; fi
|
||||||
sources:
|
|
||||||
- split/*.xml
|
|
||||||
generates:
|
|
||||||
- linkcheck.log
|
|
||||||
preconditions:
|
preconditions:
|
||||||
- test -n "{{.PROJECT}}"
|
- test -n "{{.PROJECT}}"
|
||||||
|
|
||||||
|
|
|
@ -3,22 +3,16 @@
|
||||||
version: '3'
|
version: '3'
|
||||||
|
|
||||||
tasks:
|
tasks:
|
||||||
# Tasks mit ":" sind für alle Datenquellen gleich in Taskfile.yml definiert
|
|
||||||
default:
|
default:
|
||||||
desc: OPUS Siegen
|
desc: OPUS Siegen
|
||||||
deps: [harvest]
|
deps: [harvest]
|
||||||
cmds:
|
cmds:
|
||||||
- task: refine
|
- task: refine
|
||||||
- task: :check
|
- task: check
|
||||||
vars: {PROJECT: "siegen", MINIMUM: "1250"}
|
- task: split
|
||||||
- task: :split
|
- task: validate
|
||||||
vars: {PROJECT: "siegen"}
|
- task: zip
|
||||||
- task: :validate
|
- task: diff
|
||||||
vars: {PROJECT: "siegen"}
|
|
||||||
- task: :zip
|
|
||||||
vars: {PROJECT: "siegen"}
|
|
||||||
- task: :diff
|
|
||||||
vars: {PROJECT: "siegen"}
|
|
||||||
|
|
||||||
harvest:
|
harvest:
|
||||||
dir: data/siegen/harvest
|
dir: data/siegen/harvest
|
||||||
|
@ -77,6 +71,7 @@ tasks:
|
||||||
- kill -9 $(lsof -t -i:$PORT) # SIGKILL (-9) verhindert unnötigen Speichervorgang
|
- kill -9 $(lsof -t -i:$PORT) # SIGKILL (-9) verhindert unnötigen Speichervorgang
|
||||||
- rm -rf ./*.project* && rm -f workspace.json # temporäre Dateien von OpenRefine löschen
|
- rm -rf ./*.project* && rm -f workspace.json # temporäre Dateien von OpenRefine löschen
|
||||||
sources:
|
sources:
|
||||||
|
# wenn "dir:" für task gesetzt ist, dann relative Links ausgehend von dir
|
||||||
- ../harvest/siegen.xml
|
- ../harvest/siegen.xml
|
||||||
- ../../../rules/siegen/*.json
|
- ../../../rules/siegen/*.json
|
||||||
- ../../../rules/siegen/template.txt
|
- ../../../rules/siegen/template.txt
|
||||||
|
@ -85,11 +80,61 @@ tasks:
|
||||||
- siegen.txt
|
- siegen.txt
|
||||||
- siegen-debug.tsv
|
- siegen-debug.tsv
|
||||||
|
|
||||||
|
check:
|
||||||
|
cmds:
|
||||||
|
# Tasks mit ":" sind für alle Datenquellen gleich in Taskfile.yml definiert
|
||||||
|
- task: :check
|
||||||
|
vars: {PROJECT: "siegen", MINIMUM: "1250"}
|
||||||
|
sources:
|
||||||
|
# wenn "dir:" für task nicht gesetzt ist, dann relative Links ausgehend von Taskfile.yml
|
||||||
|
- data/siegen/refine/openrefine.log
|
||||||
|
- data/siegen/refine/siegen.txt
|
||||||
|
|
||||||
|
split:
|
||||||
|
cmds:
|
||||||
|
- task: :split
|
||||||
|
vars: {PROJECT: "siegen"}
|
||||||
|
sources:
|
||||||
|
- data/siegen/refine/siegen.txt
|
||||||
|
generates:
|
||||||
|
- data/siegen/split/*.xml
|
||||||
|
|
||||||
|
validate:
|
||||||
|
cmds:
|
||||||
|
- task: :validate
|
||||||
|
vars: {PROJECT: "siegen"}
|
||||||
|
sources:
|
||||||
|
- data/siegen/split/*.xml
|
||||||
|
generates:
|
||||||
|
- data/siegen/validate.log
|
||||||
|
|
||||||
|
zip:
|
||||||
|
cmds:
|
||||||
|
- task: :zip
|
||||||
|
vars: {PROJECT: "siegen"}
|
||||||
|
sources:
|
||||||
|
- data/siegen/split/*.xml
|
||||||
|
generates:
|
||||||
|
- data/siegen/siegen_{{.DATE}}.zip
|
||||||
|
|
||||||
|
diff:
|
||||||
|
cmds:
|
||||||
|
- task: :diff
|
||||||
|
vars: {PROJECT: "siegen"}
|
||||||
|
sources:
|
||||||
|
- data/siegen/split/*.xml
|
||||||
|
generates:
|
||||||
|
- data/siegen/diff.log
|
||||||
|
|
||||||
linkcheck:
|
linkcheck:
|
||||||
desc: OPUS Siegen links überprüfen
|
desc: OPUS Siegen links überprüfen
|
||||||
cmds:
|
cmds:
|
||||||
- task: :linkcheck
|
- task: :linkcheck
|
||||||
vars: {PROJECT: "siegen"}
|
vars: {PROJECT: "siegen"}
|
||||||
|
sources:
|
||||||
|
- data/siegen/split/*.xml
|
||||||
|
generates:
|
||||||
|
- data/siegen/linkcheck.log
|
||||||
|
|
||||||
delete:
|
delete:
|
||||||
desc: OPUS Siegen cache löschen
|
desc: OPUS Siegen cache löschen
|
||||||
|
|
|
@ -9,16 +9,11 @@ tasks:
|
||||||
deps: [harvest]
|
deps: [harvest]
|
||||||
cmds:
|
cmds:
|
||||||
- task: refine
|
- task: refine
|
||||||
- task: :check
|
- task: check
|
||||||
vars: {PROJECT: "wuppertal", MINIMUM: "1300"}
|
- task: split
|
||||||
- task: :split
|
- task: validate
|
||||||
vars: {PROJECT: "wuppertal"}
|
- task: zip
|
||||||
- task: :validate
|
- task: diff
|
||||||
vars: {PROJECT: "wuppertal"}
|
|
||||||
- task: :zip
|
|
||||||
vars: {PROJECT: "wuppertal"}
|
|
||||||
- task: :diff
|
|
||||||
vars: {PROJECT: "wuppertal"}
|
|
||||||
|
|
||||||
harvest:
|
harvest:
|
||||||
dir: data/wuppertal/harvest
|
dir: data/wuppertal/harvest
|
||||||
|
@ -79,6 +74,7 @@ tasks:
|
||||||
- kill -9 $(lsof -t -i:$PORT) # SIGKILL (-9) verhindert unnötigen Speichervorgang
|
- kill -9 $(lsof -t -i:$PORT) # SIGKILL (-9) verhindert unnötigen Speichervorgang
|
||||||
- rm -rf ./*.project* && rm -f workspace.json # temporäre Dateien von OpenRefine löschen
|
- rm -rf ./*.project* && rm -f workspace.json # temporäre Dateien von OpenRefine löschen
|
||||||
sources:
|
sources:
|
||||||
|
# wenn "dir:" für task gesetzt ist, dann relative Links ausgehend von dir
|
||||||
- ../harvest/wuppertal.xml
|
- ../harvest/wuppertal.xml
|
||||||
- ../../../rules/wuppertal/*.json
|
- ../../../rules/wuppertal/*.json
|
||||||
- ../../../rules/wuppertal/template.txt
|
- ../../../rules/wuppertal/template.txt
|
||||||
|
@ -87,11 +83,62 @@ tasks:
|
||||||
- wuppertal.txt
|
- wuppertal.txt
|
||||||
- wuppertal-debug.tsv
|
- wuppertal-debug.tsv
|
||||||
|
|
||||||
|
check:
|
||||||
|
cmds:
|
||||||
|
# Tasks mit ":" sind für alle Datenquellen gleich in Taskfile.yml definiert
|
||||||
|
- task: :check
|
||||||
|
vars: {PROJECT: "wuppertal", MINIMUM: "1300"}
|
||||||
|
sources:
|
||||||
|
# wenn "dir:" für task nicht gesetzt ist, dann relative Links ausgehend von Taskfile.yml
|
||||||
|
- data/wuppertal/refine/openrefine.log
|
||||||
|
- data/wuppertal/refine/wuppertal.txt
|
||||||
|
|
||||||
|
split:
|
||||||
|
cmds:
|
||||||
|
- task: :split
|
||||||
|
vars: {PROJECT: "wuppertal"}
|
||||||
|
sources:
|
||||||
|
# wenn "dir:" für task nicht gesetzt ist, dann relative Links ausgehend von Taskfile.yml
|
||||||
|
- data/wuppertal/refine/wuppertal.txt
|
||||||
|
generates:
|
||||||
|
- data/wuppertal/split/*.xml
|
||||||
|
|
||||||
|
validate:
|
||||||
|
cmds:
|
||||||
|
- task: :validate
|
||||||
|
vars: {PROJECT: "wuppertal"}
|
||||||
|
sources:
|
||||||
|
- data/wuppertal/split/*.xml
|
||||||
|
generates:
|
||||||
|
- data/wuppertal/validate.log
|
||||||
|
|
||||||
|
zip:
|
||||||
|
cmds:
|
||||||
|
- task: :zip
|
||||||
|
vars: {PROJECT: "wuppertal"}
|
||||||
|
sources:
|
||||||
|
- data/wuppertal/split/*.xml
|
||||||
|
generates:
|
||||||
|
- data/wuppertal/wuppertal_{{.DATE}}.zip
|
||||||
|
|
||||||
|
diff:
|
||||||
|
cmds:
|
||||||
|
- task: :diff
|
||||||
|
vars: {PROJECT: "wuppertal"}
|
||||||
|
sources:
|
||||||
|
- data/wuppertal/split/*.xml
|
||||||
|
generates:
|
||||||
|
- data/wuppertal/diff.log
|
||||||
|
|
||||||
linkcheck:
|
linkcheck:
|
||||||
desc: Elpub Wuppertal links überprüfen
|
desc: Elpub Wuppertal links überprüfen
|
||||||
cmds:
|
cmds:
|
||||||
- task: :linkcheck
|
- task: :linkcheck
|
||||||
vars: {PROJECT: "wuppertal"}
|
vars: {PROJECT: "wuppertal"}
|
||||||
|
sources:
|
||||||
|
- data/wuppertal/split/*.xml
|
||||||
|
generates:
|
||||||
|
- data/wuppertal/linkcheck.log
|
||||||
|
|
||||||
delete:
|
delete:
|
||||||
desc: Elpub Wuppertal cache löschen
|
desc: Elpub Wuppertal cache löschen
|
||||||
|
|
Loading…
Reference in New Issue