---
version: '3'

tasks:
  # Entry point: runs the pre-processing task once per input project and
  # then the combined main transformation.
  main:
    desc: Konvertierung von Alephino nach PICA3/CSV
    vars:
      # results in the task namespace, which is identical to the directory name
      DIR: '{{splitList ":" .TASK | first}}'
    cmds:
      - task: refine-pre
        vars:
          PROJECT: leipzig
      - task: refine-pre
        vars:
          PROJECT: riesa
      - task: refine-main

  # Pre-processing for one project ({{.PROJECT}}): imports the fixed-width
  # title and item exports into OpenRefine, applies the config/pre/*.json
  # transformations and exports the result to output/{{.PROJECT}}.tsv.
  refine-pre:
    dir: ./{{.DIR}}
    label: '{{.TASK}}-{{.PROJECT}}'
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
      # assign a different port for each project
      PORT: 3335
      # maximum RAM for OpenRefine java heap space
      RAM: 8192M
      # appended to commands as "> {{.LOG}}": output goes to the console and
      # is appended to the per-project log file
      LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
    cmds:
      - echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
      # launch OpenRefine
      - task: :start
        vars:
          DIR: '{{.DIR}}/log'
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
          RAM: '{{.RAM}}'
      # import titles
      - >
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m input/{{.PROJECT}}-titel.txt)"
        --format fixed-width
        --encoding UTF-8
        --columnWidths 5
        --skipDataLines 0
        --storeBlankRows false
        --projectName titel
        > {{.LOG}}
      # import items
      - >
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m input/{{.PROJECT}}-exemplare.txt)"
        --format fixed-width
        --encoding UTF-8
        --columnWidths 5
        --skipDataLines 0
        --storeBlankRows false
        --projectName exemplare
        > {{.LOG}}
      # titles: corrections for individual cases
      - |
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-einzelfaelle.json > {{.LOG}}
      # prefix field names with M or E respectively
      - |
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-prefix.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-prefix.json > {{.LOG}}
      # sort records and field names
      - |
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-sortieren.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-sortieren.json > {{.LOG}}
      # merge fields that occur multiple times
      - |
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-mehrfachbelegungen.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-mehrfachbelegungen.json > {{.LOG}}
      # delete fields
      - |
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-loeschen.json > {{.LOG}}
      # transpose
      - |
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-transponieren.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-transponieren.json > {{.LOG}}
      # separate the title ID
      - |
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-id-separieren.json > {{.LOG}}
        "$CLIENT" -P {{.PORT}} exemplare --apply config/pre/exemplare-id-separieren.json > {{.LOG}}
      # titles: enrich with items
      - |
        "$CLIENT" -P {{.PORT}} titel --apply config/pre/titel-anreichern.json > {{.LOG}}
      - mkdir -p output
      # export
      - >
        "$CLIENT" -P {{.PORT}} titel
        --output "$(readlink -m output/{{.PROJECT}}.tsv)"
        > {{.LOG}}
      # print allocated system resources
      - |
        PID="$(lsof -t -i:{{.PORT}})"
        echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
        echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
      # shut down OpenRefine immediately to save time and disk space
      - task: :kill
        vars:
          DIR: '{{.DIR}}/log'
          PORT: '{{.PORT}}'
      # check OpenRefine log for any warnings and exit on error
      - task: :check
        vars:
          DIR: '{{.DIR}}'
    sources:
      - input/{{.PROJECT}}*.txt
      - config/pre/**
    generates:
      - output/{{.PROJECT}}*.tsv
    # workaround to avoid an orphaned Java process on error
    # https://github.com/go-task/task/issues/141
    ignore_error: true
# Task refine-main: imports the pre-processed TSV files as one OpenRefine
# project, applies config/main/*.json and exports barcodes and PICA+.
  refine-main:
    dir: ./{{.DIR}}
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
      PROJECT: alephino
      # assign a different port for each project
      PORT: 3335
      # maximum RAM for OpenRefine java heap space
      RAM: 8192M
      # appended to commands as "> {{.LOG}}": output goes to the console and
      # is appended to the per-project log file
      LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
    cmds:
      - echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"
      # launch OpenRefine
      - task: :start
        vars:
          DIR: '{{.DIR}}/log'
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
          RAM: '{{.RAM}}'
      # create a zip archive from the output of the pre-processing
      - >
        zip -j tmp.zip
        output/leipzig.tsv
        output/riesa.tsv
      # import the zip archive; it is removed only if the import succeeds
      - >
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m tmp.zip)"
        --format tsv
        --includeFileSources true
        --projectName {{.PROJECT}}
        > {{.LOG}}
        && rm tmp.zip
      # apply transformations
      - |
        for f in config/main/*.json; do
          "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply "$f" > {{.LOG}}
        done
      # export of the barcodes; golang requires strange escaping
      # https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427
      - >
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --output "$(readlink -m output/barcodes.txt)"
        --template "{{"{{"}}forNonBlank(cells['8200'].value, v, v + '\n', ''){{"}}"}}"
        --rowSeparator ""
        > {{.LOG}}
      # export as PICA+
      - >
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --output "$(readlink -m output/{{.PROJECT}}.pica)"
        --template "$(< config/main/template.txt)"
        --rowSeparator ""
        > {{.LOG}}
      # print allocated system resources
      - |
        PID="$(lsof -t -i:{{.PORT}})"
        echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
        echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
      # shut down OpenRefine and archive the OpenRefine project
      - task: :stop
        vars:
          DIR: '{{.DIR}}/log'
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
      # check OpenRefine log for any warnings and exit on error
      - task: :check
        vars:
          DIR: '{{.DIR}}'
    sources:
      - output/*.tsv
      - config/main/**
    generates:
      - log/{{.PROJECT}}.openrefine.tar.gz
      - output/alephino.pica
      - output/barcodes.txt
    # workaround to avoid an orphaned Java process on error
    # https://github.com/go-task/task/issues/141
    ignore_error: true

  # enable standalone execution (running `task` in project directory)
  default:
    cmds:
      - DIR="${PWD##*/}:main" && cd .. && task "$DIR"