---
# Taskfile for the step that merges the Bibliotheca and Alephino exports,
# clusters and enriches the items in OpenRefine and exports them as PICA+.
#
# Tasks:
#   main    - runs the upstream :alephino:main and :bibliotheca:main tasks as
#             dependencies, then the local `refine` task
#   refine  - the OpenRefine workflow (import, transformations, exports)
#   default - lets a plain `task` call inside this directory run `<dir>:main`
#             from the parent directory
#
# Tasks referenced with a leading colon (:start, :stop, :check, ...) and the
# $CLIENT variable are not defined in this file.
# NOTE(review): $CLIENT is presumably the OpenRefine command-line client
# provided by the parent Taskfile - confirm.
version: '3'

tasks:
  main:
    desc: PICA3/CSV aus Bibliotheca und Alephino zusammenführen, Exemplare clustern, anreichern und in PICA+ konvertieren
    vars:
      # first segment of the task name (text before the first ":")
      DIR: '{{splitList ":" .TASK | first}}'
    deps:
      - task: ':alephino:main'
      - task: ':bibliotheca:main'
    cmds:
      - task: refine

  refine:
    dir: './{{.DIR}}'
    vars:
      # first segment of the task name (text before the first ":")
      DIR: '{{splitList ":" .TASK | first}}'
      PROJECT: 'pica+'
      # assign a different port for each project
      PORT: 3334
      # maximum RAM for OpenRefine java heap space
      RAM: '8192M'
      # redirection target appended to commands as `> {{.LOG}}`: output is
      # passed through `tee -a` into log/<PROJECT>.log, stderr is merged in
      LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1'
    cmds:
      # print a timestamp and the project name
      - echo "{{now | date "2006-01-02 15:04:05"}} {{.PROJECT}}"

      # launch OpenRefine
      - task: ':start'
        vars:
          DIR: '{{.DIR}}/log'
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
          RAM: '{{.RAM}}'

      # create a ZIP archive with the output of the previous tasks
      # (currently disabled input: ../alephino/output/alephino.csv)
      - >
        zip -j tmp.zip
        ../bibliotheca/output/bibliotheca.csv

      # import the ZIP archive, then remove it
      - >
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m tmp.zip)"
        --format csv
        --includeFileSources false
        --projectName {{.PROJECT}}
        > {{.LOG}}
        && rm tmp.zip

      # spec_Z_04: enrich PPN via ISBN
      - >
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --apply config/ppn.json
        > {{.LOG}}

      # spec_Z_05: cluster items
      - >
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --apply config/clustern.json
        > {{.LOG}}

      - mkdir -p output

      # export duplicate barcodes; golang requires strange escaping
      # https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427
      - >
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --output "$(readlink -m output/barcodes.txt)"
        --template "{{"{{"}}forNonBlank(cells['8200'].value, v, v + '\n', ''){{"}}"}}"
        --rowSeparator ""
        > {{.LOG}}

      # spec_Z_06: delete duplicate barcodes
      - >
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --apply config/dedup.json
        > {{.LOG}}

      # export as PICA+ using the template stored in config/template.txt
      - >
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --output "$(readlink -m output/{{.PROJECT}}.txt)"
        --template "$(< config/template.txt)"
        --rowSeparator ""
        > {{.LOG}}

      # print allocated system resources of the process listening on PORT
      - |
        PID="$(lsof -t -i:{{.PORT}})"
        echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
        echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}

      # shut down OpenRefine and archive the OpenRefine project
      - task: ':stop'
        vars:
          DIR: '{{.DIR}}/log'
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'

      # check OpenRefine log for any warnings and exit on error
      - task: ':check'
        vars:
          DIR: '{{.DIR}}'
    sources:
      - Taskfile.yml
      # - ../alephino/output/alephino.csv
      - ../bibliotheca/output/bibliotheca.csv
      - config/**
    generates:
      - log/{{.PROJECT}}.openrefine.tar.gz
      - output/**
    # workaround to avoid an orphaned Java process on error
    # https://github.com/go-task/task/issues/141
    ignore_error: true

  # enable standalone execution (running `task` in project directory)
  default:
    cmds:
      - DIR="${PWD##*/}:main" && cd .. && task "$DIR"