80 lines
2.7 KiB
YAML
80 lines
2.7 KiB
YAML
|
version: '3'
|
||
|
|
||
|
tasks:
|
||
|
main:
|
||
|
desc: Powerhouse Museum Tutorial
|
||
|
cmds:
|
||
|
- task: refine
|
||
|
- task: :check # check OpenRefine log for any warnings and exit on error
|
||
|
vars: {PROJECT: '{{splitList ":" .TASK | first}}'}
|
||
|
|
||
|
refine:
|
||
|
vars:
|
||
|
PORT: 3336 # assign a different port for each project
|
||
|
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||
|
PROJECT: '{{splitList ":" .TASK | first}}'
|
||
|
deps: # will be executed each run independent of up-to-date check
|
||
|
- task: download
|
||
|
cmds: # tasks prepended with ":" are defined in Taskfile.yml
|
||
|
- task: :start
|
||
|
vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||
|
- task: import
|
||
|
vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||
|
- task: apply
|
||
|
vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||
|
- task: export
|
||
|
vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||
|
- task: stats
|
||
|
vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||
|
- task: :stop
|
||
|
vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||
|
sources:
|
||
|
- input/**
|
||
|
- config/**
|
||
|
generates:
|
||
|
- output/openrefine.log
|
||
|
- output/{{.PROJECT}}.openrefine.tar.gz
|
||
|
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
|
||
|
|
||
|
download:
|
||
|
cmds:
|
||
|
- mkdir -p input config
|
||
|
- wget --no-verbose -O input/phm-collection.tsv https://github.com/opencultureconsulting/openrefine-batch/raw/master/examples/powerhouse-museum/input/phm-collection.tsv
|
||
|
- wget --no-verbose -O config/phm-transform.json https://github.com/opencultureconsulting/openrefine-batch/raw/master/examples/powerhouse-museum/config/phm-transform.json
|
||
|
|
||
|
import:
|
||
|
dir: input
|
||
|
cmds:
|
||
|
- | # import file
|
||
|
../../openrefine/client -P {{.PORT}} \
|
||
|
--create phm-collection.tsv \
|
||
|
--processQuotes false \
|
||
|
--guessCellValueTypes true \
|
||
|
--projectName {{.PROJECT}}
|
||
|
ignore_error: true # workaround
|
||
|
|
||
|
apply:
|
||
|
dir: config
|
||
|
cmds:
|
||
|
- | # apply transformation rules
|
||
|
../../openrefine/client -P {{.PORT}} {{.PROJECT}} \
|
||
|
--apply phm-transform.json
|
||
|
ignore_error: true # workaround
|
||
|
|
||
|
export:
|
||
|
dir: output
|
||
|
cmds:
|
||
|
- | # export to file; use readlink to log full path to output file
|
||
|
../../openrefine/client -P {{.PORT}} {{.PROJECT}} \
|
||
|
--output "$(readlink -m phm-results.tsv)"
|
||
|
ignore_error: true # workaround
|
||
|
|
||
|
stats:
|
||
|
cmds:
|
||
|
- ps -o start,etime,%mem,%cpu,rss -p $(lsof -t -i:{{.PORT}}) # print allocated system resources
|
||
|
ignore_error: true # workaround
|
||
|
|
||
|
default: # enable standalone execution (running `task` in project directory)
|
||
|
cmds:
|
||
|
- PROJECT="${PWD##*/}:main" && cd .. && task "$PROJECT"
|