---
# Taskfile for the "Powerhouse Museum Tutorial" OpenRefine project.
#
# Task names prepended with ":" (:start, :stop, :check) are defined in
# Taskfile.yml, not in this file (presumably the parent Taskfile that
# includes this one -- confirm against the repository layout).
# PROJECT is derived from the first ":"-separated segment of the running
# task's name, i.e. the namespace under which this file is included.
version: '3'

tasks:
  # Entry point: run the workflow, then check the OpenRefine log.
  main:
    desc: Powerhouse Museum Tutorial
    cmds:
      - task: refine
      # check OpenRefine log for any warnings and exit on error
      - task: ':check'
        vars:
          PROJECT: '{{splitList ":" .TASK | first}}'

  # Full workflow: start OpenRefine, import, transform, export, print
  # resource statistics, stop OpenRefine.
  refine:
    vars:
      # assign a different port for each project
      PORT: 3336
      # maximum RAM for OpenRefine java heap space
      RAM: 2048M
      PROJECT: '{{splitList ":" .TASK | first}}'
    # deps will be executed each run independent of up-to-date check
    deps:
      - task: download
    # tasks prepended with ":" are defined in Taskfile.yml
    cmds:
      - task: ':start'
        vars:
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
          RAM: '{{.RAM}}'
      - task: import
        vars:
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
      - task: apply
        vars:
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
      - task: export
        vars:
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
      - task: stats
        vars:
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
      - task: ':stop'
        vars:
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
    # up-to-date check: inputs and outputs of the workflow
    sources:
      - input/**
      - config/**
    generates:
      - output/openrefine.log
      - 'output/{{.PROJECT}}.openrefine.tar.gz'
    # workaround to avoid an orphaned Java process on error
    # https://github.com/go-task/task/issues/141
    ignore_error: true

  # Fetch the tutorial data set and the transformation rules.
  download:
    cmds:
      - mkdir -p input config
      # folded scalars (>-) join the lines with single spaces, so each
      # command is still one line when executed
      - >-
        wget --no-verbose -O input/phm-collection.tsv
        https://github.com/opencultureconsulting/openrefine-batch/raw/master/examples/powerhouse-museum/input/phm-collection.tsv
      - >-
        wget --no-verbose -O config/phm-transform.json
        https://github.com/opencultureconsulting/openrefine-batch/raw/master/examples/powerhouse-museum/config/phm-transform.json

  # Create the OpenRefine project from the downloaded TSV file.
  # Expects PORT and PROJECT to be passed in by the caller.
  import:
    dir: input
    cmds:
      # import file
      - |
        ../../openrefine/client -P {{.PORT}} \
        --create phm-collection.tsv \
        --processQuotes false \
        --guessCellValueTypes true \
        --projectName {{.PROJECT}}
    ignore_error: true  # workaround

  # Apply the downloaded transformation rules to the project.
  # Expects PORT and PROJECT to be passed in by the caller.
  apply:
    dir: config
    cmds:
      # apply transformation rules
      - |
        ../../openrefine/client -P {{.PORT}} {{.PROJECT}} \
        --apply phm-transform.json
    ignore_error: true  # workaround

  # Export the project to output/phm-results.tsv.
  # Expects PORT and PROJECT to be passed in by the caller.
  export:
    dir: output
    cmds:
      # export to file; use readlink to log full path to output file
      - |
        ../../openrefine/client -P {{.PORT}} {{.PROJECT}} \
        --output "$(readlink -m phm-results.tsv)"
    ignore_error: true  # workaround

  # Print allocated system resources of the process listening on PORT.
  stats:
    cmds:
      - ps -o start,etime,%mem,%cpu,rss -p $(lsof -t -i:{{.PORT}})
    ignore_error: true  # workaround

  # Enable standalone execution (running `task` in project directory):
  # takes the current directory name, changes to the parent directory and
  # runs `task "<directory name>:main"` there.
  default:
    cmds:
      - PROJECT="${PWD##*/}:main" && cd .. && task "$PROJECT"