version: '3'

tasks:
  # Entry point of the tutorial: run the OpenRefine workflow, then inspect
  # the OpenRefine log.
  main:
    desc: Powerhouse Museum Tutorial
    cmds:
      - task: refine
      # Check the OpenRefine log for any warnings and exit on error.
      # The leading colon addresses a task outside this file's own task list
      # (presumably the root Taskfile that includes this one -- confirm).
      - task: :check
        vars:
          # Everything in the current task name before the first ":".
          PROJECT: '{{splitList ":" .TASK | first}}'

  # Import the input file into OpenRefine, apply the transformation rules and
  # export the result, with OpenRefine started before and stopped after.
  refine:
    dir: './{{.PROJECT}}'
    vars:
      # Assign a different port for each project.
      PORT: 3336
      # Maximum RAM for the OpenRefine Java heap space.
      RAM: '2048M'
      # Everything in the current task name before the first ":"; also used
      # as the working directory name above.
      PROJECT: '{{splitList ":" .TASK | first}}'
    # Dependencies are executed on each run, independent of the up-to-date
    # check on sources/generates.
    deps:
      - task: download
    cmds:
      # Launch OpenRefine.
      - task: :start
        vars:
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
          RAM: '{{.RAM}}'
      # Import the input file as a new OpenRefine project.
      - >
        ../openrefine/client -P {{.PORT}}
        --create "$(readlink -m input/phm-collection.tsv)"
        --processQuotes false
        --guessCellValueTypes true
        --projectName {{.PROJECT}}
      # Apply the transformation rules.
      - >
        ../openrefine/client -P {{.PORT}} {{.PROJECT}}
        --apply config/phm-transform.json
      # Export the project to a file (the output directory is created first).
      - >
        mkdir -p output &&
        ../openrefine/client -P {{.PORT}} {{.PROJECT}}
        --output "$(readlink -m output/phm-results.tsv)"
      # Print allocated system resources (resident memory and CPU time) of
      # the process that lsof reports on the project's port.
      - |
        PID="$(lsof -t -i:{{.PORT}})"
        echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM"
        echo "used $(ps --no-headers -o cputime -p "$PID") CPU time"
      # Shut down OpenRefine and archive the OpenRefine project.
      - task: :stop
        vars:
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
    sources:
      - input/**
      - config/**
    generates:
      - openrefine.log
      - './{{.PROJECT}}.openrefine.tar.gz'
      - output/**
    # Workaround to avoid an orphaned Java process on error, see
    # https://github.com/go-task/task/issues/141
    ignore_error: true

  # Fetch the tutorial's input data and transformation rules into the
  # project directory.
  download:
    dir: '{{splitList ":" .TASK | first}}'
    cmds:
      - mkdir -p input config
      # Download input.
      - >
        wget --no-verbose -O input/phm-collection.tsv
        https://github.com/opencultureconsulting/openrefine-batch/raw/master/examples/powerhouse-museum/input/phm-collection.tsv
      # Download config.
      - >
        wget --no-verbose -O config/phm-transform.json
        https://github.com/opencultureconsulting/openrefine-batch/raw/master/examples/powerhouse-museum/config/phm-transform.json

  # Enable standalone execution (running `task` in the project directory):
  # step up one directory and run "<current directory name>:main" from there.
  default:
    cmds:
      - PROJECT="${PWD##*/}:main" && cd .. && task "$PROJECT"