---
# Taskfile for the "Powerhouse Museum Tutorial" OpenRefine project.
#
# Every task derives DIR from the first ":"-separated segment of its own
# task name (.TASK), so the tasks expect to be invoked under a namespace
# equal to the project directory name (see the `default` task below).
#
# NOTE(review): the tasks ':check', ':start' and ':stop' and the shell
# variable $CLIENT are not defined in this file; presumably they are
# provided by the parent Taskfile that includes this one -- confirm there.
version: '3'

tasks:
  main:
    desc: Powerhouse Museum Tutorial
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
    cmds:
      - task: refine
      # check OpenRefine log for any warnings and exit on error
      - task: ':check'
        vars:
          DIR: '{{.DIR}}'

  refine:
    dir: './{{.DIR}}'
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
      PROJECT: phm
      # assign a different port for each project
      PORT: 3336
      # maximum RAM for OpenRefine java heap space
      RAM: 2048M
      # appended to each command after ">": output goes through a process
      # substitution that tees it into "<PROJECT>.log"; stderr follows stdout
      LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
    # will be executed each run independent of up-to-date check
    deps:
      - task: download
    cmds:
      # launch OpenRefine
      - task: ':start'
        vars:
          DIR: '{{.DIR}}'
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
          RAM: '{{.RAM}}'
      # import file
      - >
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m input/phm-collection.tsv)"
        --processQuotes false
        --guessCellValueTypes true
        --projectName "{{.PROJECT}}"
        > {{.LOG}}
      # apply transformation rules
      - >
        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
        --apply config/phm-transform.json
        > {{.LOG}}
      # export to file
      - >
        mkdir -p output &&
        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
        --output "$(readlink -m output/phm-results.tsv)"
        > {{.LOG}}
      # print allocated system resources
      # (literal block: the three statements must stay on separate lines)
      - |
        PID="$(lsof -t -i:{{.PORT}})"
        echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
        echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
      # shut down OpenRefine and archive the OpenRefine project
      - task: ':stop'
        vars:
          DIR: '{{.DIR}}'
          PORT: '{{.PORT}}'
          PROJECT: '{{.PROJECT}}'
    sources:
      - Taskfile.yml
      - input/**
      - config/**
    generates:
      - './{{.PROJECT}}.openrefine.tar.gz'
      - output/**
    # workaround to avoid an orphaned Java process on error
    # https://github.com/go-task/task/issues/141
    ignore_error: true

  download:
    dir: './{{.DIR}}'
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
    cmds:
      - mkdir -p input config
      # Download input
      - >
        wget --no-verbose -O input/phm-collection.tsv
        https://github.com/opencultureconsulting/openrefine-batch/raw/master/examples/powerhouse-museum/input/phm-collection.tsv
      # Download config
      - >
        wget --no-verbose -O config/phm-transform.json
        https://github.com/opencultureconsulting/openrefine-batch/raw/master/examples/powerhouse-museum/config/phm-transform.json

  # enable standalone execution (running `task` in project directory):
  # builds "<current directory name>:main", changes to the parent directory
  # and runs that task from there
  default:
    cmds:
      - 'DIR="${PWD##*/}:main" && cd .. && task "$DIR"'