# Taskfile for the "doaj" OpenRefine project (Library Carpentry lesson data).
#
# NOTE(review): tasks referenced with a leading ":" (:check, :start, :stop)
# and the "$CLIENT" variable are not defined in this file; presumably they are
# provided by a parent Taskfile that includes this one under a namespace equal
# to this directory's name (see the `default` task) — confirm against the
# parent Taskfile.
version: '3'

tasks:
  # Entry point: runs the OpenRefine workflow, then the external log check.
  main:
    desc: Library Carpentry Lesson covering DOAJ
    vars:
      # Part of the invoked task name before the first ":".
      DIR: '{{splitList ":" .TASK | first}}'
    cmds:
      - task: refine
      # check OpenRefine log for any warnings and exit on error
      - task: :check
        vars:
          DIR: '{{.DIR}}'

  # Imports the sample CSV into OpenRefine, applies the transformation rules,
  # exports the result as TSV and reports the resources used by the process
  # listening on PORT.
  refine:
    dir: './{{.DIR}}'
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
      PROJECT: doaj
      PORT: 3334 # assign a different port for each project
      RAM: 2048M # maximum RAM for OpenRefine java heap space
      # Appended after ">" in each command below, so that output is shown on
      # the terminal and also appended to openrefine.log (stderr included).
      LOG: '>(tee -a openrefine.log) 2>&1'
    deps: # will be executed each run independent of up-to-date check
      - task: download
    cmds:
      # launch OpenRefine
      - task: :start
        vars:
          DIR: '{{.DIR}}'
          PORT: '{{.PORT}}'
          RAM: '{{.RAM}}'
      # import file
      - >
        "$CLIENT" -P {{.PORT}}
        --create "$(readlink -m input/doaj-article-sample.csv)"
        --projectName {{.PROJECT}}
        > {{.LOG}}
      # apply transformation rules
      - >
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --apply config/doaj-openrefine.json
        > {{.LOG}}
      # export to file
      - >
        mkdir -p output &&
        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
        --output "$(readlink -m output/doaj-results.tsv)"
        > {{.LOG}}
      # print allocated system resources
      # (literal block: the three statements must stay on separate lines)
      - |
        PID="$(lsof -t -i:{{.PORT}})"
        echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
        echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
      # shut down OpenRefine and archive the OpenRefine project
      - task: :stop
        vars:
          DIR: '{{.DIR}}'
          PROJECT: '{{.PROJECT}}'
          PORT: '{{.PORT}}'
    sources:
      - Taskfile.yml
      - input/**
      - config/**
    generates:
      - './{{.PROJECT}}.openrefine.tar.gz'
      - output/**
    # workaround to avoid an orphaned Java process on error
    # https://github.com/go-task/task/issues/141
    ignore_error: true

  # Fetches the input CSV and the OpenRefine transformation rules into
  # input/ and config/ (existing files are overwritten via `wget -O`).
  download:
    dir: './{{.DIR}}'
    vars:
      DIR: '{{splitList ":" .TASK | first}}'
    cmds:
      - mkdir -p input config
      # Download input
      - >
        wget --no-verbose -O input/doaj-article-sample.csv
        https://github.com/felixlohmeier/openrefine-kimws2019/raw/master/doaj-article-sample.csv
      # Download config
      - >
        wget --no-verbose -O config/doaj-openrefine.json
        https://github.com/felixlohmeier/openrefine-kimws2019/raw/master/doaj-openrefine.json

  # enable standalone execution (running `task` in project directory):
  # re-invokes task from the parent directory as "<this directory name>:main"
  default:
    cmds:
      - DIR="${PWD##*/}:main" && cd .. && task "$DIR"