# https://github.com/opencultureconsulting/openrefine-tasks
#
# Root Taskfile: installs OpenRefine plus openrefine-client and provides the
# shared start / stop / check tasks. The templates below read the variables
# PROJECT, PORT and RAM; they are not defined in this file (presumably they are
# passed in by the included project Taskfiles -- confirm against those files).

version: '3'

includes:
  example-doaj:
    taskfile: example-doaj
    dir: example-doaj
  example-duplicates:
    taskfile: example-duplicates
    dir: example-duplicates
  example-powerhouse:
    taskfile: example-powerhouse
    dir: example-powerhouse
  # add your project here

silent: true
output: prefixed

tasks:
  # Runs the "refine" task of every included project as a dependency and
  # afterwards scans the logs via the check task.
  default:
    desc: execute all projects in parallel
    deps:
      - task: example-doaj:refine
      - task: example-duplicates:refine
      - task: example-powerhouse:refine
      # add your project here
    cmds:
      - task: check

  # Downloads OpenRefine 3.4.1 and openrefine-client 0.3.10 into ./openrefine
  # and patches the startup script and refine.ini.
  install:
    desc: (re)install OpenRefine and openrefine-client into subdirectory openrefine
    cmds:
      - | # delete existing install and recreate folder
        rm -rf openrefine; mkdir -p openrefine
      - | # install OpenRefine into subdirectory openrefine
        wget --no-verbose -O openrefine.tar.gz https://github.com/OpenRefine/OpenRefine/releases/download/3.4.1/openrefine-linux-3.4.1.tar.gz
        tar -xzf openrefine.tar.gz -C openrefine --strip 1 && rm openrefine.tar.gz
      - sed -i 's/cd `dirname $0`/cd "$(dirname "$0")"/' "openrefine/refine" # fix path issue in OpenRefine startup file
      - sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' "openrefine/refine.ini" # do not try to open OpenRefine in browser
      - sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1440/' "openrefine/refine.ini" # set autosave period from 5 minutes to 24 hours (1440 minutes)
      - | # install openrefine-client into subdirectory openrefine
        wget --no-verbose -O openrefine/client https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.10/openrefine-client_0-3-10_linux
        chmod +x openrefine/client

  # Launches OpenRefine in the background for one project and blocks until its
  # web interface answers (at most 30 seconds).
  # NOTE(review): the -d path is written as ../{{.PROJECT}}/output although the
  # task already runs inside ./{{.PROJECT}}/output; presumably this works
  # because the refine script changes into its own directory (see the sed patch
  # in the install task) before the path is resolved -- confirm.
  start:
    dir: ./{{.PROJECT}}/output
    cmds:
      - | # check install and delete any temporary OpenRefine files
        if [ ! -f "../../openrefine/refine" ]; then
          echo 1>&2 "OpenRefine missing; try task install"; exit 1
        fi
        rm -rf ./*.project* workspace.json
      - | # launch OpenRefine with specific data directory and redirect its output to a log file
        ../../openrefine/refine -v warn -p {{.PORT}} -m {{.RAM}} \
        -d ../{{.PROJECT}}/output \
        > openrefine.log 2>&1 &
      - | # wait until OpenRefine API is available
        timeout 30s bash -c "until
        wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
        do sleep 1
        done"

  # Terminates the process using the project's port, waits for it to exit and
  # packs the OpenRefine project directory into <PROJECT>.openrefine.tar.gz.
  stop:
    dir: ./{{.PROJECT}}/output
    cmds:
      - | # shut down OpenRefine
        PID=$(lsof -t -i:{{.PORT}})
        kill $PID
        while ps -p $PID > /dev/null; do sleep 1; done
      - | # archive the OpenRefine project
        tar cfz \
        {{.PROJECT}}.openrefine.tar.gz \
        -C $(grep -l {{.PROJECT}} *.project/metadata.json | cut -d '/' -f 1) \
        .

  # Fails if any openrefine.log below the task directory mentions "exception"
  # or "error" (case-insensitive); matching lines are printed by grep.
  check:
    desc: check OpenRefine log for any warnings and exit on error
    dir: ./{{.PROJECT}}
    cmds:
      - | # find log file(s) and check for "exception" or "error"
        LOGS=$(find . -name openrefine.log)
        # without a file operand grep would read from stdin and block,
        # so skip the check when no log file exists
        if [ -z "$LOGS" ]; then
          exit 0
        fi
        # $LOGS is intentionally unquoted: one grep operand per log file
        if grep -i 'exception\|error' $LOGS; then
          echo 1>&2 "log contains warnings!"; exit 1
        fi