Write OpenRefine output to a per-project log file instead of a shared one.

The start commands and every example now log to "{{.PROJECT}}.log" rather
than openrefine.log, each example passes PROJECT through to the :start task,
and {{.PROJECT}} is quoted wherever it is used as a shell argument. The log
check searches every *.log file; the pattern is quoted so that find, not the
shell, expands it.

diff --git a/Taskfile.yml b/Taskfile.yml
index d209c66..23e8bff 100644
--- a/Taskfile.yml
+++ b/Taskfile.yml
@@ -56,11 +56,11 @@ tasks:
         if [ ! -f "$OPENREFINE" ]; then
           echo 1>&2 "OpenRefine missing; try task install"; exit 1
         fi
-        rm -rf ./*.project* workspace.json openrefine.log
+        rm -rf ./*.project* workspace.json "{{.PROJECT}}.log"
       - > # launch OpenRefine with specific data directory and redirect its output to a log file
         "$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
         -d ../{{.DIR}}
-        >> openrefine.log 2>&1 &
+        >> "{{.PROJECT}}.log" 2>&1 &
       - | # wait until OpenRefine API is available
         timeout 30s bash -c "until
         wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
@@ -86,6 +86,6 @@ tasks:
     dir: ./{{.DIR}}
     cmds:
       - | # find log file(s) and check for "exception" or "error"
-        if grep -i 'exception\|error' $(find . -name openrefine.log); then
+        if grep -i 'exception\|error' $(find . -name '*.log'); then
           echo 1>&2 "log contains warnings!"; exit 1
         fi
diff --git a/example-doaj/Taskfile.yml b/example-doaj/Taskfile.yml
index acfc1c4..9bc3f1d 100644
--- a/example-doaj/Taskfile.yml
+++ b/example-doaj/Taskfile.yml
@@ -17,24 +17,24 @@ tasks:
       PROJECT: doaj
       PORT: 3334 # assign a different port for each project
       RAM: 2048M # maximum RAM for OpenRefine java heap space
-      LOG: '>(tee -a openrefine.log) 2>&1'
+      LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
     deps: # will be executed each run independent of up-to-date check
       - task: download
     cmds:
       - task: :start # launch OpenRefine
-        vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
+        vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
       - > # import file
         "$CLIENT" -P {{.PORT}}
         --create "$(readlink -m input/doaj-article-sample.csv)"
-        --projectName {{.PROJECT}}
+        --projectName "{{.PROJECT}}"
         > {{.LOG}}
       - > # apply transformation rules
-        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --apply config/doaj-openrefine.json
         > {{.LOG}}
       - > # export to file
         mkdir -p output &&
-        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --output "$(readlink -m output/doaj-results.tsv)"
         > {{.LOG}}
       - | # print allocated system resources
diff --git a/example-duplicates/Taskfile.yml b/example-duplicates/Taskfile.yml
index e61ca96..ee8c742 100644
--- a/example-duplicates/Taskfile.yml
+++ b/example-duplicates/Taskfile.yml
@@ -17,23 +17,23 @@ tasks:
       PROJECT: duplicates
       PORT: 3335 # assign a different port for each project
       RAM: 2048M # maximum RAM for OpenRefine java heap space
-      LOG: '>(tee -a openrefine.log) 2>&1'
+      LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
     cmds:
       - task: :start # launch OpenRefine
-        vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
+        vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
       - > # import file
         "$CLIENT" -P {{.PORT}}
         --create "$(readlink -m input/duplicates.csv)"
         --encoding UTF-8
-        --projectName {{.PROJECT}}
+        --projectName "{{.PROJECT}}"
         > {{.LOG}}
       - > # apply transformation rules
-        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --apply config/duplicates-deletion.json
         > {{.LOG}}
       - > # export to file
         mkdir -p output &&
-        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --output "$(readlink -m output/deduped.xls)"
         > {{.LOG}}
       - | # print allocated system resources
diff --git a/example-powerhouse/Taskfile.yml b/example-powerhouse/Taskfile.yml
index 4aec8f8..e538a4b 100644
--- a/example-powerhouse/Taskfile.yml
+++ b/example-powerhouse/Taskfile.yml
@@ -17,26 +17,26 @@ tasks:
       PROJECT: phm
       PORT: 3336 # assign a different port for each project
       RAM: 2048M # maximum RAM for OpenRefine java heap space
-      LOG: '>(tee -a openrefine.log) 2>&1'
+      LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
     deps: # will be executed each run independent of up-to-date check
       - task: download
     cmds:
       - task: :start # launch OpenRefine
-        vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
+        vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
       - > # import file
         "$CLIENT" -P {{.PORT}}
         --create "$(readlink -m input/phm-collection.tsv)"
         --processQuotes false
         --guessCellValueTypes true
-        --projectName {{.PROJECT}}
+        --projectName "{{.PROJECT}}"
         > {{.LOG}}
       - > # apply transformation rules
-        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --apply config/phm-transform.json
         > {{.LOG}}
       - > # export to file
         mkdir -p output &&
-        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --output "$(readlink -m output/phm-results.tsv)"
         > {{.LOG}}
       - | # print allocated system resources