diff --git a/example-doaj/Taskfile.yml b/example-doaj/Taskfile.yml index f7ce490..acfc1c4 100644 --- a/example-doaj/Taskfile.yml +++ b/example-doaj/Taskfile.yml @@ -17,6 +17,7 @@ tasks: PROJECT: doaj PORT: 3334 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space + LOG: '>(tee -a openrefine.log) 2>&1' deps: # will be executed each run independent of up-to-date check - task: download cmds: @@ -26,22 +27,20 @@ tasks: "$CLIENT" -P {{.PORT}} --create "$(readlink -m input/doaj-article-sample.csv)" --projectName {{.PROJECT}} - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - > # apply transformation rules "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/doaj-openrefine.json - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - > # export to file mkdir -p output && "$CLIENT" -P {{.PORT}} {{.PROJECT}} --output "$(readlink -m output/doaj-results.tsv)" - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - | # print allocated system resources PID="$(lsof -t -i:{{.PORT}})" - echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ - > >(tee -a openrefine.log) - echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \ - > >(tee -a openrefine.log) + echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}} + echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}} - task: :stop # shut down OpenRefine and archive the OpenRefine project vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} sources: @@ -49,7 +48,6 @@ tasks: - input/** - config/** generates: - - openrefine.log - ./{{.PROJECT}}.openrefine.tar.gz - output/** ignore_error: true # workaround to avoid an orphaned Java process on error diff --git a/example-duplicates/Taskfile.yml b/example-duplicates/Taskfile.yml index b514be0..e61ca96 100644 --- a/example-duplicates/Taskfile.yml +++ b/example-duplicates/Taskfile.yml @@ -17,6 +17,7 @@ tasks: PROJECT: duplicates PORT: 3335 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space + LOG: '>(tee -a openrefine.log) 2>&1' cmds: - task: :start # launch OpenRefine vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} @@ -25,22 +26,20 @@ tasks: --create "$(readlink -m input/duplicates.csv)" --encoding UTF-8 --projectName {{.PROJECT}} - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - > # apply transformation rules "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/duplicates-deletion.json - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - > # export to file mkdir -p output && "$CLIENT" -P {{.PORT}} {{.PROJECT}} --output "$(readlink -m output/deduped.xls)" - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - | # print allocated system resources PID="$(lsof -t -i:{{.PORT}})" - echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ - > >(tee -a openrefine.log) - echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \ - > >(tee -a openrefine.log) + echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}} + echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}} - task: :stop # shut down OpenRefine and archive the OpenRefine project vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} sources: @@ -48,7 +47,6 @@ tasks: - input/** - config/** generates: - - openrefine.log - ./{{.PROJECT}}.openrefine.tar.gz - output/** ignore_error: true # workaround to avoid an orphaned Java process on error diff --git a/example-powerhouse/Taskfile.yml b/example-powerhouse/Taskfile.yml index 77eaa19..4aec8f8 100644 --- a/example-powerhouse/Taskfile.yml +++ b/example-powerhouse/Taskfile.yml @@ -17,6 +17,7 @@ tasks: PROJECT: phm PORT: 3336 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space + LOG: '>(tee -a openrefine.log) 2>&1' deps: # will be executed each run independent of up-to-date check - task: download cmds: @@ -28,22 +29,20 @@ tasks: --processQuotes false --guessCellValueTypes true --projectName {{.PROJECT}} - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - > # apply transformation rules "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/phm-transform.json - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - > # export to file mkdir -p output && "$CLIENT" -P {{.PORT}} {{.PROJECT}} --output "$(readlink -m output/phm-results.tsv)" - > >(tee -a openrefine.log) 2>&1 + > {{.LOG}} - | # print allocated system resources PID="$(lsof -t -i:{{.PORT}})" - echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ - > >(tee -a openrefine.log) - echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \ - > >(tee -a openrefine.log) + echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}} + echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}} - task: :stop # shut down OpenRefine and archive the OpenRefine project vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', PROJECT: '{{.PROJECT}}'} sources: @@ -51,7 +50,6 @@ tasks: - input/** - config/** generates: - - openrefine.log - ./{{.PROJECT}}.openrefine.tar.gz - output/** ignore_error: true # workaround to avoid an orphaned Java process on error