♻️ use var for client logging

This commit is contained in:
Felix Lohmeier 2021-02-24 19:13:41 +01:00
parent f7472b4683
commit b77bdb6eb1
3 changed files with 18 additions and 24 deletions

View File

@@ -17,6 +17,7 @@ tasks:
PROJECT: doaj PROJECT: doaj
PORT: 3334 # assign a different port for each project PORT: 3334 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1'
deps: # will be executed each run independent of up-to-date check deps: # will be executed each run independent of up-to-date check
- task: download - task: download
cmds: cmds:
@@ -26,22 +27,20 @@ tasks:
"$CLIENT" -P {{.PORT}} "$CLIENT" -P {{.PORT}}
--create "$(readlink -m input/doaj-article-sample.csv)" --create "$(readlink -m input/doaj-article-sample.csv)"
--projectName {{.PROJECT}} --projectName {{.PROJECT}}
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- > # apply transformation rules - > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} {{.PROJECT}}
--apply config/doaj-openrefine.json --apply config/doaj-openrefine.json
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- > # export to file - > # export to file
mkdir -p output && mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/doaj-results.tsv)" --output "$(readlink -m output/doaj-results.tsv)"
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- | # print allocated system resources - | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})" PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
> >(tee -a openrefine.log) echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
> >(tee -a openrefine.log)
- task: :stop # shut down OpenRefine and archive the OpenRefine project - task: :stop # shut down OpenRefine and archive the OpenRefine project
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
sources: sources:
@@ -49,7 +48,6 @@ tasks:
- input/** - input/**
- config/** - config/**
generates: generates:
- openrefine.log
- ./{{.PROJECT}}.openrefine.tar.gz - ./{{.PROJECT}}.openrefine.tar.gz
- output/** - output/**
ignore_error: true # workaround to avoid an orphaned Java process on error ignore_error: true # workaround to avoid an orphaned Java process on error

View File

@@ -17,6 +17,7 @@ tasks:
PROJECT: duplicates PROJECT: duplicates
PORT: 3335 # assign a different port for each project PORT: 3335 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1'
cmds: cmds:
- task: :start # launch OpenRefine - task: :start # launch OpenRefine
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
@@ -25,22 +26,20 @@ tasks:
--create "$(readlink -m input/duplicates.csv)" --create "$(readlink -m input/duplicates.csv)"
--encoding UTF-8 --encoding UTF-8
--projectName {{.PROJECT}} --projectName {{.PROJECT}}
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- > # apply transformation rules - > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} {{.PROJECT}}
--apply config/duplicates-deletion.json --apply config/duplicates-deletion.json
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- > # export to file - > # export to file
mkdir -p output && mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/deduped.xls)" --output "$(readlink -m output/deduped.xls)"
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- | # print allocated system resources - | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})" PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
> >(tee -a openrefine.log) echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
> >(tee -a openrefine.log)
- task: :stop # shut down OpenRefine and archive the OpenRefine project - task: :stop # shut down OpenRefine and archive the OpenRefine project
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
sources: sources:
@@ -48,7 +47,6 @@ tasks:
- input/** - input/**
- config/** - config/**
generates: generates:
- openrefine.log
- ./{{.PROJECT}}.openrefine.tar.gz - ./{{.PROJECT}}.openrefine.tar.gz
- output/** - output/**
ignore_error: true # workaround to avoid an orphaned Java process on error ignore_error: true # workaround to avoid an orphaned Java process on error

View File

@@ -17,6 +17,7 @@ tasks:
PROJECT: phm PROJECT: phm
PORT: 3336 # assign a different port for each project PORT: 3336 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1'
deps: # will be executed each run independent of up-to-date check deps: # will be executed each run independent of up-to-date check
- task: download - task: download
cmds: cmds:
@@ -28,22 +29,20 @@ tasks:
--processQuotes false --processQuotes false
--guessCellValueTypes true --guessCellValueTypes true
--projectName {{.PROJECT}} --projectName {{.PROJECT}}
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- > # apply transformation rules - > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} {{.PROJECT}}
--apply config/phm-transform.json --apply config/phm-transform.json
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- > # export to file - > # export to file
mkdir -p output && mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/phm-results.tsv)" --output "$(readlink -m output/phm-results.tsv)"
> >(tee -a openrefine.log) 2>&1 > {{.LOG}}
- | # print allocated system resources - | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})" PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
> >(tee -a openrefine.log) echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
> >(tee -a openrefine.log)
- task: :stop # shut down OpenRefine and archive the OpenRefine project - task: :stop # shut down OpenRefine and archive the OpenRefine project
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', PROJECT: '{{.PROJECT}}'} vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', PROJECT: '{{.PROJECT}}'}
sources: sources:
@@ -51,7 +50,6 @@ tasks:
- input/** - input/**
- config/** - config/**
generates: generates:
- openrefine.log
- ./{{.PROJECT}}.openrefine.tar.gz - ./{{.PROJECT}}.openrefine.tar.gz
- output/** - output/**
ignore_error: true # workaround to avoid an orphaned Java process on error ignore_error: true # workaround to avoid an orphaned Java process on error