♻️ use var for client logging

This commit is contained in:
Felix Lohmeier 2021-02-24 19:13:41 +01:00
parent f7472b4683
commit b77bdb6eb1
3 changed files with 18 additions and 24 deletions

View File

@ -17,6 +17,7 @@ tasks:
PROJECT: doaj
PORT: 3334 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1'
deps: # will be executed each run independent of up-to-date check
- task: download
cmds:
@ -26,22 +27,20 @@ tasks:
"$CLIENT" -P {{.PORT}}
--create "$(readlink -m input/doaj-article-sample.csv)"
--projectName {{.PROJECT}}
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--apply config/doaj-openrefine.json
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- > # export to file
mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/doaj-results.tsv)"
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \
> >(tee -a openrefine.log)
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
> >(tee -a openrefine.log)
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
- task: :stop # shut down OpenRefine and archive the OpenRefine project
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
sources:
@ -49,7 +48,6 @@ tasks:
- input/**
- config/**
generates:
- openrefine.log
- ./{{.PROJECT}}.openrefine.tar.gz
- output/**
ignore_error: true # workaround to avoid an orphaned Java process on error

View File

@ -17,6 +17,7 @@ tasks:
PROJECT: duplicates
PORT: 3335 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1'
cmds:
- task: :start # launch OpenRefine
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
@ -25,22 +26,20 @@ tasks:
--create "$(readlink -m input/duplicates.csv)"
--encoding UTF-8
--projectName {{.PROJECT}}
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--apply config/duplicates-deletion.json
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- > # export to file
mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/deduped.xls)"
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \
> >(tee -a openrefine.log)
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
> >(tee -a openrefine.log)
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
- task: :stop # shut down OpenRefine and archive the OpenRefine project
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
sources:
@ -48,7 +47,6 @@ tasks:
- input/**
- config/**
generates:
- openrefine.log
- ./{{.PROJECT}}.openrefine.tar.gz
- output/**
ignore_error: true # workaround to avoid an orphaned Java process on error

View File

@ -17,6 +17,7 @@ tasks:
PROJECT: phm
PORT: 3336 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1'
deps: # will be executed each run independent of up-to-date check
- task: download
cmds:
@ -28,22 +29,20 @@ tasks:
--processQuotes false
--guessCellValueTypes true
--projectName {{.PROJECT}}
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--apply config/phm-transform.json
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- > # export to file
mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/phm-results.tsv)"
> >(tee -a openrefine.log) 2>&1
> {{.LOG}}
- | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \
> >(tee -a openrefine.log)
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
> >(tee -a openrefine.log)
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
- task: :stop # shut down OpenRefine and archive the OpenRefine project
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', PROJECT: '{{.PROJECT}}'}
sources:
@ -51,7 +50,6 @@ tasks:
- input/**
- config/**
generates:
- openrefine.log
- ./{{.PROJECT}}.openrefine.tar.gz
- output/**
ignore_error: true # workaround to avoid an orphaned Java process on error