♻️ use var for client logging
This commit is contained in:
parent
f7472b4683
commit
b77bdb6eb1
|
@ -17,6 +17,7 @@ tasks:
|
||||||
PROJECT: doaj
|
PROJECT: doaj
|
||||||
PORT: 3334 # assign a different port for each project
|
PORT: 3334 # assign a different port for each project
|
||||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||||
|
LOG: '>(tee -a openrefine.log) 2>&1'
|
||||||
deps: # will be executed each run independent of up-to-date check
|
deps: # will be executed each run independent of up-to-date check
|
||||||
- task: download
|
- task: download
|
||||||
cmds:
|
cmds:
|
||||||
|
@ -26,22 +27,20 @@ tasks:
|
||||||
"$CLIENT" -P {{.PORT}}
|
"$CLIENT" -P {{.PORT}}
|
||||||
--create "$(readlink -m input/doaj-article-sample.csv)"
|
--create "$(readlink -m input/doaj-article-sample.csv)"
|
||||||
--projectName {{.PROJECT}}
|
--projectName {{.PROJECT}}
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- > # apply transformation rules
|
- > # apply transformation rules
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||||
--apply config/doaj-openrefine.json
|
--apply config/doaj-openrefine.json
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- > # export to file
|
- > # export to file
|
||||||
mkdir -p output &&
|
mkdir -p output &&
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||||
--output "$(readlink -m output/doaj-results.tsv)"
|
--output "$(readlink -m output/doaj-results.tsv)"
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- | # print allocated system resources
|
- | # print allocated system resources
|
||||||
PID="$(lsof -t -i:{{.PORT}})"
|
PID="$(lsof -t -i:{{.PORT}})"
|
||||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \
|
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||||
> >(tee -a openrefine.log)
|
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
|
|
||||||
> >(tee -a openrefine.log)
|
|
||||||
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
||||||
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||||||
sources:
|
sources:
|
||||||
|
@ -49,7 +48,6 @@ tasks:
|
||||||
- input/**
|
- input/**
|
||||||
- config/**
|
- config/**
|
||||||
generates:
|
generates:
|
||||||
- openrefine.log
|
|
||||||
- ./{{.PROJECT}}.openrefine.tar.gz
|
- ./{{.PROJECT}}.openrefine.tar.gz
|
||||||
- output/**
|
- output/**
|
||||||
ignore_error: true # workaround to avoid an orphaned Java process on error
|
ignore_error: true # workaround to avoid an orphaned Java process on error
|
||||||
|
|
|
@ -17,6 +17,7 @@ tasks:
|
||||||
PROJECT: duplicates
|
PROJECT: duplicates
|
||||||
PORT: 3335 # assign a different port for each project
|
PORT: 3335 # assign a different port for each project
|
||||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||||
|
LOG: '>(tee -a openrefine.log) 2>&1'
|
||||||
cmds:
|
cmds:
|
||||||
- task: :start # launch OpenRefine
|
- task: :start # launch OpenRefine
|
||||||
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||||
|
@ -25,22 +26,20 @@ tasks:
|
||||||
--create "$(readlink -m input/duplicates.csv)"
|
--create "$(readlink -m input/duplicates.csv)"
|
||||||
--encoding UTF-8
|
--encoding UTF-8
|
||||||
--projectName {{.PROJECT}}
|
--projectName {{.PROJECT}}
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- > # apply transformation rules
|
- > # apply transformation rules
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||||
--apply config/duplicates-deletion.json
|
--apply config/duplicates-deletion.json
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- > # export to file
|
- > # export to file
|
||||||
mkdir -p output &&
|
mkdir -p output &&
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||||
--output "$(readlink -m output/deduped.xls)"
|
--output "$(readlink -m output/deduped.xls)"
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- | # print allocated system resources
|
- | # print allocated system resources
|
||||||
PID="$(lsof -t -i:{{.PORT}})"
|
PID="$(lsof -t -i:{{.PORT}})"
|
||||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \
|
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||||
> >(tee -a openrefine.log)
|
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
|
|
||||||
> >(tee -a openrefine.log)
|
|
||||||
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
||||||
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'}
|
||||||
sources:
|
sources:
|
||||||
|
@ -48,7 +47,6 @@ tasks:
|
||||||
- input/**
|
- input/**
|
||||||
- config/**
|
- config/**
|
||||||
generates:
|
generates:
|
||||||
- openrefine.log
|
|
||||||
- ./{{.PROJECT}}.openrefine.tar.gz
|
- ./{{.PROJECT}}.openrefine.tar.gz
|
||||||
- output/**
|
- output/**
|
||||||
ignore_error: true # workaround to avoid an orphaned Java process on error
|
ignore_error: true # workaround to avoid an orphaned Java process on error
|
||||||
|
|
|
@ -17,6 +17,7 @@ tasks:
|
||||||
PROJECT: phm
|
PROJECT: phm
|
||||||
PORT: 3336 # assign a different port for each project
|
PORT: 3336 # assign a different port for each project
|
||||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||||
|
LOG: '>(tee -a openrefine.log) 2>&1'
|
||||||
deps: # will be executed each run independent of up-to-date check
|
deps: # will be executed each run independent of up-to-date check
|
||||||
- task: download
|
- task: download
|
||||||
cmds:
|
cmds:
|
||||||
|
@ -28,22 +29,20 @@ tasks:
|
||||||
--processQuotes false
|
--processQuotes false
|
||||||
--guessCellValueTypes true
|
--guessCellValueTypes true
|
||||||
--projectName {{.PROJECT}}
|
--projectName {{.PROJECT}}
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- > # apply transformation rules
|
- > # apply transformation rules
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||||
--apply config/phm-transform.json
|
--apply config/phm-transform.json
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- > # export to file
|
- > # export to file
|
||||||
mkdir -p output &&
|
mkdir -p output &&
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||||
--output "$(readlink -m output/phm-results.tsv)"
|
--output "$(readlink -m output/phm-results.tsv)"
|
||||||
> >(tee -a openrefine.log) 2>&1
|
> {{.LOG}}
|
||||||
- | # print allocated system resources
|
- | # print allocated system resources
|
||||||
PID="$(lsof -t -i:{{.PORT}})"
|
PID="$(lsof -t -i:{{.PORT}})"
|
||||||
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \
|
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
|
||||||
> >(tee -a openrefine.log)
|
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}}
|
||||||
echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \
|
|
||||||
> >(tee -a openrefine.log)
|
|
||||||
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
- task: :stop # shut down OpenRefine and archive the OpenRefine project
|
||||||
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', PROJECT: '{{.PROJECT}}'}
|
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', PROJECT: '{{.PROJECT}}'}
|
||||||
sources:
|
sources:
|
||||||
|
@ -51,7 +50,6 @@ tasks:
|
||||||
- input/**
|
- input/**
|
||||||
- config/**
|
- config/**
|
||||||
generates:
|
generates:
|
||||||
- openrefine.log
|
|
||||||
- ./{{.PROJECT}}.openrefine.tar.gz
|
- ./{{.PROJECT}}.openrefine.tar.gz
|
||||||
- output/**
|
- output/**
|
||||||
ignore_error: true # workaround to avoid an orphaned Java process on error
|
ignore_error: true # workaround to avoid an orphaned Java process on error
|
||||||
|
|
Loading…
Reference in New Issue