🚚 add flexibility by using project var for name of log file

This commit is contained in:
Felix Lohmeier 2021-02-24 21:06:12 +01:00
parent b77bdb6eb1
commit 71a9698fad
4 changed files with 18 additions and 18 deletions

View File

@@ -56,11 +56,11 @@ tasks:
if [ ! -f "$OPENREFINE" ]; then if [ ! -f "$OPENREFINE" ]; then
echo 1>&2 "OpenRefine missing; try task install"; exit 1 echo 1>&2 "OpenRefine missing; try task install"; exit 1
fi fi
rm -rf ./*.project* workspace.json openrefine.log rm -rf ./*.project* workspace.json "{{.PROJECT}}.log"
- > # launch OpenRefine with specific data directory and redirect its output to a log file - > # launch OpenRefine with specific data directory and redirect its output to a log file
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}} "$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
-d ../{{.DIR}} -d ../{{.DIR}}
>> openrefine.log 2>&1 & >> "{{.PROJECT}}.log" 2>&1 &
- | # wait until OpenRefine API is available - | # wait until OpenRefine API is available
timeout 30s bash -c "until timeout 30s bash -c "until
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
@@ -86,6 +86,6 @@ tasks:
dir: ./{{.DIR}} dir: ./{{.DIR}}
cmds: cmds:
- | # find log file(s) and check for "exception" or "error" - | # find log file(s) and check for "exception" or "error"
if grep -i 'exception\|error' $(find . -name openrefine.log); then if grep -i 'exception\|error' $(find . -name *.log); then
echo 1>&2 "log contains warnings!"; exit 1 echo 1>&2 "log contains warnings!"; exit 1
fi fi

View File

@@ -17,24 +17,24 @@ tasks:
PROJECT: doaj PROJECT: doaj
PORT: 3334 # assign a different port for each project PORT: 3334 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1' LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
deps: # will be executed each run independent of up-to-date check deps: # will be executed each run independent of up-to-date check
- task: download - task: download
cmds: cmds:
- task: :start # launch OpenRefine - task: :start # launch OpenRefine
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
- > # import file - > # import file
"$CLIENT" -P {{.PORT}} "$CLIENT" -P {{.PORT}}
--create "$(readlink -m input/doaj-article-sample.csv)" --create "$(readlink -m input/doaj-article-sample.csv)"
--projectName {{.PROJECT}} --projectName "{{.PROJECT}}"
> {{.LOG}} > {{.LOG}}
- > # apply transformation rules - > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
--apply config/doaj-openrefine.json --apply config/doaj-openrefine.json
> {{.LOG}} > {{.LOG}}
- > # export to file - > # export to file
mkdir -p output && mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
--output "$(readlink -m output/doaj-results.tsv)" --output "$(readlink -m output/doaj-results.tsv)"
> {{.LOG}} > {{.LOG}}
- | # print allocated system resources - | # print allocated system resources

View File

@@ -17,23 +17,23 @@ tasks:
PROJECT: duplicates PROJECT: duplicates
PORT: 3335 # assign a different port for each project PORT: 3335 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1' LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
cmds: cmds:
- task: :start # launch OpenRefine - task: :start # launch OpenRefine
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
- > # import file - > # import file
"$CLIENT" -P {{.PORT}} "$CLIENT" -P {{.PORT}}
--create "$(readlink -m input/duplicates.csv)" --create "$(readlink -m input/duplicates.csv)"
--encoding UTF-8 --encoding UTF-8
--projectName {{.PROJECT}} --projectName "{{.PROJECT}}"
> {{.LOG}} > {{.LOG}}
- > # apply transformation rules - > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
--apply config/duplicates-deletion.json --apply config/duplicates-deletion.json
> {{.LOG}} > {{.LOG}}
- > # export to file - > # export to file
mkdir -p output && mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
--output "$(readlink -m output/deduped.xls)" --output "$(readlink -m output/deduped.xls)"
> {{.LOG}} > {{.LOG}}
- | # print allocated system resources - | # print allocated system resources

View File

@@ -17,26 +17,26 @@ tasks:
PROJECT: phm PROJECT: phm
PORT: 3336 # assign a different port for each project PORT: 3336 # assign a different port for each project
RAM: 2048M # maximum RAM for OpenRefine java heap space RAM: 2048M # maximum RAM for OpenRefine java heap space
LOG: '>(tee -a openrefine.log) 2>&1' LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
deps: # will be executed each run independent of up-to-date check deps: # will be executed each run independent of up-to-date check
- task: download - task: download
cmds: cmds:
- task: :start # launch OpenRefine - task: :start # launch OpenRefine
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
- > # import file - > # import file
"$CLIENT" -P {{.PORT}} "$CLIENT" -P {{.PORT}}
--create "$(readlink -m input/phm-collection.tsv)" --create "$(readlink -m input/phm-collection.tsv)"
--processQuotes false --processQuotes false
--guessCellValueTypes true --guessCellValueTypes true
--projectName {{.PROJECT}} --projectName "{{.PROJECT}}"
> {{.LOG}} > {{.LOG}}
- > # apply transformation rules - > # apply transformation rules
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
--apply config/phm-transform.json --apply config/phm-transform.json
> {{.LOG}} > {{.LOG}}
- > # export to file - > # export to file
mkdir -p output && mkdir -p output &&
"$CLIENT" -P {{.PORT}} {{.PROJECT}} "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
--output "$(readlink -m output/phm-results.tsv)" --output "$(readlink -m output/phm-results.tsv)"
> {{.LOG}} > {{.LOG}}
- | # print allocated system resources - | # print allocated system resources