🚚 add flexibility by using project var for name of log file
This commit is contained in:
parent
b77bdb6eb1
commit
71a9698fad
|
@ -56,11 +56,11 @@ tasks:
|
||||||
if [ ! -f "$OPENREFINE" ]; then
|
if [ ! -f "$OPENREFINE" ]; then
|
||||||
echo 1>&2 "OpenRefine missing; try task install"; exit 1
|
echo 1>&2 "OpenRefine missing; try task install"; exit 1
|
||||||
fi
|
fi
|
||||||
rm -rf ./*.project* workspace.json openrefine.log
|
rm -rf ./*.project* workspace.json "{{.PROJECT}}.log"
|
||||||
- > # launch OpenRefine with specific data directory and redirect its output to a log file
|
- > # launch OpenRefine with specific data directory and redirect its output to a log file
|
||||||
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
|
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
|
||||||
-d ../{{.DIR}}
|
-d ../{{.DIR}}
|
||||||
>> openrefine.log 2>&1 &
|
>> "{{.PROJECT}}.log" 2>&1 &
|
||||||
- | # wait until OpenRefine API is available
|
- | # wait until OpenRefine API is available
|
||||||
timeout 30s bash -c "until
|
timeout 30s bash -c "until
|
||||||
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
|
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
|
||||||
|
@ -86,6 +86,6 @@ tasks:
|
||||||
dir: ./{{.DIR}}
|
dir: ./{{.DIR}}
|
||||||
cmds:
|
cmds:
|
||||||
- | # find log file(s) and check for "exception" or "error"
|
- | # find log file(s) and check for "exception" or "error"
|
||||||
if grep -i 'exception\|error' $(find . -name openrefine.log); then
|
# quote the pattern so find (not the shell) expands it; an unquoted *.log breaks as soon as several logs sit in the working directory
if grep -i 'exception\|error' $(find . -name '*.log'); then
|
||||||
echo 1>&2 "log contains warnings!"; exit 1
|
echo 1>&2 "log contains warnings!"; exit 1
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -17,24 +17,24 @@ tasks:
|
||||||
PROJECT: doaj
|
PROJECT: doaj
|
||||||
PORT: 3334 # assign a different port for each project
|
PORT: 3334 # assign a different port for each project
|
||||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||||
LOG: '>(tee -a openrefine.log) 2>&1'
|
LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
|
||||||
deps: # will be executed each run independent of up-to-date check
|
deps: # will be executed each run independent of up-to-date check
|
||||||
- task: download
|
- task: download
|
||||||
cmds:
|
cmds:
|
||||||
- task: :start # launch OpenRefine
|
- task: :start # launch OpenRefine
|
||||||
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||||
- > # import file
|
- > # import file
|
||||||
"$CLIENT" -P {{.PORT}}
|
"$CLIENT" -P {{.PORT}}
|
||||||
--create "$(readlink -m input/doaj-article-sample.csv)"
|
--create "$(readlink -m input/doaj-article-sample.csv)"
|
||||||
--projectName {{.PROJECT}}
|
--projectName "{{.PROJECT}}"
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- > # apply transformation rules
|
- > # apply transformation rules
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||||
--apply config/doaj-openrefine.json
|
--apply config/doaj-openrefine.json
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- > # export to file
|
- > # export to file
|
||||||
mkdir -p output &&
|
mkdir -p output &&
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||||
--output "$(readlink -m output/doaj-results.tsv)"
|
--output "$(readlink -m output/doaj-results.tsv)"
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- | # print allocated system resources
|
- | # print allocated system resources
|
||||||
|
|
|
@ -17,23 +17,23 @@ tasks:
|
||||||
PROJECT: duplicates
|
PROJECT: duplicates
|
||||||
PORT: 3335 # assign a different port for each project
|
PORT: 3335 # assign a different port for each project
|
||||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||||
LOG: '>(tee -a openrefine.log) 2>&1'
|
LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
|
||||||
cmds:
|
cmds:
|
||||||
- task: :start # launch OpenRefine
|
- task: :start # launch OpenRefine
|
||||||
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||||
- > # import file
|
- > # import file
|
||||||
"$CLIENT" -P {{.PORT}}
|
"$CLIENT" -P {{.PORT}}
|
||||||
--create "$(readlink -m input/duplicates.csv)"
|
--create "$(readlink -m input/duplicates.csv)"
|
||||||
--encoding UTF-8
|
--encoding UTF-8
|
||||||
--projectName {{.PROJECT}}
|
--projectName "{{.PROJECT}}"
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- > # apply transformation rules
|
- > # apply transformation rules
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||||
--apply config/duplicates-deletion.json
|
--apply config/duplicates-deletion.json
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- > # export to file
|
- > # export to file
|
||||||
mkdir -p output &&
|
mkdir -p output &&
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||||
--output "$(readlink -m output/deduped.xls)"
|
--output "$(readlink -m output/deduped.xls)"
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- | # print allocated system resources
|
- | # print allocated system resources
|
||||||
|
|
|
@ -17,26 +17,26 @@ tasks:
|
||||||
PROJECT: phm
|
PROJECT: phm
|
||||||
PORT: 3336 # assign a different port for each project
|
PORT: 3336 # assign a different port for each project
|
||||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||||
LOG: '>(tee -a openrefine.log) 2>&1'
|
LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
|
||||||
deps: # will be executed each run independent of up-to-date check
|
deps: # will be executed each run independent of up-to-date check
|
||||||
- task: download
|
- task: download
|
||||||
cmds:
|
cmds:
|
||||||
- task: :start # launch OpenRefine
|
- task: :start # launch OpenRefine
|
||||||
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||||
- > # import file
|
- > # import file
|
||||||
"$CLIENT" -P {{.PORT}}
|
"$CLIENT" -P {{.PORT}}
|
||||||
--create "$(readlink -m input/phm-collection.tsv)"
|
--create "$(readlink -m input/phm-collection.tsv)"
|
||||||
--processQuotes false
|
--processQuotes false
|
||||||
--guessCellValueTypes true
|
--guessCellValueTypes true
|
||||||
--projectName {{.PROJECT}}
|
--projectName "{{.PROJECT}}"
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- > # apply transformation rules
|
- > # apply transformation rules
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||||
--apply config/phm-transform.json
|
--apply config/phm-transform.json
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- > # export to file
|
- > # export to file
|
||||||
mkdir -p output &&
|
mkdir -p output &&
|
||||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||||
--output "$(readlink -m output/phm-results.tsv)"
|
--output "$(readlink -m output/phm-results.tsv)"
|
||||||
> {{.LOG}}
|
> {{.LOG}}
|
||||||
- | # print allocated system resources
|
- | # print allocated system resources
|
||||||
|
|
Loading…
Reference in New Issue