🚚 add flexibility by using project var for name of log file
This commit is contained in:
parent
b77bdb6eb1
commit
71a9698fad
|
@ -56,11 +56,11 @@ tasks:
|
|||
if [ ! -f "$OPENREFINE" ]; then
|
||||
echo 1>&2 "OpenRefine missing; try task install"; exit 1
|
||||
fi
|
||||
rm -rf ./*.project* workspace.json openrefine.log
|
||||
rm -rf ./*.project* workspace.json "{{.PROJECT}}.log"
|
||||
- > # launch OpenRefine with specific data directory and redirect its output to a log file
|
||||
"$OPENREFINE" -v warn -p {{.PORT}} -m {{.RAM}}
|
||||
-d ../{{.DIR}}
|
||||
>> openrefine.log 2>&1 &
|
||||
>> "{{.PROJECT}}.log" 2>&1 &
|
||||
- | # wait until OpenRefine API is available
|
||||
timeout 30s bash -c "until
|
||||
wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine
|
||||
|
@ -86,6 +86,6 @@ tasks:
|
|||
dir: ./{{.DIR}}
|
||||
cmds:
|
||||
- | # find log file(s) and check for "exception" or "error"
|
||||
if grep -i 'exception\|error' $(find . -name openrefine.log); then
|
||||
if grep -i 'exception\|error' $(find . -name '*.log'); then # pattern quoted so find, not the shell, expands it
|
||||
echo 1>&2 "log contains warnings!"; exit 1
|
||||
fi
|
||||
|
|
|
@ -17,24 +17,24 @@ tasks:
|
|||
PROJECT: doaj
|
||||
PORT: 3334 # assign a different port for each project
|
||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a openrefine.log) 2>&1'
|
||||
LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
|
||||
deps: # will be executed each run independent of up-to-date check
|
||||
- task: download
|
||||
cmds:
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # import file
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m input/doaj-article-sample.csv)"
|
||||
--projectName {{.PROJECT}}
|
||||
--projectName "{{.PROJECT}}"
|
||||
> {{.LOG}}
|
||||
- > # apply transformation rules
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||
--apply config/doaj-openrefine.json
|
||||
> {{.LOG}}
|
||||
- > # export to file
|
||||
mkdir -p output &&
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||
--output "$(readlink -m output/doaj-results.tsv)"
|
||||
> {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
|
|
|
@ -17,23 +17,23 @@ tasks:
|
|||
PROJECT: duplicates
|
||||
PORT: 3335 # assign a different port for each project
|
||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a openrefine.log) 2>&1'
|
||||
LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
|
||||
cmds:
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # import file
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m input/duplicates.csv)"
|
||||
--encoding UTF-8
|
||||
--projectName {{.PROJECT}}
|
||||
--projectName "{{.PROJECT}}"
|
||||
> {{.LOG}}
|
||||
- > # apply transformation rules
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||
--apply config/duplicates-deletion.json
|
||||
> {{.LOG}}
|
||||
- > # export to file
|
||||
mkdir -p output &&
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||
--output "$(readlink -m output/deduped.xls)"
|
||||
> {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
|
|
|
@ -17,26 +17,26 @@ tasks:
|
|||
PROJECT: phm
|
||||
PORT: 3336 # assign a different port for each project
|
||||
RAM: 2048M # maximum RAM for OpenRefine java heap space
|
||||
LOG: '>(tee -a openrefine.log) 2>&1'
|
||||
LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1'
|
||||
deps: # will be executed each run independent of up-to-date check
|
||||
- task: download
|
||||
cmds:
|
||||
- task: :start # launch OpenRefine
|
||||
vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'}
|
||||
- > # import file
|
||||
"$CLIENT" -P {{.PORT}}
|
||||
--create "$(readlink -m input/phm-collection.tsv)"
|
||||
--processQuotes false
|
||||
--guessCellValueTypes true
|
||||
--projectName {{.PROJECT}}
|
||||
--projectName "{{.PROJECT}}"
|
||||
> {{.LOG}}
|
||||
- > # apply transformation rules
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||
--apply config/phm-transform.json
|
||||
> {{.LOG}}
|
||||
- > # export to file
|
||||
mkdir -p output &&
|
||||
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
|
||||
"$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
|
||||
--output "$(readlink -m output/phm-results.tsv)"
|
||||
> {{.LOG}}
|
||||
- | # print allocated system resources
|
||||
|
|
Loading…
Reference in New Issue