diff --git a/README.md b/README.md index e86fb65..d0c1416 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,6 @@ Please file an [issue](https://github.com/opencultureconsulting/openrefine-task- ## To do - [ ] Codacy badge (needs to be public) -- [ ] add client log messages to openrefine.log (tee -a) - [ ] differentiate examples - [ ] example for loading multiple input files by providing a zip archive - [ ] example for download "fresh" input data as a dependent task and generating archives/diffs diff --git a/Taskfile.yml b/Taskfile.yml index 473383c..fe686ca 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -50,11 +50,11 @@ tasks: if [ ! -f "../openrefine/refine" ]; then echo 1>&2 "OpenRefine missing; try task install"; exit 1 fi - rm -rf ./*.project* workspace.json + rm -rf ./*.project* workspace.json openrefine.log - > # launch OpenRefine with specific data directory and redirect its output to a log file ../openrefine/refine -v warn -p {{.PORT}} -m {{.RAM}} -d ../{{.PROJECT}} - > openrefine.log 2>&1 & + >> openrefine.log 2>&1 & - | # wait until OpenRefine API is available timeout 30s bash -c "until wget -q -O - http://localhost:{{.PORT}} | cat | grep -q -o OpenRefine diff --git a/example-doaj/Taskfile.yml b/example-doaj/Taskfile.yml index c47dae6..a03ede8 100644 --- a/example-doaj/Taskfile.yml +++ b/example-doaj/Taskfile.yml @@ -23,17 +23,22 @@ tasks: ../openrefine/client -P {{.PORT}} --create "$(readlink -m input/doaj-article-sample.csv)" --projectName {{.PROJECT}} + > >(tee -a openrefine.log) 2>&1 - > # apply transformation rules ../openrefine/client -P {{.PORT}} {{.PROJECT}} --apply config/doaj-openrefine.json + > >(tee -a openrefine.log) 2>&1 - > # export to file mkdir -p output && ../openrefine/client -P {{.PORT}} {{.PROJECT}} --output "$(readlink -m output/doaj-results.tsv)" + > >(tee -a openrefine.log) 2>&1 - | # print allocated system resources PID="$(lsof -t -i:{{.PORT}})" - echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" - echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" + echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ + > >(tee -a openrefine.log) + echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \ + > >(tee -a openrefine.log) - task: :stop # shut down OpenRefine and archive the OpenRefine project vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} sources: diff --git a/example-duplicates/Taskfile.yml b/example-duplicates/Taskfile.yml index 9016234..ae838c5 100644 --- a/example-duplicates/Taskfile.yml +++ b/example-duplicates/Taskfile.yml @@ -22,17 +22,22 @@ tasks: --create "$(readlink -m input/duplicates.csv)" --encoding UTF-8 --projectName {{.PROJECT}} + > >(tee -a openrefine.log) 2>&1 - > # apply transformation rules ../openrefine/client -P {{.PORT}} {{.PROJECT}} --apply config/duplicates-deletion.json + > >(tee -a openrefine.log) 2>&1 - > # export to file mkdir -p output && ../openrefine/client -P {{.PORT}} {{.PROJECT}} --output "$(readlink -m output/deduped.xls)" + > >(tee -a openrefine.log) 2>&1 - | # print allocated system resources PID="$(lsof -t -i:{{.PORT}})" - echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" - echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" + echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ + > >(tee -a openrefine.log) + echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \ + > >(tee -a openrefine.log) - task: :stop # shut down OpenRefine and archive the OpenRefine project vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} sources: diff --git a/example-powerhouse/Taskfile.yml b/example-powerhouse/Taskfile.yml index ffcb044..ba528a7 100644 --- a/example-powerhouse/Taskfile.yml +++ b/example-powerhouse/Taskfile.yml @@ -25,17 +25,22 @@ tasks: --processQuotes false --guessCellValueTypes true --projectName {{.PROJECT}} + > >(tee -a openrefine.log) 2>&1 - > # apply transformation rules ../openrefine/client -P {{.PORT}} {{.PROJECT}} --apply config/phm-transform.json + > >(tee -a openrefine.log) 2>&1 - > # export to file mkdir -p output && ../openrefine/client -P {{.PORT}} {{.PROJECT}} --output "$(readlink -m output/phm-results.tsv)" + > >(tee -a openrefine.log) 2>&1 - | # print allocated system resources PID="$(lsof -t -i:{{.PORT}})" - echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" - echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" + echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" \ + > >(tee -a openrefine.log) + echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" \ + > >(tee -a openrefine.log) - task: :stop # shut down OpenRefine and archive the OpenRefine project vars: {PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} sources: