From 1a8cfee580b22e5b633ebddf0359de0e5faca59a Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Wed, 24 Feb 2021 22:40:49 +0100 Subject: [PATCH] :goal_net: exemplify log dir in examples --- .gitignore | 3 +-- README.md | 2 ++ example-doaj/Taskfile.yml | 8 ++++---- example-duplicates/Taskfile.yml | 8 ++++---- example-powerhouse/Taskfile.yml | 8 ++++---- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index d3e2c04..f5dd305 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,7 @@ .task openrefine */output -*/openrefine.log -*/*.openrefine.tar.gz +*/log example-doaj/input example-doaj/config example-powerhouse/input diff --git a/README.md b/README.md index a427fd4..558bdd5 100644 --- a/README.md +++ b/README.md @@ -128,12 +128,14 @@ Please file an [issue](https://github.com/opencultureconsulting/openrefine-task- ## To do +- [ ] add kill task to save time by skipping storing an OpenRefine project archive - [ ] differentiate examples - [ ] example for loading multiple input files by providing a zip archive - [ ] example for download "fresh" input data as a dependent task and generating archives/diffs - [ ] example for applying multiple json files - [ ] example for templating xml and validation with xmllint - [ ] example for multiple projects in one directory/taskfile + - [ ] example for using kill task to save time and disk space - [ ] describe example datasets (and differences) with source code examples - [ ] elaborate how-to for developing tasks - [ ] document openrefine-client options and defaults (tables for input and output with file-format-specific defaults) including templating diff --git a/example-doaj/Taskfile.yml b/example-doaj/Taskfile.yml index 9bc3f1d..de8c0f8 100644 --- a/example-doaj/Taskfile.yml +++ b/example-doaj/Taskfile.yml @@ -17,12 +17,12 @@ tasks: PROJECT: doaj PORT: 3334 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space - LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' + LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1' deps: # will be executed each run independent of up-to-date check - task: download cmds: - task: :start # launch OpenRefine - vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} + vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} - > # import file "$CLIENT" -P {{.PORT}} --create "$(readlink -m input/doaj-article-sample.csv)" @@ -42,13 +42,13 @@ tasks: echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}} echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}} - task: :stop # shut down OpenRefine and archive the OpenRefine project - vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} + vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} sources: - Taskfile.yml - input/** - config/** generates: - - ./{{.PROJECT}}.openrefine.tar.gz + - log/{{.PROJECT}}.openrefine.tar.gz - output/** ignore_error: true # workaround to avoid an orphaned Java process on error # https://github.com/go-task/task/issues/141 diff --git a/example-duplicates/Taskfile.yml b/example-duplicates/Taskfile.yml index ee8c742..42fbb1d 100644 --- a/example-duplicates/Taskfile.yml +++ b/example-duplicates/Taskfile.yml @@ -17,10 +17,10 @@ tasks: PROJECT: duplicates PORT: 3335 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space - LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' + LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1' cmds: - task: :start # launch OpenRefine - vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} + vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} - > # import file "$CLIENT" -P {{.PORT}} --create "$(readlink -m input/duplicates.csv)" @@ -41,13 +41,13 @@ tasks: echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}} echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}} - task: :stop # shut down OpenRefine and archive the OpenRefine project - vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} + vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}'} sources: - Taskfile.yml - input/** - config/** generates: - - ./{{.PROJECT}}.openrefine.tar.gz + - log/{{.PROJECT}}.openrefine.tar.gz - output/** ignore_error: true # workaround to avoid an orphaned Java process on error # https://github.com/go-task/task/issues/141 diff --git a/example-powerhouse/Taskfile.yml b/example-powerhouse/Taskfile.yml index e538a4b..2ee9391 100644 --- a/example-powerhouse/Taskfile.yml +++ b/example-powerhouse/Taskfile.yml @@ -17,12 +17,12 @@ tasks: PROJECT: phm PORT: 3336 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space - LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' + LOG: '>(tee -a "log/{{.PROJECT}}.log") 2>&1' deps: # will be executed each run independent of up-to-date check - task: download cmds: - task: :start # launch OpenRefine - vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} + vars: {DIR: '{{.DIR}}/log', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} - > # import file "$CLIENT" -P {{.PORT}} --create "$(readlink -m input/phm-collection.tsv)" @@ -44,13 +44,13 @@ tasks: echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}} echo "used $(ps --no-headers -o cputime -p "$PID") CPU time" > {{.LOG}} - task: :stop # shut down OpenRefine and archive the OpenRefine project - vars: {DIR: '{{.DIR}}', PORT: '{{.PORT}}', PROJECT: '{{.PROJECT}}'} + vars: {DIR: '{{.DIR}}/log', PORT: '{{.PORT}}', PROJECT: '{{.PROJECT}}'} sources: - Taskfile.yml - input/** - config/** generates: - - ./{{.PROJECT}}.openrefine.tar.gz + - log/{{.PROJECT}}.openrefine.tar.gz - output/** ignore_error: true # workaround to avoid an orphaned Java process on error # https://github.com/go-task/task/issues/141