From a6d455580ae7d6a50601afb8372ba30b5602eb8f Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Thu, 25 Feb 2021 12:33:27 +0100 Subject: [PATCH] :bulb: add some more source code comments --- .gitignore | 2 +- README.md | 3 +++ example-doaj/Taskfile.yml | 11 +++++------ example-duplicates/Taskfile.yml | 7 +++---- example-powerhouse/Taskfile.yml | 12 ++++++------ 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index d3e2c04..9f4b3b3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ .task openrefine */output -*/openrefine.log +*/*.log */*.openrefine.tar.gz example-doaj/input example-doaj/config diff --git a/README.md b/README.md index a427fd4..5772a37 100644 --- a/README.md +++ b/README.md @@ -128,12 +128,15 @@ Please file an [issue](https://github.com/opencultureconsulting/openrefine-task- ## To do +- [ ] hide OpenRefine install by renaming directory to .openrefine +- [ ] add kill task to save time by skipping storing an OpenRefine project archive - [ ] differentiate examples - [ ] example for loading multiple input files by providing a zip archive - [ ] example for download "fresh" input data as a dependent task and generating archives/diffs - [ ] example for applying multiple json files - [ ] example for templating xml and validation with xmllint - [ ] example for multiple projects in one directory/taskfile + - [ ] example for using kill task to save time and disk space - [ ] describe example datasets (and differences) with source code examples - [ ] elaborate how-to for developing tasks - [ ] document openrefine-client options and defaults (tables for input and output with file-format-specific defaults) including templating diff --git a/example-doaj/Taskfile.yml b/example-doaj/Taskfile.yml index 9bc3f1d..fa69470 100644 --- a/example-doaj/Taskfile.yml +++ b/example-doaj/Taskfile.yml @@ -4,7 +4,7 @@ tasks: main: desc: Library Carpentry Lesson covering DOAJ vars: - DIR: '{{splitList ":" .TASK | first}}' + DIR: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name cmds: - task: refine - task: :check # check OpenRefine log for any warnings and exit on error @@ -17,9 +17,9 @@ tasks: PROJECT: doaj PORT: 3334 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space - LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' - deps: # will be executed each run independent of up-to-date check - - task: download + LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' # be careful when making changes here, as the path to the log file should match the server log (see main task "start") + deps: + - task: download # will be executed each run independent of up-to-date check cmds: - task: :start # launch OpenRefine vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} @@ -50,8 +50,7 @@ tasks: generates: - ./{{.PROJECT}}.openrefine.tar.gz - output/** - ignore_error: true # workaround to avoid an orphaned Java process on error - # https://github.com/go-task/task/issues/141 + ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141 download: dir: ./{{.DIR}} diff --git a/example-duplicates/Taskfile.yml b/example-duplicates/Taskfile.yml index ee8c742..48f9813 100644 --- a/example-duplicates/Taskfile.yml +++ b/example-duplicates/Taskfile.yml @@ -4,7 +4,7 @@ tasks: main: desc: Removing duplicates in a very small test dataset vars: - DIR: '{{splitList ":" .TASK | first}}' + DIR: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name cmds: - task: refine - task: :check # check OpenRefine log for any warnings and exit on error @@ -17,7 +17,7 @@ tasks: PROJECT: duplicates PORT: 3335 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space - LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' + LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' # be careful when making changes here, as the path to the log file should match the server log (see main task "start") cmds: - task: :start # launch OpenRefine vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} @@ -49,8 +49,7 @@ tasks: generates: - ./{{.PROJECT}}.openrefine.tar.gz - output/** - ignore_error: true # workaround to avoid an orphaned Java process on error - # https://github.com/go-task/task/issues/141 + ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141 default: # enable standalone execution (running `task` in project directory) cmds: diff --git a/example-powerhouse/Taskfile.yml b/example-powerhouse/Taskfile.yml index e538a4b..524dc1b 100644 --- a/example-powerhouse/Taskfile.yml +++ b/example-powerhouse/Taskfile.yml @@ -4,7 +4,7 @@ tasks: main: desc: Powerhouse Museum Tutorial vars: - DIR: '{{splitList ":" .TASK | first}}' + DIR: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name cmds: - task: refine - task: :check # check OpenRefine log for any warnings and exit on error @@ -17,9 +17,9 @@ tasks: PROJECT: phm PORT: 3336 # assign a different port for each project RAM: 2048M # maximum RAM for OpenRefine java heap space - LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' - deps: # will be executed each run independent of up-to-date check - - task: download + LOG: '>(tee -a "{{.PROJECT}}.log") 2>&1' # be careful when making changes here, as the path to the log file should match the server log (see main task "start") + deps: + - task: download # will be executed each run independent of up-to-date check cmds: - task: :start # launch OpenRefine vars: {DIR: '{{.DIR}}', PROJECT: '{{.PROJECT}}', PORT: '{{.PORT}}', RAM: '{{.RAM}}'} @@ -52,8 +52,8 @@ tasks: generates: - ./{{.PROJECT}}.openrefine.tar.gz - output/** - ignore_error: true # workaround to avoid an orphaned Java process on error - # https://github.com/go-task/task/issues/141 + ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141 + download: dir: ./{{.DIR}} vars: