From b3403976c4e8975cd19165b4b2f3a499e9956bd6 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Tue, 25 Oct 2022 09:34:54 +0000 Subject: [PATCH 1/3] upgrade to OpenRefine 3.6.2 --- .gitpod.yml | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitpod.yml b/.gitpod.yml index 6b55f45..c80b529 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -3,7 +3,7 @@ tasks: - name: install bashly and OpenRefine before: gem install --silent bashly init: | - wget -q -O openrefine.tar.gz https://github.com/OpenRefine/OpenRefine/releases/download/3.5.2/openrefine-linux-3.5.2.tar.gz + wget -q -O openrefine.tar.gz "https://oss.sonatype.org/service/local/artifact/maven/content?r=releases&g=org.openrefine&a=openrefine&v=3.6.2&c=linux&p=tar.gz" tar --exclude 'licenses' --exclude 'LICENSE.txt' --exclude 'README.md' -xzf openrefine.tar.gz --strip 1 rm openrefine.tar.gz command: | diff --git a/README.md b/README.md index aaf75bc..a28e526 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org ## Features -* works with latest OpenRefine version (currently 3.5) +* works with latest OpenRefine version (currently 3.6) * batch processing (import, transform, export) * orcli takes care of starting and stopping OpenRefine with temporary workspaces * your existing OpenRefine data will not be touched From 03bcc05a897827e027f44761816f9761ff660a24 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Tue, 25 Oct 2022 10:41:13 +0000 Subject: [PATCH 2/3] getting started tutorial --- README.md | 105 +++++++++++++++++++++++++++++++++++++++++++------ orcli | 18 ++++----- src/bashly.yml | 6 +-- 3 files changed, 104 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index a28e526..860d364 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,10 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org ## Features * works with latest OpenRefine version (currently 3.6) -* batch processing (import, transform, export) +* run batch processes (import, transform, export) * orcli takes care of starting and stopping OpenRefine with temporary workspaces + * allows execution of arbitrary bash scripts + * interactive mode for playing around and debugging * your existing OpenRefine data will not be touched * import CSV, ~~TSV, line-based TXT, fixed-width TXT, JSON or XML~~ (and specify input options) * supports stdin, multiple files and URLs @@ -29,24 +31,101 @@ Bash script to control OpenRefine via [its HTTP API](https://docs.openrefine.org 2. Download bash script there and make it executable -```sh -wget https://github.com/opencultureconsulting/orcli/raw/main/orcli -chmod +x orcli -``` + ```sh + wget https://github.com/opencultureconsulting/orcli/raw/main/orcli + chmod +x orcli + ``` -3. Optional: Create a symlink in your $PATH (e.g. to ~/.local/bin) +Optional: -```sh -ln -s "${PWD}/orcli" ~/.local/bin/ -``` +* Create a symlink in your $PATH (e.g. to ~/.local/bin) + + ```sh + ln -s "${PWD}/orcli" ~/.local/bin/ + ``` + +* Install Bash tab completion + + * temporary + + ```sh + source <(orcli completions) + ``` + + * permanently + + ```sh + mkdir -p ~/.bashrc.d + orcli completions > ~/.bashrc.d/orcli + ``` + +## Getting Started + +1. Launch an interactive playground + + ```sh + ./orcli run --interactive + ``` + +2. Create OpenRefine project `duplicates` from comma-separated-values (CSV) file + + ```sh + orcli import csv "https://git.io/fj5hF" --projectName "duplicates" + ``` + +3. Show OpenRefine project's metadata + + ```sh + orcli info "duplicates" + ``` + +4. ~~~Remove duplicates~~~ + +5. Export data from OpenRefine project to tab-separated-values (TSV) file `duplicates.tsv` + + ```sh + orcli export tsv "duplicates" --output "duplicates.tsv" + ``` + +6. Write out your session history to file `example.sh` (and delete the last line to remove the history command) + + ```sh + history -a "example.sh" + sed -i '$ d' example.sh + ``` + +7. Exit playground + + ```sh + exit + ``` + +8. Run batch process + + ```sh + ./orcli run example.sh + ``` + +9. Cleanup example files + + ```sh + rm duplicates.tsv + rm`example.sh + ``` ## Usage -Use integrated help screens for available options and examples for each command. +* Use integrated help screens for available options and examples for each command. -```sh -orcli --help -``` + ```sh + orcli --help + ``` + +* If your OpenRefine is running on a server, then use the environment variable OPENREFINE_URL. + + ```sh + OPENREFINE_URL="http://localhost:3333" orcli list + ``` ## Development diff --git a/orcli b/orcli index d84968c..ee80fcd 100755 --- a/orcli +++ b/orcli @@ -35,10 +35,10 @@ orcli_usage() { # :command.usage_commands printf "Commands:\n" echo " completions Generate bash completions" - echo " import import commands" + echo " import commands to create OpenRefine projects from files or URLs" echo " list list projects on OpenRefine server" - echo " info show project metadata" - echo " export export commands" + echo " info show OpenRefine project's metadata" + echo " export commands to export data from OpenRefine projects to files" echo " run run tmp OpenRefine workspace and execute shell script(s)" echo @@ -122,11 +122,11 @@ orcli_completions_usage() { # :command.usage orcli_import_usage() { if [[ -n $long_usage ]]; then - printf "orcli import - import commands\n" + printf "orcli import - commands to create OpenRefine projects from files or URLs\n" echo else - printf "orcli import - import commands\n" + printf "orcli import - commands to create OpenRefine projects from files or URLs\n" echo fi @@ -253,11 +253,11 @@ orcli_list_usage() { # :command.usage orcli_info_usage() { if [[ -n $long_usage ]]; then - printf "orcli info - show project metadata\n" + printf "orcli info - show OpenRefine project's metadata\n" echo else - printf "orcli info - show project metadata\n" + printf "orcli info - show OpenRefine project's metadata\n" echo fi @@ -296,11 +296,11 @@ orcli_info_usage() { # :command.usage orcli_export_usage() { if [[ -n $long_usage ]]; then - printf "orcli export - export commands\n" + printf "orcli export - commands to export data from OpenRefine projects to files\n" echo else - printf "orcli export - export commands\n" + printf "orcli export - commands to export data from OpenRefine projects to files\n" echo fi diff --git a/src/bashly.yml b/src/bashly.yml index 2b670f3..320b860 100644 --- a/src/bashly.yml +++ b/src/bashly.yml @@ -38,7 +38,7 @@ commands: Usage: eval "\$(orcli completions)" - name: import - help: import commands + help: commands to create OpenRefine projects from files or URLs commands: - name: csv @@ -77,7 +77,7 @@ commands: help: list projects on OpenRefine server - name: info - help: show project metadata + help: show OpenRefine project's metadata args: - name: project help: project name or id @@ -87,7 +87,7 @@ commands: - info 1234567890123 - name: export - help: export commands + help: commands to export data from OpenRefine projects to files commands: - name: tsv From c79c4d7df6e112fd620f6909b68f7a1d5e782439 Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Tue, 25 Oct 2022 10:44:20 +0000 Subject: [PATCH 3/3] fix layout --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 860d364..62a744f 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ Optional: orcli info "duplicates" ``` -4. ~~~Remove duplicates~~~ +4. Remove duplicates (coming soon) 5. Export data from OpenRefine project to tab-separated-values (TSV) file `duplicates.tsv` @@ -110,7 +110,7 @@ Optional: ```sh rm duplicates.tsv - rm`example.sh + rm example.sh ``` ## Usage @@ -133,14 +133,14 @@ orcli uses [bashly](https://github.com/DannyBen/bashly/) for generating the one- 1. Install bashly (requires ruby) -```sh -gem install bashly -``` + ```sh + gem install bashly + ``` 2. Edit code in [src](src) directory 3. Generate script -```sh -bashly generate --upgrade -``` + ```sh + bashly generate --upgrade + ```