From 04fa7883cb4d85e84f765077da679a5848ace86e Mon Sep 17 00:00:00 2001 From: felixlohmeier Date: Tue, 1 Nov 2022 20:48:26 +0000 Subject: [PATCH] shortened tutorial and added simple stats --- .gitpod.yml | 2 +- README.md | 25 +++++++------------------ orcli | 4 +++- src/run_command.sh | 2 ++ 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/.gitpod.yml b/.gitpod.yml index c80b529..3e6dbd2 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,7 @@ tasks: before: gem install --silent bashly init: | wget -q -O openrefine.tar.gz "https://oss.sonatype.org/service/local/artifact/maven/content?r=releases&g=org.openrefine&a=openrefine&v=3.6.2&c=linux&p=tar.gz" - tar --exclude 'licenses' --exclude 'LICENSE.txt' --exclude 'README.md' -xzf openrefine.tar.gz --strip 1 + tar --exclude 'licenses' --exclude 'LICENSE.txt' --exclude 'licenses.xml' --exclude 'README.md' -xzf openrefine.tar.gz --strip 1 rm openrefine.tar.gz command: | sudo ln -s "${PWD}/orcli" /usr/local/bin/ diff --git a/README.md b/README.md index 62a744f..a4a05de 100644 --- a/README.md +++ b/README.md @@ -73,46 +73,33 @@ Optional: orcli import csv "https://git.io/fj5hF" --projectName "duplicates" ``` -3. Show OpenRefine project's metadata +3. Remove duplicates (coming soon) - ```sh - orcli info "duplicates" - ``` - -4. Remove duplicates (coming soon) - -5. Export data from OpenRefine project to tab-separated-values (TSV) file `duplicates.tsv` +4. Export data from OpenRefine project to tab-separated-values (TSV) file `duplicates.tsv` ```sh orcli export tsv "duplicates" --output "duplicates.tsv" ``` -6. Write out your session history to file `example.sh` (and delete the last line to remove the history command) +5. Write out your session history to file `example.sh` (and delete the last line to remove the history command) ```sh history -a "example.sh" sed -i '$ d' example.sh ``` -7. Exit playground +6. Exit playground ```sh exit ``` -8. Run batch process +7. Run whole process again ```sh ./orcli run example.sh ``` -9. Cleanup example files - - ```sh - rm duplicates.tsv - rm example.sh - ``` - ## Usage * Use integrated help screens for available options and examples for each command. @@ -127,6 +114,8 @@ Optional: OPENREFINE_URL="http://localhost:3333" orcli list ``` +* If OpenRefine does not have enough memory to process the data, it becomes slow and may even crash. Check the message after the run command finishes to see how much memory was used and adjust the memory allocated to OpenRefine accordingly with the `--memory` flag (default: 2048M). + ## Development orcli uses [bashly](https://github.com/DannyBen/bashly/) for generating the one-file script from files in the `src` directory diff --git a/orcli b/orcli index ee80fcd..136fc88 100755 --- a/orcli +++ b/orcli @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# This script was generated by bashly 0.8.9 (https://bashly.dannyb.co) +# This script was generated by bashly 0.8.10 (https://bashly.dannyb.co) # Modifying it manually is not recommended # :wrapper.bash3_bouncer @@ -930,6 +930,8 @@ orcli_run_command() { awk 1 "${files[$i]}" ) done + # print stats + log "used $(($(ps --no-headers -o rss -p "$OPENREFINE_PID") / 1024)) MB RAM and $(ps --no-headers -o cputime -p "$OPENREFINE_PID") CPU time" fi } diff --git a/src/run_command.sh b/src/run_command.sh index a7908f7..41c1dd0 100644 --- a/src/run_command.sh +++ b/src/run_command.sh @@ -88,4 +88,6 @@ else awk 1 "${files[$i]}" ) done + # print stats + log "used $(($(ps --no-headers -o rss -p "$OPENREFINE_PID") / 1024)) MB RAM and $(ps --no-headers -o cputime -p "$OPENREFINE_PID") CPU time" fi