From ca19d7ef163a2f3f4afce27814c8f41be037981a Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Thu, 17 Jun 2021 12:57:58 +0200 Subject: [PATCH] add jupyter notebook --- README.md | 10 +++++++++- demo.ipynb | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 demo.ipynb diff --git a/README.md b/README.md index fa802ca..68f99ec 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ## OpenRefine batch processing (openrefine-batch.sh) -[![Codacy Badge](https://app.codacy.com/project/badge/Grade/ad8a97e42e634bbe87203ea48efb436e)](https://www.codacy.com/gh/opencultureconsulting/openrefine-batch/dashboard) +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/ad8a97e42e634bbe87203ea48efb436e)](https://www.codacy.com/gh/opencultureconsulting/openrefine-batch/dashboard) [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/opencultureconsulting/openrefine-batch/master?urlpath=lab/tree/demo.ipynb) Shell script to run OpenRefine in batch mode (import, transform, export). This bash script automatically... @@ -17,6 +17,14 @@ If you prefer a containerized approach, see a [variation of this script for Dock - **Step 1**: Do some experiments with your data (or parts of it) in the graphical user interface of OpenRefine. If you are fine with all transformation rules, [extract the json code](http://kb.refinepro.com/2012/06/google-refine-json-and-my-notepad-or.html) and save it as file (e.g. transform.json). - **Step 2**: Put your data and the json file(s) in two different directories and execute the script. The script will automatically import all data files in OpenRefine projects, apply the transformation rules in the json files to each project and export all projects to files in the format specified (default: TSV - tab-separated values). 
+### Demo via Binder + +[![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/opencultureconsulting/openrefine-batch/master?urlpath=lab/tree/demo.ipynb) + +- free-to-use, on-demand server with JupyterLab and Bash kernel +- no registration needed, will start within a few minutes +- [restricted](https://mybinder.readthedocs.io/en/latest/about/about.html#how-much-memory-am-i-given-when-using-binder) to 2 GB RAM and the server will be deleted after 10 minutes of inactivity + ### Install Download the script and grant file permissions to execute: diff --git a/demo.ipynb b/demo.ipynb new file mode 100644 index 0000000..bd0020e --- /dev/null +++ b/demo.ipynb @@ -0,0 +1 @@ +{"metadata":{"language_info":{"name":"bash","codemirror_mode":"shell","mimetype":"text/x-sh","file_extension":".sh"},"kernelspec":{"name":"bash","display_name":"Bash","language":"bash"}},"nbformat_minor":5,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Example Powerhouse Museum\n\nOutput will be stored in examples/powerhouse-museum/output/phm-collection.tsv","metadata":{}},{"cell_type":"code","source":"./openrefine-batch.sh \\\n-a examples/powerhouse-museum/input/ \\\n-b examples/powerhouse-museum/config/ \\\n-c examples/powerhouse-museum/output/ \\\n-f tsv \\\n-i processQuotes=false \\\n-i guessCellValueTypes=true \\\n-RX","metadata":{"trusted":true},"execution_count":null,"outputs":[]}]} \ No newline at end of file