diff --git a/Makefile b/Makefile deleted file mode 100644 index 5ee5c1a..0000000 --- a/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# XXX have a Makefile written by someone that knows Makefiles... - -all: test build install - -readme: - # requires docutils, e.g. pip install docutils - rst2html.py README.rst > README.html - w3m -dump README.html | unix2dos > README.txt - -test: - python setup.py test - -# tests that don't require a Refine server running -smalltest: - python setup.py test --test-suite tests.test_refine_small - python setup.py test --test-suite tests.test_facet - python setup.py test --test-suite tests.test_history - -build: - python setup.py build - -install: - sudo python setup.py install - -clean: - find . -name '*.pyc' | xargs rm -f - # XXX is there some way of having setup.py clean up its junk? - rm -rf README.{html,txt} build dist refine_client.egg-info distribute-* - -upload: clean - python setup.py sdist upload - diff --git a/README.md b/README.md index 090ea66..913e21e 100644 --- a/README.md +++ b/README.md @@ -334,10 +334,10 @@ See also: ## Python -[openrefine-client](https://pypi.org/project/openrefine-client/) [![PyPI](https://img.shields.io/pypi/v/openrefine-client)](https://pypi.org/project/openrefine-client/) (requires Python 2.x) +[openrefine-client](https://pypi.org/project/openrefine-client/) [![PyPI](https://img.shields.io/pypi/v/openrefine-client)](https://pypi.org/project/openrefine-client/) (requires Python 3.x) ```sh -python2 -m pip install openrefine-client --user +python3 -m pip install openrefine-client --user ``` This will install the package `openrefine-client` containing modules in `google.refine`. 
@@ -352,7 +352,7 @@ openrefine-client --help Usage: same commands as explained above (see [Basic Commands](#basic-commands) and [Advanced Templating](#advanced-templating)) -### Option 2: using cli functions in Python 2.x environment +### Option 2: using cli functions in Python 3.x environment Import module cli: @@ -436,158 +436,9 @@ Commands: cli.delete(p1.project_id) ``` -### Option 3: the upstream way - -This fork can be used in the same way as the upstream [Python client library](https://github.com/PaulMakepeace/refine-client-py/). - -Some functions in the python client library are not yet compatible with OpenRefine >=3.0 (cf. [issue #19 in refine-client-py](https://github.com/paulmakepeace/refine-client-py/issues/19)). - -Import module refine: - -```python -from google.refine import refine -``` - -Server Commands: - -* set up connection: - - ```python - server1 = refine.Refine('http://localhost:3333') - ``` - -- show version: - - ```python - server1.server.get_version() - server1.server.version - ``` - -- list projects: - - ```python - server1.list_projects() - ``` - - - pretty print the returned dict with json.dumps: - - ```python - import json - print(json.dumps(server1.list_projects(), indent=1)) - ``` - -- create project: - - ```python - server1.new_project(project_file='duplicates.csv') - ``` - - * create and open the returned project in one step: - - ```python - project1 = server1.new_project(project_file='duplicates.csv') - ``` - -Project commands: - -* open project: - - ```python - project1 = server1.open_project('1234567890123') - ``` - -* print full URL to project: - - ```python - project1.project_url() - ``` - -* list columns: - - ```python - project1.columns - ``` - -* compute text facet on first column (**fails with OpenRefine >=3.2**): - - ```python - project1.compute_facets(facet.TextFacet(project1.columns[0])) - ``` - - * print returned object - - ```python - facets = project1.compute_facets(facet.TextFacet(project1.columns[0])).facets[0] - for 
k in sorted(facets.choices, key=lambda k: facets.choices[k].count, reverse=True): - print(facets.choices[k].count, k) - ``` - -* compute clusters on first column: - - ```python - project1.compute_clusters(project1.columns[0]) - ``` - -* apply rules from file to project: - - ```python - project1.apply_operations('duplicates-deletion.json') - ``` - -* export project: - - ```python - project1.export(export_format='tsv') - ``` - - * print the returned fileobject: - - ```python - print(project1.export(export_format='tsv').read()) - ``` - - * save the returned fileobject to file: - - ```python - with open('export.tsv', 'wb') as f: - f.write(project1.export(export_format='tsv').read()) - ``` - -* templating export (**function was added in this fork**, see [Advanced Templating](#advanced-templating) above): - - ```python - data = project1.export_templating( - prefix='''{ "events" : [ - ''',template=''' { "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }''', - rowSeparator=''', - ''',suffix=''' - ] }''') - print(data.read()) - ``` - -* print help screen with available commands (many more!): - - ```python - help(project1) - ``` - -* example for custom commands: - - ```python - project1.do_json('get-rows')['total'] - ``` - -* delete project: - - ```python - project1.delete() - ``` - See also: -- Jupyter notebook by Trevor Muñoz (2013-08-18): [Programmatic Use of Open Refine to Facet and Cluster Names of 'Dishes' from NYPL's What's on the menu?](https://nbviewer.jupyter.org/gist/trevormunoz/6265360) - Jupyter notebook by Tony Hirst (2019-01-09) [Notebook demonstrating how to control OpenRefine via a Python client.](https://nbviewer.jupyter.org/github/ouseful-PR/openrefineder/blob/4cef25a4ca6077536c5f49cafb531499fbcad96e/notebooks/OpenRefine%20Demos.ipynb) -- Unittests [test_refine.py](tests/test_refine.py) and [test_tutorial.py](tests/test_tutorial.py) (both importing [refinetest.py](tests/refinetest.py)) - [OpenRefine 
API](https://github.com/OpenRefine/OpenRefine/wiki/OpenRefine-API) in official OpenRefine wiki ## Binder @@ -598,29 +449,12 @@ See also: - no registration needed, will start within a few minutes - [restricted](https://mybinder.readthedocs.io/en/latest/faq.html#how-much-memory-am-i-given-when-using-binder) to 2 GB RAM and server will be deleted after 10 minutes of inactivity - [bash_kernel demo notebook](https://nbviewer.jupyter.org/github/felixlohmeier/openrefineder/blob/master/openrefine-client-bash.ipynb) for using the openrefine-client in a Linux Bash environment [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/felixlohmeier/openrefineder/master?urlpath=/tree/openrefine-client-bash.ipynb) -- [python2 demo notebook](https://nbviewer.jupyter.org/github/felixlohmeier/openrefineder/blob/master/openrefine-client-python.ipynb) for using the openrefine-client in a Python 2 environment [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/felixlohmeier/openrefineder/master?urlpath=/tree/openrefine-client-python.ipynb) ## Development -If you would like to contribute to the Python client library please consider a pull request to the upstream repository [refine-client-py](https://github.com/PaulMakepeace/refine-client-py/). - ### Tests -Ensure you have OpenRefine running (i.e. available at http://localhost:3333). If necessary set the environment variables `OPENREFINE_HOST` and `OPENREFINE_PORT` to change the URL. - -The Python client library includes several unit tests. - -- run all tests - - ```sh - python setup.py test - ``` - -- run subset test_facet - - ```sh - python setup.py --test-suite tests.test_facet - ``` +TODO There is also a script that uses docker images to run the unit tests with different versions of OpenRefine. @@ -656,9 +490,8 @@ Note to myself: When releasing a new version... ```sh ./tests.sh -a - jupyter notebook tests/cli_python2.ipynb ``` - + 2. Make final changes in Git - update versions (e.g. 
0.3.7 und 0-3-7) in [README.md](https://github.com/opencultureconsulting/openrefine-client/blob/master/README.md#download) @@ -667,7 +500,7 @@ Note to myself: When releasing a new version... 3. Build executables with PyInstaller - - Run PyInstaller in Python 2 environments on native Windows, macOS and Linux. Should be "the oldest version of the OS you need to support"! Current release is built with: + - Run PyInstaller in Python 3 environments on native Windows, macOS and Linux. Should be "the oldest version of the OS you need to support"! Current release is built with: - Ubuntu 16.04 LTS (64-bit) - macOS Sierra 10.12 @@ -678,8 +511,8 @@ Note to myself: When releasing a new version... ```sh git clone https://github.com/opencultureconsulting/openrefine-client.git cd openrefine-client - python -m pip install . --user - python -m pip install pyinstaller --user + python3 -m pip install . --user + python3 -m pip install pyinstaller --user pyinstaller --onefile refine.py --hidden-import google.refine.__main__ ``` @@ -687,9 +520,8 @@ Note to myself: When releasing a new version... ```sh ./tests.sh -a - jupyter notebook tests/cli_bash.ipynb ``` - + 5. Create release in GitHub - draft [release notes](https://github.com/opencultureconsulting/openrefine-client/releases) and attach one-file-executables @@ -697,6 +529,7 @@ Note to myself: When releasing a new version... 6. Build package and upload to PyPI ```sh + TODO python3 setup.py sdist bdist_wheel python3 -m twine upload dist/* ``` @@ -710,7 +543,7 @@ Note to myself: When releasing a new version... 
- openrefine-batch: [openrefine-batch.sh](https://github.com/opencultureconsulting/openrefine-batch/blob/master/openrefine-batch.sh#L7) and [openrefine-batch-docker.sh](https://github.com/opencultureconsulting/openrefine-batch/blob/master/openrefine-batch-docker.sh) - - openrefineder: [postBuild](https://github.com/felixlohmeier/openrefineder/blob/master/postBuild) + - openrefineder: [postBuild](https://github.com/felixlohmeier/openrefineder/blob/master/postBuild) and [openrefine-client-bash.ipynb](https://github.com/felixlohmeier/openrefineder/blob/master/openrefine-client-bash.ipynb) ## Credits @@ -718,14 +551,6 @@ Note to myself: When releasing a new version... David Huynh, [initial cut]( - - -class HistoryEntry(object): - # N.B. e.g. **response['historyEntry'] won't work as keys are unicode :-/ - #noinspection PyUnusedLocal - def __init__(self, history_entry_id=None, time=None, description=None, **kwargs): - if history_entry_id is None: - raise ValueError('History entry id must be set') - self.id = history_entry_id - self.description = description - self.time = time diff --git a/google/refine/refine.py b/google/refine/refine.py index 346e68c..e40882b 100644 --- a/google/refine/refine.py +++ b/google/refine/refine.py @@ -32,7 +32,6 @@ import requests import urllib.request, urllib.parse, urllib.error from google.refine import facet -from google.refine import history REFINE_HOST = os.environ.get('OPENREFINE_HOST', os.environ.get('GOOGLE_REFINE_HOST', '127.0.0.1')) REFINE_PORT = os.environ.get('OPENREFINE_PORT', os.environ.get('GOOGLE_REFINE_PORT', '3333')) @@ -360,7 +359,6 @@ class RefineProject: self.project_id = project_id self.engine = facet.Engine() self.sorting = facet.Sorting() - self.history_entry = None # following filled in by get_models() self.key_column = None self.has_records = False @@ -392,11 +390,6 @@ class RefineProject: response = self.server.urlopen_json(command, project_id=self.project_id, data=data) - if 'historyEntry' in response: - # 
**response['historyEntry'] won't work as keys are unicode :-/ - he = response['historyEntry'] - self.history_entry = history.HistoryEntry(he['id'], he['time'], - he['description']) return response def get_models(self):