From 7826fa103797cca63243fbcb4c33df8e3dd3b036 Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Fri, 27 Dec 2019 16:07:17 +0100 Subject: [PATCH] port to python 3.7 and use requests --- google/refine/__main__.py | 24 ++++++++-------- google/refine/cli.py | 54 ++++++++++++++++++------------------ google/refine/facet.py | 12 ++++---- google/refine/history.py | 2 +- google/refine/refine.py | 57 ++++++++++++++++++++------------------ requirements.txt | 2 +- setup.py | 4 +-- tests/test_refine_small.py | 2 +- 8 files changed, 80 insertions(+), 77 deletions(-) diff --git a/google/refine/__main__.py b/google/refine/__main__.py index 7105993..1dacd88 100644 --- a/google/refine/__main__.py +++ b/google/refine/__main__.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#!/usr/bin/env python3 """ Script to provide a command line interface to a Refine server. """ @@ -210,14 +210,14 @@ def main(): # get project_id if args and not str.isdigit(args[0]): - projects = refine.Refine(refine.RefineServer()).list_projects().items() + projects = list(refine.Refine(refine.RefineServer()).list_projects().items()) idlist = [] for project_id, project_info in projects: if args[0].decode('UTF-8') == project_info['name']: idlist.append(str(project_id)) if len(idlist) > 1: - print('Error: Found %s projects with name %s.\n' - 'Please specify project by id.' % (len(idlist), args[0])) + print(('Error: Found %s projects with name %s.\n' + 'Please specify project by id.' % (len(idlist), args[0]))) for i in idlist: print('') cli.info(i) @@ -226,8 +226,8 @@ def main(): try: project_id = idlist[0] except IndexError: - print('Error: No project found with name %s.\n' - 'Try command --list' % args[0]) + print(('Error: No project found with name %s.\n' + 'Try command --list' % args[0])) return elif args: project_id = args[0] @@ -240,11 +240,11 @@ def main(): elif options.create: group5_dict = {group5_arg.dest: getattr(options, group5_arg.dest) for group5_arg in group5.option_list} - kwargs = {k: v for k, v in group5_dict.items() + kwargs = {k: v for k, v in list(group5_dict.items()) if v is not None and v not in ['true', 'false']} - kwargs.update({k: True for k, v in group5_dict.items() + kwargs.update({k: True for k, v in list(group5_dict.items()) if v == 'true'}) - kwargs.update({k: False for k, v in group5_dict.items() + kwargs.update({k: False for k, v in list(group5_dict.items()) if v == 'false'}) if options.file_format: kwargs.update({'project_format': options.file_format}) @@ -259,11 +259,11 @@ def main(): elif args and options.template: group6_dict = {group6_arg.dest: getattr(options, group6_arg.dest) for group6_arg in group6.option_list} - kwargs = {k: v for k, v in group6_dict.items() + kwargs = {k: v for k, v in list(group6_dict.items()) if v is not None and v not in ['true', 'false']} - kwargs.update({k: True for k, v in group6_dict.items() + kwargs.update({k: True for k, v in list(group6_dict.items()) if v == 'true'}) - kwargs.update({k: False for k, v in group6_dict.items() + kwargs.update({k: False for k, v in list(group6_dict.items()) if v == 'false'}) cli.templating(project_id, options.template, output_file=options.output, **kwargs) diff --git a/google/refine/cli.py b/google/refine/cli.py index bff7417..7fed49f 100644 --- a/google/refine/cli.py +++ b/google/refine/cli.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#!/usr/bin/env python3 """ Functions used by the command line interface (CLI) """ @@ -24,7 +24,7 @@ import os import ssl import sys import time -import urllib +import urllib.request, urllib.parse, urllib.error from xml.etree import ElementTree from google.refine import refine @@ -38,8 +38,8 @@ def apply(project_id, history_file): raise Exception('Failed to apply %s to %s: %s' % (history_file, project_id, response)) else: - print('File %s has been successfully applied to project %s' % - (history_file, project_id)) + print(('File %s has been successfully applied to project %s' % + (history_file, project_id))) def create(project_file, @@ -113,7 +113,7 @@ def create(project_file, sheets = [0] # TODO: new format for sheets option introduced in OpenRefine 2.8 # execute - kwargs = {k: v for k, v in vars().items() if v is not None} + kwargs = {k: v for k, v in list(vars().items()) if v is not None} project = refine.Refine(refine.RefineServer()).new_project( guess_cell_value_types=guessCellValueTypes, ignore_lines=ignoreLines, @@ -127,8 +127,8 @@ def create(project_file, **kwargs) rows = project.do_json('get-rows')['total'] if rows > 0: - print('{0}: {1}'.format('id', project.project_id)) - print('{0}: {1}'.format('rows', rows)) + print(('{0}: {1}'.format('id', project.project_id))) + print(('{0}: {1}'.format('rows', rows))) return project else: raise Exception( @@ -144,7 +144,7 @@ def delete(project_id): raise Exception('Failed to delete %s: %s' % (project_id, response)) else: - print('Project %s has been successfully deleted' % project_id) + print(('Project %s has been successfully deleted' % project_id)) def download(url, output_file=None): @@ -152,14 +152,14 @@ def download(url, output_file=None): if not output_file: output_file = os.path.basename(url) if os.path.exists(output_file): - print('Error: File %s already exists.\n' + print(('Error: File %s already exists.\n' 'Delete existing file or try command --output ' - 'to specify a different filename.' % output_file) + 'to specify a different filename.' % output_file)) return # Workaround for SSL verification problems in one-file-executables context = ssl._create_unverified_context() - urllib.urlretrieve(url, output_file, context=context) - print('Download to file %s complete' % output_file) + urllib.request.urlretrieve(url, output_file, context=context) + print(('Download to file %s complete' % output_file)) def export(project_id, encoding=None, output_file=None, export_format=None): @@ -181,33 +181,33 @@ def export(project_id, encoding=None, output_file=None, export_format=None): with open(output_file, 'wb') as f: f.write(project.export( export_format=export_format, encoding=encoding).read()) - print('Export to file %s complete' % output_file) + print(('Export to file %s complete' % output_file)) def info(project_id): """Show project metadata""" projects = refine.Refine(refine.RefineServer()).list_projects() - if project_id in projects.keys(): - print('{0:>20}: {1}'.format('id', project_id)) - print('{0:>20}: {1}'.format('url', 'http://' + + if project_id in list(projects.keys()): + print(('{0:>20}: {1}'.format('id', project_id))) + print(('{0:>20}: {1}'.format('url', 'http://' + refine.REFINE_HOST + ':' + refine.REFINE_PORT + - '/project?project=' + project_id)) - for k, v in projects[project_id].items(): + '/project?project=' + project_id))) + for k, v in list(projects[project_id].items()): if v: - print(u'{0:>20}: {1}'.format(k, v)) + print(('{0:>20}: {1}'.format(k, v))) project_model = refine.RefineProject(project_id).get_models() columns = [c['name'] for c in project_model['columnModel']['columns']] for (i, v) in enumerate(columns, start=1): - print(u'{0:>20}: {1}'.format(u'column ' + str(i).zfill(3), v)) + print(('{0:>20}: {1}'.format('column ' + str(i).zfill(3), v))) else: - print('Error: No project found with id %s.\n' - 'Check existing projects with command --list' % (project_id)) + print(('Error: No project found with id %s.\n' + 'Check existing projects with command --list' % (project_id))) def ls(): """Query the server and list projects sorted by mtime.""" - projects = refine.Refine(refine.RefineServer()).list_projects().items() + projects = list(refine.Refine(refine.RefineServer()).list_projects().items()) def date_to_epoch(json_dt): """Convert a JSON date time into seconds-since-epoch.""" @@ -215,7 +215,7 @@ def ls(): projects.sort(key=lambda v: date_to_epoch(v[1]['modified']), reverse=True) if projects: for project_id, project_info in projects: - print(u'{0:>14}: {1}'.format(project_id, project_info['name'])) + print(('{0:>14}: {1}'.format(project_id, project_info['name']))) else: print('Error: No projects found') @@ -271,7 +271,7 @@ def templating(project_id, else: with open(output_file, 'wb') as f: f.write(project.export_templating(**templateconfig).read()) - print('Export to file %s complete' % output_file) + print(('Export to file %s complete' % output_file)) else: # splitToFiles functionality prefix = templateconfig['prefix'] @@ -323,7 +323,7 @@ def templating(project_id, output_file = base + '_' + ids[index] + '.' + ext with open(output_file, 'wb') as f: f.writelines([prefix, record, suffix]) - print('Export to files complete. Last file: %s' % output_file) + print(('Export to files complete. Last file: %s' % output_file)) else: zeros = len(str(len(records))) for index, record in enumerate(records): @@ -331,4 +331,4 @@ def templating(project_id, str(index + 1).zfill(zeros) + '.' + ext with open(output_file, 'wb') as f: f.writelines([prefix, record, suffix]) - print('Export to files complete. Last file: %s' % output_file) + print(('Export to files complete. Last file: %s' % output_file)) diff --git a/google/refine/facet.py b/google/refine/facet.py index 54850a3..1fc4c8c 100644 --- a/google/refine/facet.py +++ b/google/refine/facet.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ OpenRefine Facets, Engine, and Facet Responses. """ @@ -40,11 +40,11 @@ class Facet(object): self.type = facet_type self.name = column self.column_name = column - for k, v in options.items(): + for k, v in list(options.items()): setattr(self, k, v) def as_dict(self): - return dict([(to_camel(k), v) for k, v in self.__dict__.items() + return dict([(to_camel(k), v) for k, v in list(self.__dict__.items()) if v is not None]) @@ -159,8 +159,8 @@ class FacetResponse(object): """Class for unpacking an individual facet response.""" def __init__(self, facet): self.name = None - for k, v in facet.items(): - if isinstance(k, bool) or isinstance(k, basestring): + for k, v in list(facet.items()): + if isinstance(k, bool) or isinstance(k, str): setattr(self, from_camel(k), v) self.choices = {} @@ -268,7 +268,7 @@ class Sorting(object): criteria = [criteria] for criterion in criteria: # A string criterion defaults to a string sort on that column - if isinstance(criterion, basestring): + if isinstance(criterion, str): criterion = { 'column': criterion, 'valueType': 'string', diff --git a/google/refine/history.py b/google/refine/history.py index ffa3c4e..052c64a 100644 --- a/google/refine/history.py +++ b/google/refine/history.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ OpenRefine history: parsing responses. """ diff --git a/google/refine/refine.py b/google/refine/refine.py index a7532df..7a2615b 100644 --- a/google/refine/refine.py +++ b/google/refine/refine.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ Client library to communicate with a Refine server. """ @@ -23,12 +23,13 @@ import json import gzip import os import re -import StringIO +try: + import io +except: + from io import StringIO, BytesIO import time -import urllib -import urllib2_file -import urllib2 -import urlparse +import requests +import urllib.request, urllib.parse, urllib.error from google.refine import facet from google.refine import history @@ -74,29 +75,32 @@ class RefineServer(object): else: params['project'] = project_id if params: - url += '?' + urllib.urlencode(params) - req = urllib2.Request(url) - if data: - req.add_data(data) # data = urllib.urlencode(data) - #req.add_header('Accept-Encoding', 'gzip') + url += '?' + urllib.parse.urlencode(params) + req = urllib.request.Request(url) + try: - response = urllib2.urlopen(req) - except urllib2.HTTPError as e: + if not data: + response = requests.get(url) + else: + response = requests.post(url, data=data, files=files) + except requests.exceptions.HTTPError as e: raise Exception('HTTP %d "%s" for %s\n\t%s' % (e.code, e.msg, e.geturl(), data)) - except urllib2.URLError as e: - raise urllib2.URLError( + except requests.exceptions.URLRequired as e: + raise requests.exceptions.URLRequired( '%s for %s. No Refine server reachable/running; ENV set?' % (e.reason, self.server)) - if response.info().get('Content-Encoding', None) == 'gzip': + + if response.encoding == 'gzip': # Need a seekable filestream for gzip - gzip_fp = gzip.GzipFile(fileobj=StringIO.StringIO(response.read())) + gzip_fp = gzip.GzipFile(fileobj=io.StringIO(response.read())) # XXX Monkey patch response's filehandle. Better way? urllib.addbase.__init__(response, gzip_fp) return response + def urlopen_json(self, *args, **kwargs): """Open a Refine URL, optionally POST data, and return parsed JSON.""" - response = json.loads(self.urlopen(*args, **kwargs).read()) + response = self.urlopen(*args, **kwargs).json() if 'code' in response and response['code'] not in ('ok', 'pending'): error_message = ('server ' + response['code'] + ': ' + response.get('message', response.get('stack', response))) @@ -256,24 +260,23 @@ class Refine: 'include-file-sources': s(include_file_sources), } + files = None if project_url is not None: options['url'] = project_url elif project_file is not None: - options['project-file'] = { - 'fd': open(project_file), - 'filename': project_file, - } + files = {'project-file': open(project_file, 'r')} + if project_name is None: # make a name for itself by stripping extension and directories project_name = (project_file or 'New project').rsplit('.', 1)[0] project_name = os.path.basename(project_name) options['project-name'] = project_name response = self.server.urlopen( - 'create-project-from-upload', options, params + 'create-project-from-upload', options, params, files=files ) # expecting a redirect to the new project containing the id in the url - url_params = urlparse.parse_qs( - urlparse.urlparse(response.geturl()).query) + url_params = urllib.parse.parse_qs( + urllib.parse.urlparse(response.geturl()).query) if 'project' in url_params: project_id = url_params['project'][0] return RefineProject(self.server, project_id) @@ -430,7 +433,7 @@ class RefineProject: def export(self, encoding=None, export_format='tsv'): """Return a fileobject of a project's data.""" url = ('export-rows/' + - urllib.quote(self.project_name().encode('utf8')) + + urllib.parse.quote(self.project_name().encode('utf8')) + '.' + export_format) data = {'format': export_format} if encoding: @@ -441,7 +444,7 @@ class RefineProject: template='', rowSeparator='\n', suffix=''): """Return a fileobject of a project's data in templating mode.""" url = ('export-rows/' + - urllib.quote(self.project_name().encode('utf8')) + + urllib.parse.quote(self.project_name().encode('utf8')) + '.' + 'txt') data = {'format': 'template', 'template': template, diff --git a/requirements.txt b/requirements.txt index f02ab12..663bd1f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -urllib2_file>=0.2.1 \ No newline at end of file +requests \ No newline at end of file diff --git a/setup.py b/setup.py index dbbcc5a..4321afb 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,8 @@ setup(name='openrefine-client', author_email='felix.lohmeier@opencultureconsulting.com', url='https://github.com/opencultureconsulting/openrefine-client', packages=find_packages(exclude=['tests']), - install_requires=['urllib2_file'], - python_requires='>=2.7, !=3.*', + install_requires=['requests'], + python_requires='>=3.0,<4', entry_points={ 'console_scripts': [ 'openrefine-client = google.refine.__main__:main' ] }, diff --git a/tests/test_refine_small.py b/tests/test_refine_small.py index c525ba5..4b3c132 100644 --- a/tests/test_refine_small.py +++ b/tests/test_refine_small.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """ test_refine_small.py """