Port to Python 3.7 and use requests

This commit is contained in:
Wolf Vollprecht 2019-12-27 16:07:17 +01:00
parent c47ce10eba
commit 7826fa1037
8 changed files with 80 additions and 77 deletions

View File

@ -1,4 +1,4 @@
#! /usr/bin/env python #!/usr/bin/env python3
""" """
Script to provide a command line interface to a Refine server. Script to provide a command line interface to a Refine server.
""" """
@ -210,14 +210,14 @@ def main():
# get project_id # get project_id
if args and not str.isdigit(args[0]): if args and not str.isdigit(args[0]):
projects = refine.Refine(refine.RefineServer()).list_projects().items() projects = list(refine.Refine(refine.RefineServer()).list_projects().items())
idlist = [] idlist = []
for project_id, project_info in projects: for project_id, project_info in projects:
if args[0].decode('UTF-8') == project_info['name']: if args[0].decode('UTF-8') == project_info['name']:
idlist.append(str(project_id)) idlist.append(str(project_id))
if len(idlist) > 1: if len(idlist) > 1:
print('Error: Found %s projects with name %s.\n' print(('Error: Found %s projects with name %s.\n'
'Please specify project by id.' % (len(idlist), args[0])) 'Please specify project by id.' % (len(idlist), args[0])))
for i in idlist: for i in idlist:
print('') print('')
cli.info(i) cli.info(i)
@ -226,8 +226,8 @@ def main():
try: try:
project_id = idlist[0] project_id = idlist[0]
except IndexError: except IndexError:
print('Error: No project found with name %s.\n' print(('Error: No project found with name %s.\n'
'Try command --list' % args[0]) 'Try command --list' % args[0]))
return return
elif args: elif args:
project_id = args[0] project_id = args[0]
@ -240,11 +240,11 @@ def main():
elif options.create: elif options.create:
group5_dict = {group5_arg.dest: getattr(options, group5_arg.dest) group5_dict = {group5_arg.dest: getattr(options, group5_arg.dest)
for group5_arg in group5.option_list} for group5_arg in group5.option_list}
kwargs = {k: v for k, v in group5_dict.items() kwargs = {k: v for k, v in list(group5_dict.items())
if v is not None and v not in ['true', 'false']} if v is not None and v not in ['true', 'false']}
kwargs.update({k: True for k, v in group5_dict.items() kwargs.update({k: True for k, v in list(group5_dict.items())
if v == 'true'}) if v == 'true'})
kwargs.update({k: False for k, v in group5_dict.items() kwargs.update({k: False for k, v in list(group5_dict.items())
if v == 'false'}) if v == 'false'})
if options.file_format: if options.file_format:
kwargs.update({'project_format': options.file_format}) kwargs.update({'project_format': options.file_format})
@ -259,11 +259,11 @@ def main():
elif args and options.template: elif args and options.template:
group6_dict = {group6_arg.dest: getattr(options, group6_arg.dest) group6_dict = {group6_arg.dest: getattr(options, group6_arg.dest)
for group6_arg in group6.option_list} for group6_arg in group6.option_list}
kwargs = {k: v for k, v in group6_dict.items() kwargs = {k: v for k, v in list(group6_dict.items())
if v is not None and v not in ['true', 'false']} if v is not None and v not in ['true', 'false']}
kwargs.update({k: True for k, v in group6_dict.items() kwargs.update({k: True for k, v in list(group6_dict.items())
if v == 'true'}) if v == 'true'})
kwargs.update({k: False for k, v in group6_dict.items() kwargs.update({k: False for k, v in list(group6_dict.items())
if v == 'false'}) if v == 'false'})
cli.templating(project_id, options.template, cli.templating(project_id, options.template,
output_file=options.output, **kwargs) output_file=options.output, **kwargs)

View File

@ -1,4 +1,4 @@
#! /usr/bin/env python #!/usr/bin/env python3
""" """
Functions used by the command line interface (CLI) Functions used by the command line interface (CLI)
""" """
@ -24,7 +24,7 @@ import os
import ssl import ssl
import sys import sys
import time import time
import urllib import urllib.request, urllib.parse, urllib.error
from xml.etree import ElementTree from xml.etree import ElementTree
from google.refine import refine from google.refine import refine
@ -38,8 +38,8 @@ def apply(project_id, history_file):
raise Exception('Failed to apply %s to %s: %s' % raise Exception('Failed to apply %s to %s: %s' %
(history_file, project_id, response)) (history_file, project_id, response))
else: else:
print('File %s has been successfully applied to project %s' % print(('File %s has been successfully applied to project %s' %
(history_file, project_id)) (history_file, project_id)))
def create(project_file, def create(project_file,
@ -113,7 +113,7 @@ def create(project_file,
sheets = [0] sheets = [0]
# TODO: new format for sheets option introduced in OpenRefine 2.8 # TODO: new format for sheets option introduced in OpenRefine 2.8
# execute # execute
kwargs = {k: v for k, v in vars().items() if v is not None} kwargs = {k: v for k, v in list(vars().items()) if v is not None}
project = refine.Refine(refine.RefineServer()).new_project( project = refine.Refine(refine.RefineServer()).new_project(
guess_cell_value_types=guessCellValueTypes, guess_cell_value_types=guessCellValueTypes,
ignore_lines=ignoreLines, ignore_lines=ignoreLines,
@ -127,8 +127,8 @@ def create(project_file,
**kwargs) **kwargs)
rows = project.do_json('get-rows')['total'] rows = project.do_json('get-rows')['total']
if rows > 0: if rows > 0:
print('{0}: {1}'.format('id', project.project_id)) print(('{0}: {1}'.format('id', project.project_id)))
print('{0}: {1}'.format('rows', rows)) print(('{0}: {1}'.format('rows', rows)))
return project return project
else: else:
raise Exception( raise Exception(
@ -144,7 +144,7 @@ def delete(project_id):
raise Exception('Failed to delete %s: %s' % raise Exception('Failed to delete %s: %s' %
(project_id, response)) (project_id, response))
else: else:
print('Project %s has been successfully deleted' % project_id) print(('Project %s has been successfully deleted' % project_id))
def download(url, output_file=None): def download(url, output_file=None):
@ -152,14 +152,14 @@ def download(url, output_file=None):
if not output_file: if not output_file:
output_file = os.path.basename(url) output_file = os.path.basename(url)
if os.path.exists(output_file): if os.path.exists(output_file):
print('Error: File %s already exists.\n' print(('Error: File %s already exists.\n'
'Delete existing file or try command --output ' 'Delete existing file or try command --output '
'to specify a different filename.' % output_file) 'to specify a different filename.' % output_file))
return return
# Workaround for SSL verification problems in one-file-executables # Workaround for SSL verification problems in one-file-executables
context = ssl._create_unverified_context() context = ssl._create_unverified_context()
urllib.urlretrieve(url, output_file, context=context) urllib.request.urlretrieve(url, output_file, context=context)
print('Download to file %s complete' % output_file) print(('Download to file %s complete' % output_file))
def export(project_id, encoding=None, output_file=None, export_format=None): def export(project_id, encoding=None, output_file=None, export_format=None):
@ -181,33 +181,33 @@ def export(project_id, encoding=None, output_file=None, export_format=None):
with open(output_file, 'wb') as f: with open(output_file, 'wb') as f:
f.write(project.export( f.write(project.export(
export_format=export_format, encoding=encoding).read()) export_format=export_format, encoding=encoding).read())
print('Export to file %s complete' % output_file) print(('Export to file %s complete' % output_file))
def info(project_id): def info(project_id):
"""Show project metadata""" """Show project metadata"""
projects = refine.Refine(refine.RefineServer()).list_projects() projects = refine.Refine(refine.RefineServer()).list_projects()
if project_id in projects.keys(): if project_id in list(projects.keys()):
print('{0:>20}: {1}'.format('id', project_id)) print(('{0:>20}: {1}'.format('id', project_id)))
print('{0:>20}: {1}'.format('url', 'http://' + print(('{0:>20}: {1}'.format('url', 'http://' +
refine.REFINE_HOST + ':' + refine.REFINE_HOST + ':' +
refine.REFINE_PORT + refine.REFINE_PORT +
'/project?project=' + project_id)) '/project?project=' + project_id)))
for k, v in projects[project_id].items(): for k, v in list(projects[project_id].items()):
if v: if v:
print(u'{0:>20}: {1}'.format(k, v)) print(('{0:>20}: {1}'.format(k, v)))
project_model = refine.RefineProject(project_id).get_models() project_model = refine.RefineProject(project_id).get_models()
columns = [c['name'] for c in project_model['columnModel']['columns']] columns = [c['name'] for c in project_model['columnModel']['columns']]
for (i, v) in enumerate(columns, start=1): for (i, v) in enumerate(columns, start=1):
print(u'{0:>20}: {1}'.format(u'column ' + str(i).zfill(3), v)) print(('{0:>20}: {1}'.format('column ' + str(i).zfill(3), v)))
else: else:
print('Error: No project found with id %s.\n' print(('Error: No project found with id %s.\n'
'Check existing projects with command --list' % (project_id)) 'Check existing projects with command --list' % (project_id)))
def ls(): def ls():
"""Query the server and list projects sorted by mtime.""" """Query the server and list projects sorted by mtime."""
projects = refine.Refine(refine.RefineServer()).list_projects().items() projects = list(refine.Refine(refine.RefineServer()).list_projects().items())
def date_to_epoch(json_dt): def date_to_epoch(json_dt):
"""Convert a JSON date time into seconds-since-epoch.""" """Convert a JSON date time into seconds-since-epoch."""
@ -215,7 +215,7 @@ def ls():
projects.sort(key=lambda v: date_to_epoch(v[1]['modified']), reverse=True) projects.sort(key=lambda v: date_to_epoch(v[1]['modified']), reverse=True)
if projects: if projects:
for project_id, project_info in projects: for project_id, project_info in projects:
print(u'{0:>14}: {1}'.format(project_id, project_info['name'])) print(('{0:>14}: {1}'.format(project_id, project_info['name'])))
else: else:
print('Error: No projects found') print('Error: No projects found')
@ -271,7 +271,7 @@ def templating(project_id,
else: else:
with open(output_file, 'wb') as f: with open(output_file, 'wb') as f:
f.write(project.export_templating(**templateconfig).read()) f.write(project.export_templating(**templateconfig).read())
print('Export to file %s complete' % output_file) print(('Export to file %s complete' % output_file))
else: else:
# splitToFiles functionality # splitToFiles functionality
prefix = templateconfig['prefix'] prefix = templateconfig['prefix']
@ -323,7 +323,7 @@ def templating(project_id,
output_file = base + '_' + ids[index] + '.' + ext output_file = base + '_' + ids[index] + '.' + ext
with open(output_file, 'wb') as f: with open(output_file, 'wb') as f:
f.writelines([prefix, record, suffix]) f.writelines([prefix, record, suffix])
print('Export to files complete. Last file: %s' % output_file) print(('Export to files complete. Last file: %s' % output_file))
else: else:
zeros = len(str(len(records))) zeros = len(str(len(records)))
for index, record in enumerate(records): for index, record in enumerate(records):
@ -331,4 +331,4 @@ def templating(project_id,
str(index + 1).zfill(zeros) + '.' + ext str(index + 1).zfill(zeros) + '.' + ext
with open(output_file, 'wb') as f: with open(output_file, 'wb') as f:
f.writelines([prefix, record, suffix]) f.writelines([prefix, record, suffix])
print('Export to files complete. Last file: %s' % output_file) print(('Export to files complete. Last file: %s' % output_file))

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
""" """
OpenRefine Facets, Engine, and Facet Responses. OpenRefine Facets, Engine, and Facet Responses.
""" """
@ -40,11 +40,11 @@ class Facet(object):
self.type = facet_type self.type = facet_type
self.name = column self.name = column
self.column_name = column self.column_name = column
for k, v in options.items(): for k, v in list(options.items()):
setattr(self, k, v) setattr(self, k, v)
def as_dict(self): def as_dict(self):
return dict([(to_camel(k), v) for k, v in self.__dict__.items() return dict([(to_camel(k), v) for k, v in list(self.__dict__.items())
if v is not None]) if v is not None])
@ -159,8 +159,8 @@ class FacetResponse(object):
"""Class for unpacking an individual facet response.""" """Class for unpacking an individual facet response."""
def __init__(self, facet): def __init__(self, facet):
self.name = None self.name = None
for k, v in facet.items(): for k, v in list(facet.items()):
if isinstance(k, bool) or isinstance(k, basestring): if isinstance(k, bool) or isinstance(k, str):
setattr(self, from_camel(k), v) setattr(self, from_camel(k), v)
self.choices = {} self.choices = {}
@ -268,7 +268,7 @@ class Sorting(object):
criteria = [criteria] criteria = [criteria]
for criterion in criteria: for criterion in criteria:
# A string criterion defaults to a string sort on that column # A string criterion defaults to a string sort on that column
if isinstance(criterion, basestring): if isinstance(criterion, str):
criterion = { criterion = {
'column': criterion, 'column': criterion,
'valueType': 'string', 'valueType': 'string',

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
""" """
OpenRefine history: parsing responses. OpenRefine history: parsing responses.
""" """

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
""" """
Client library to communicate with a Refine server. Client library to communicate with a Refine server.
""" """
@ -23,12 +23,13 @@ import json
import gzip import gzip
import os import os
import re import re
import StringIO try:
import io
except:
from io import StringIO, BytesIO
import time import time
import urllib import requests
import urllib2_file import urllib.request, urllib.parse, urllib.error
import urllib2
import urlparse
from google.refine import facet from google.refine import facet
from google.refine import history from google.refine import history
@ -74,29 +75,32 @@ class RefineServer(object):
else: else:
params['project'] = project_id params['project'] = project_id
if params: if params:
url += '?' + urllib.urlencode(params) url += '?' + urllib.parse.urlencode(params)
req = urllib2.Request(url) req = urllib.request.Request(url)
if data:
req.add_data(data) # data = urllib.urlencode(data)
#req.add_header('Accept-Encoding', 'gzip')
try: try:
response = urllib2.urlopen(req) if not data:
except urllib2.HTTPError as e: response = requests.get(url)
else:
response = requests.post(url, data=data, files=files)
except requests.exceptions.HTTPError as e:
raise Exception('HTTP %d "%s" for %s\n\t%s' % (e.code, e.msg, e.geturl(), data)) raise Exception('HTTP %d "%s" for %s\n\t%s' % (e.code, e.msg, e.geturl(), data))
except urllib2.URLError as e: except requests.exceptions.URLRequired as e:
raise urllib2.URLError( raise requests.exceptions.URLRequired(
'%s for %s. No Refine server reachable/running; ENV set?' % '%s for %s. No Refine server reachable/running; ENV set?' %
(e.reason, self.server)) (e.reason, self.server))
if response.info().get('Content-Encoding', None) == 'gzip':
if response.encoding == 'gzip':
# Need a seekable filestream for gzip # Need a seekable filestream for gzip
gzip_fp = gzip.GzipFile(fileobj=StringIO.StringIO(response.read())) gzip_fp = gzip.GzipFile(fileobj=io.StringIO(response.read()))
# XXX Monkey patch response's filehandle. Better way? # XXX Monkey patch response's filehandle. Better way?
urllib.addbase.__init__(response, gzip_fp) urllib.addbase.__init__(response, gzip_fp)
return response return response
def urlopen_json(self, *args, **kwargs): def urlopen_json(self, *args, **kwargs):
"""Open a Refine URL, optionally POST data, and return parsed JSON.""" """Open a Refine URL, optionally POST data, and return parsed JSON."""
response = json.loads(self.urlopen(*args, **kwargs).read()) response = self.urlopen(*args, **kwargs).json()
if 'code' in response and response['code'] not in ('ok', 'pending'): if 'code' in response and response['code'] not in ('ok', 'pending'):
error_message = ('server ' + response['code'] + ': ' + error_message = ('server ' + response['code'] + ': ' +
response.get('message', response.get('stack', response))) response.get('message', response.get('stack', response)))
@ -256,24 +260,23 @@ class Refine:
'include-file-sources': s(include_file_sources), 'include-file-sources': s(include_file_sources),
} }
files = None
if project_url is not None: if project_url is not None:
options['url'] = project_url options['url'] = project_url
elif project_file is not None: elif project_file is not None:
options['project-file'] = { files = {'project-file': open(project_file, 'r')}
'fd': open(project_file),
'filename': project_file,
}
if project_name is None: if project_name is None:
# make a name for itself by stripping extension and directories # make a name for itself by stripping extension and directories
project_name = (project_file or 'New project').rsplit('.', 1)[0] project_name = (project_file or 'New project').rsplit('.', 1)[0]
project_name = os.path.basename(project_name) project_name = os.path.basename(project_name)
options['project-name'] = project_name options['project-name'] = project_name
response = self.server.urlopen( response = self.server.urlopen(
'create-project-from-upload', options, params 'create-project-from-upload', options, params, files=files
) )
# expecting a redirect to the new project containing the id in the url # expecting a redirect to the new project containing the id in the url
url_params = urlparse.parse_qs( url_params = urllib.parse.parse_qs(
urlparse.urlparse(response.geturl()).query) urllib.parse.urlparse(response.geturl()).query)
if 'project' in url_params: if 'project' in url_params:
project_id = url_params['project'][0] project_id = url_params['project'][0]
return RefineProject(self.server, project_id) return RefineProject(self.server, project_id)
@ -430,7 +433,7 @@ class RefineProject:
def export(self, encoding=None, export_format='tsv'): def export(self, encoding=None, export_format='tsv'):
"""Return a fileobject of a project's data.""" """Return a fileobject of a project's data."""
url = ('export-rows/' + url = ('export-rows/' +
urllib.quote(self.project_name().encode('utf8')) + urllib.parse.quote(self.project_name().encode('utf8')) +
'.' + export_format) '.' + export_format)
data = {'format': export_format} data = {'format': export_format}
if encoding: if encoding:
@ -441,7 +444,7 @@ class RefineProject:
template='', rowSeparator='\n', suffix=''): template='', rowSeparator='\n', suffix=''):
"""Return a fileobject of a project's data in templating mode.""" """Return a fileobject of a project's data in templating mode."""
url = ('export-rows/' + url = ('export-rows/' +
urllib.quote(self.project_name().encode('utf8')) + urllib.parse.quote(self.project_name().encode('utf8')) +
'.' + 'txt') '.' + 'txt')
data = {'format': 'template', data = {'format': 'template',
'template': template, 'template': template,

View File

@ -1 +1 @@
urllib2_file>=0.2.1 requests

View File

@ -35,8 +35,8 @@ setup(name='openrefine-client',
author_email='felix.lohmeier@opencultureconsulting.com', author_email='felix.lohmeier@opencultureconsulting.com',
url='https://github.com/opencultureconsulting/openrefine-client', url='https://github.com/opencultureconsulting/openrefine-client',
packages=find_packages(exclude=['tests']), packages=find_packages(exclude=['tests']),
install_requires=['urllib2_file'], install_requires=['requests'],
python_requires='>=2.7, !=3.*', python_requires='>=3.0,<4',
entry_points={ entry_points={
'console_scripts': [ 'openrefine-client = google.refine.__main__:main' ] 'console_scripts': [ 'openrefine-client = google.refine.__main__:main' ]
}, },

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
""" """
test_refine_small.py test_refine_small.py
""" """