Port to Python 3.7 and use requests

This commit is contained in:
Wolf Vollprecht 2019-12-27 16:07:17 +01:00
parent c47ce10eba
commit 7826fa1037
8 changed files with 80 additions and 77 deletions

View File

@ -1,4 +1,4 @@
#! /usr/bin/env python
#!/usr/bin/env python3
"""
Script to provide a command line interface to a Refine server.
"""
@ -210,14 +210,14 @@ def main():
# get project_id
if args and not str.isdigit(args[0]):
projects = refine.Refine(refine.RefineServer()).list_projects().items()
projects = list(refine.Refine(refine.RefineServer()).list_projects().items())
idlist = []
for project_id, project_info in projects:
if args[0].decode('UTF-8') == project_info['name']:
idlist.append(str(project_id))
if len(idlist) > 1:
print('Error: Found %s projects with name %s.\n'
'Please specify project by id.' % (len(idlist), args[0]))
print(('Error: Found %s projects with name %s.\n'
'Please specify project by id.' % (len(idlist), args[0])))
for i in idlist:
print('')
cli.info(i)
@ -226,8 +226,8 @@ def main():
try:
project_id = idlist[0]
except IndexError:
print('Error: No project found with name %s.\n'
'Try command --list' % args[0])
print(('Error: No project found with name %s.\n'
'Try command --list' % args[0]))
return
elif args:
project_id = args[0]
@ -240,11 +240,11 @@ def main():
elif options.create:
group5_dict = {group5_arg.dest: getattr(options, group5_arg.dest)
for group5_arg in group5.option_list}
kwargs = {k: v for k, v in group5_dict.items()
kwargs = {k: v for k, v in list(group5_dict.items())
if v is not None and v not in ['true', 'false']}
kwargs.update({k: True for k, v in group5_dict.items()
kwargs.update({k: True for k, v in list(group5_dict.items())
if v == 'true'})
kwargs.update({k: False for k, v in group5_dict.items()
kwargs.update({k: False for k, v in list(group5_dict.items())
if v == 'false'})
if options.file_format:
kwargs.update({'project_format': options.file_format})
@ -259,11 +259,11 @@ def main():
elif args and options.template:
group6_dict = {group6_arg.dest: getattr(options, group6_arg.dest)
for group6_arg in group6.option_list}
kwargs = {k: v for k, v in group6_dict.items()
kwargs = {k: v for k, v in list(group6_dict.items())
if v is not None and v not in ['true', 'false']}
kwargs.update({k: True for k, v in group6_dict.items()
kwargs.update({k: True for k, v in list(group6_dict.items())
if v == 'true'})
kwargs.update({k: False for k, v in group6_dict.items()
kwargs.update({k: False for k, v in list(group6_dict.items())
if v == 'false'})
cli.templating(project_id, options.template,
output_file=options.output, **kwargs)

View File

@ -1,4 +1,4 @@
#! /usr/bin/env python
#!/usr/bin/env python3
"""
Functions used by the command line interface (CLI)
"""
@ -24,7 +24,7 @@ import os
import ssl
import sys
import time
import urllib
import urllib.request, urllib.parse, urllib.error
from xml.etree import ElementTree
from google.refine import refine
@ -38,8 +38,8 @@ def apply(project_id, history_file):
raise Exception('Failed to apply %s to %s: %s' %
(history_file, project_id, response))
else:
print('File %s has been successfully applied to project %s' %
(history_file, project_id))
print(('File %s has been successfully applied to project %s' %
(history_file, project_id)))
def create(project_file,
@ -113,7 +113,7 @@ def create(project_file,
sheets = [0]
# TODO: new format for sheets option introduced in OpenRefine 2.8
# execute
kwargs = {k: v for k, v in vars().items() if v is not None}
kwargs = {k: v for k, v in list(vars().items()) if v is not None}
project = refine.Refine(refine.RefineServer()).new_project(
guess_cell_value_types=guessCellValueTypes,
ignore_lines=ignoreLines,
@ -127,8 +127,8 @@ def create(project_file,
**kwargs)
rows = project.do_json('get-rows')['total']
if rows > 0:
print('{0}: {1}'.format('id', project.project_id))
print('{0}: {1}'.format('rows', rows))
print(('{0}: {1}'.format('id', project.project_id)))
print(('{0}: {1}'.format('rows', rows)))
return project
else:
raise Exception(
@ -144,7 +144,7 @@ def delete(project_id):
raise Exception('Failed to delete %s: %s' %
(project_id, response))
else:
print('Project %s has been successfully deleted' % project_id)
print(('Project %s has been successfully deleted' % project_id))
# Download `url` to a local file. When `output_file` is not given, the
# basename of the URL is used as the filename.
def download(url, output_file=None):
@ -152,14 +152,14 @@ def download(url, output_file=None):
if not output_file:
output_file = os.path.basename(url)
# Never overwrite an existing file: print an error and return without downloading.
if os.path.exists(output_file):
print('Error: File %s already exists.\n'
print(('Error: File %s already exists.\n'
'Delete existing file or try command --output '
'to specify a different filename.' % output_file)
'to specify a different filename.' % output_file))
return
# Workaround for SSL verification problems in one-file-executables
# NOTE(review): ssl._create_unverified_context() is a private helper; presumably
# it turns certificate verification off for this download -- confirm that is intended.
context = ssl._create_unverified_context()
# NOTE(review): the Python 3 docs give the signature as
# urlretrieve(url, filename=None, reporthook=None, data=None); confirm that the
# ported urllib.request.urlretrieve() call below may still pass context=,
# otherwise it will raise TypeError.
urllib.urlretrieve(url, output_file, context=context)
print('Download to file %s complete' % output_file)
urllib.request.urlretrieve(url, output_file, context=context)
print(('Download to file %s complete' % output_file))
def export(project_id, encoding=None, output_file=None, export_format=None):
@ -181,33 +181,33 @@ def export(project_id, encoding=None, output_file=None, export_format=None):
with open(output_file, 'wb') as f:
f.write(project.export(
export_format=export_format, encoding=encoding).read())
print('Export to file %s complete' % output_file)
print(('Export to file %s complete' % output_file))
# Print the metadata of one project: its id, its URL, every metadata entry with
# a truthy value, and its numbered column names. If the id is not in the
# server's project listing, print an error message instead.
def info(project_id):
"""Show project metadata"""
# Fetch the project listing; whether project_id is in it decides the branch below.
projects = refine.Refine(refine.RefineServer()).list_projects()
# NOTE(review): assuming `projects` is a dict, `project_id in projects` is an
# equivalent membership test and the list(...) wrapper is not needed -- confirm.
if project_id in projects.keys():
print('{0:>20}: {1}'.format('id', project_id))
print('{0:>20}: {1}'.format('url', 'http://' +
if project_id in list(projects.keys()):
print(('{0:>20}: {1}'.format('id', project_id)))
print(('{0:>20}: {1}'.format('url', 'http://' +
refine.REFINE_HOST + ':' +
refine.REFINE_PORT +
'/project?project=' + project_id))
for k, v in projects[project_id].items():
'/project?project=' + project_id)))
for k, v in list(projects[project_id].items()):
# Metadata entries with a falsy value are not printed.
if v:
print(u'{0:>20}: {1}'.format(k, v))
print(('{0:>20}: {1}'.format(k, v)))
# Column names come from the project's column model; they are numbered from 1
# and the number is zero-padded to three digits ("column 001", ...).
project_model = refine.RefineProject(project_id).get_models()
columns = [c['name'] for c in project_model['columnModel']['columns']]
for (i, v) in enumerate(columns, start=1):
print(u'{0:>20}: {1}'.format(u'column ' + str(i).zfill(3), v))
print(('{0:>20}: {1}'.format('column ' + str(i).zfill(3), v)))
else:
# Unknown id: report it and point the user at --list.
print('Error: No project found with id %s.\n'
'Check existing projects with command --list' % (project_id))
print(('Error: No project found with id %s.\n'
'Check existing projects with command --list' % (project_id)))
# Print one "id: name" line per project, ordered by the 'modified' field of
# each project's info, or an error message when there are no projects.
def ls():
"""Query the server and list projects sorted by mtime."""
# The ported line wraps .items() in list() so the result supports the in-place
# .sort() call further down.
projects = refine.Refine(refine.RefineServer()).list_projects().items()
projects = list(refine.Refine(refine.RefineServer()).list_projects().items())
def date_to_epoch(json_dt):
"""Convert a JSON date time into seconds-since-epoch."""
@ -215,7 +215,7 @@ def ls():
# Each element is an (id, info) pair; sort on info['modified'], newest first
# (reverse=True).
projects.sort(key=lambda v: date_to_epoch(v[1]['modified']), reverse=True)
if projects:
for project_id, project_info in projects:
print(u'{0:>14}: {1}'.format(project_id, project_info['name']))
print(('{0:>14}: {1}'.format(project_id, project_info['name'])))
else:
print('Error: No projects found')
@ -271,7 +271,7 @@ def templating(project_id,
else:
with open(output_file, 'wb') as f:
f.write(project.export_templating(**templateconfig).read())
print('Export to file %s complete' % output_file)
print(('Export to file %s complete' % output_file))
else:
# splitToFiles functionality
prefix = templateconfig['prefix']
@ -323,7 +323,7 @@ def templating(project_id,
output_file = base + '_' + ids[index] + '.' + ext
with open(output_file, 'wb') as f:
f.writelines([prefix, record, suffix])
print('Export to files complete. Last file: %s' % output_file)
print(('Export to files complete. Last file: %s' % output_file))
else:
zeros = len(str(len(records)))
for index, record in enumerate(records):
@ -331,4 +331,4 @@ def templating(project_id,
str(index + 1).zfill(zeros) + '.' + ext
with open(output_file, 'wb') as f:
f.writelines([prefix, record, suffix])
print('Export to files complete. Last file: %s' % output_file)
print(('Export to files complete. Last file: %s' % output_file))

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
"""
OpenRefine Facets, Engine, and Facet Responses.
"""
@ -40,11 +40,11 @@ class Facet(object):
self.type = facet_type
self.name = column
self.column_name = column
for k, v in options.items():
for k, v in list(options.items()):
setattr(self, k, v)
# Return this object's instance attributes as a dict: each attribute name is
# passed through to_camel() to form the key, and attributes whose value is
# None are left out.
def as_dict(self):
return dict([(to_camel(k), v) for k, v in self.__dict__.items()
return dict([(to_camel(k), v) for k, v in list(self.__dict__.items())
if v is not None])
@ -159,8 +159,8 @@ class FacetResponse(object):
"""Class for unpacking an individual facet response."""
def __init__(self, facet):
self.name = None
for k, v in facet.items():
if isinstance(k, bool) or isinstance(k, basestring):
for k, v in list(facet.items()):
if isinstance(k, bool) or isinstance(k, str):
setattr(self, from_camel(k), v)
self.choices = {}
@ -268,7 +268,7 @@ class Sorting(object):
criteria = [criteria]
for criterion in criteria:
# A string criterion defaults to a string sort on that column
if isinstance(criterion, basestring):
if isinstance(criterion, str):
criterion = {
'column': criterion,
'valueType': 'string',

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
"""
OpenRefine history: parsing responses.
"""

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
"""
Client library to communicate with a Refine server.
"""
@ -23,12 +23,13 @@ import json
import gzip
import os
import re
import StringIO
try:
import io
except:
from io import StringIO, BytesIO
import time
import urllib
import urllib2_file
import urllib2
import urlparse
import requests
import urllib.request, urllib.parse, urllib.error
from google.refine import facet
from google.refine import history
@ -74,29 +75,32 @@ class RefineServer(object):
else:
params['project'] = project_id
if params:
url += '?' + urllib.urlencode(params)
req = urllib2.Request(url)
if data:
req.add_data(data) # data = urllib.urlencode(data)
#req.add_header('Accept-Encoding', 'gzip')
url += '?' + urllib.parse.urlencode(params)
req = urllib.request.Request(url)
try:
response = urllib2.urlopen(req)
except urllib2.HTTPError as e:
if not data:
response = requests.get(url)
else:
response = requests.post(url, data=data, files=files)
except requests.exceptions.HTTPError as e:
raise Exception('HTTP %d "%s" for %s\n\t%s' % (e.code, e.msg, e.geturl(), data))
except urllib2.URLError as e:
raise urllib2.URLError(
except requests.exceptions.URLRequired as e:
raise requests.exceptions.URLRequired(
'%s for %s. No Refine server reachable/running; ENV set?' %
(e.reason, self.server))
if response.info().get('Content-Encoding', None) == 'gzip':
if response.encoding == 'gzip':
# Need a seekable filestream for gzip
gzip_fp = gzip.GzipFile(fileobj=StringIO.StringIO(response.read()))
gzip_fp = gzip.GzipFile(fileobj=io.StringIO(response.read()))
# XXX Monkey patch response's filehandle. Better way?
urllib.addbase.__init__(response, gzip_fp)
return response
def urlopen_json(self, *args, **kwargs):
"""Open a Refine URL, optionally POST data, and return parsed JSON."""
response = json.loads(self.urlopen(*args, **kwargs).read())
response = self.urlopen(*args, **kwargs).json()
if 'code' in response and response['code'] not in ('ok', 'pending'):
error_message = ('server ' + response['code'] + ': ' +
response.get('message', response.get('stack', response)))
@ -256,24 +260,23 @@ class Refine:
'include-file-sources': s(include_file_sources),
}
files = None
if project_url is not None:
options['url'] = project_url
elif project_file is not None:
options['project-file'] = {
'fd': open(project_file),
'filename': project_file,
}
files = {'project-file': open(project_file, 'r')}
if project_name is None:
# make a name for itself by stripping extension and directories
project_name = (project_file or 'New project').rsplit('.', 1)[0]
project_name = os.path.basename(project_name)
options['project-name'] = project_name
response = self.server.urlopen(
'create-project-from-upload', options, params
'create-project-from-upload', options, params, files=files
)
# expecting a redirect to the new project containing the id in the url
url_params = urlparse.parse_qs(
urlparse.urlparse(response.geturl()).query)
url_params = urllib.parse.parse_qs(
urllib.parse.urlparse(response.geturl()).query)
if 'project' in url_params:
project_id = url_params['project'][0]
return RefineProject(self.server, project_id)
@ -430,7 +433,7 @@ class RefineProject:
def export(self, encoding=None, export_format='tsv'):
"""Return a fileobject of a project's data."""
url = ('export-rows/' +
urllib.quote(self.project_name().encode('utf8')) +
urllib.parse.quote(self.project_name().encode('utf8')) +
'.' + export_format)
data = {'format': export_format}
if encoding:
@ -441,7 +444,7 @@ class RefineProject:
template='', rowSeparator='\n', suffix=''):
"""Return a fileobject of a project's data in templating mode."""
url = ('export-rows/' +
urllib.quote(self.project_name().encode('utf8')) +
urllib.parse.quote(self.project_name().encode('utf8')) +
'.' + 'txt')
data = {'format': 'template',
'template': template,

View File

@ -1 +1 @@
urllib2_file>=0.2.1
requests

View File

@ -35,8 +35,8 @@ setup(name='openrefine-client',
author_email='felix.lohmeier@opencultureconsulting.com',
url='https://github.com/opencultureconsulting/openrefine-client',
packages=find_packages(exclude=['tests']),
install_requires=['urllib2_file'],
python_requires='>=2.7, !=3.*',
install_requires=['requests'],
python_requires='>=3.0,<4',
entry_points={
'console_scripts': [ 'openrefine-client = google.refine.__main__:main' ]
},

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
"""
test_refine_small.py
"""