realign to upstream
This commit is contained in:
parent
ad95432fc0
commit
aa5b3a4203
|
@ -1,6 +1,6 @@
|
|||
#!/usr/bin/env python
|
||||
"""
|
||||
Client library to communicate with a OpenRefine server.
|
||||
Client library to communicate with a Refine server.
|
||||
"""
|
||||
|
||||
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
||||
|
@ -38,11 +38,11 @@ REFINE_PORT = os.environ.get('OPENREFINE_PORT', os.environ.get('GOOGLE_REFINE_PO
|
|||
|
||||
|
||||
class RefineServer(object):
|
||||
"""Communicate with a OpenRefine server."""
|
||||
"""Communicate with a Refine server."""
|
||||
|
||||
@staticmethod
|
||||
def url():
|
||||
"""Return the URL to the OpenRefine server."""
|
||||
"""Return the URL to the Refine server."""
|
||||
server = 'http://' + REFINE_HOST
|
||||
if REFINE_PORT != '80':
|
||||
server += ':' + REFINE_PORT
|
||||
|
@ -55,7 +55,7 @@ class RefineServer(object):
|
|||
self.__version = None # see version @property below
|
||||
|
||||
def urlopen(self, command, data=None, params=None, project_id=None):
|
||||
"""Open a OpenRefine URL and with optional query params and POST data.
|
||||
"""Open a Refine URL and with optional query params and POST data.
|
||||
|
||||
data: POST data dict
|
||||
params: query params dict
|
||||
|
@ -85,7 +85,7 @@ class RefineServer(object):
|
|||
raise Exception('HTTP %d "%s" for %s\n\t%s' % (e.code, e.msg, e.geturl(), data))
|
||||
except urllib2.URLError as e:
|
||||
raise urllib2.URLError(
|
||||
'%s for %s. No OpenRefine server reachable/running; ENV set?' %
|
||||
'%s for %s. No Refine server reachable/running; ENV set?' %
|
||||
(e.reason, self.server))
|
||||
if response.info().get('Content-Encoding', None) == 'gzip':
|
||||
# Need a seekable filestream for gzip
|
||||
|
@ -95,7 +95,7 @@ class RefineServer(object):
|
|||
return response
|
||||
|
||||
def urlopen_json(self, *args, **kwargs):
|
||||
"""Open a OpenRefine URL, optionally POST data, and return parsed JSON."""
|
||||
"""Open a Refine URL, optionally POST data, and return parsed JSON."""
|
||||
response = json.loads(self.urlopen(*args, **kwargs).read())
|
||||
if 'code' in response and response['code'] not in ('ok', 'pending'):
|
||||
error_message = ('server ' + response['code'] + ': ' +
|
||||
|
@ -118,7 +118,7 @@ class RefineServer(object):
|
|||
|
||||
|
||||
class Refine:
|
||||
"""Class representing a connection to a OpenRefine server."""
|
||||
"""Class representing a connection to a Refine server."""
|
||||
def __init__(self, server):
|
||||
if isinstance(server, RefineServer):
|
||||
self.server = server
|
||||
|
@ -144,17 +144,99 @@ class Refine:
|
|||
return projects[project_id]['name']
|
||||
|
||||
def open_project(self, project_id):
|
||||
"""Open a OpenRefine project."""
|
||||
"""Open a Refine project."""
|
||||
return RefineProject(self.server, project_id)
|
||||
|
||||
def new_project(self, project_file=None, project_name=None,
|
||||
project_format='', **kwargs):
|
||||
"""Create a OpenRefine project."""
|
||||
defaults = { 'guessCellValueTypes' : False, 'headerLines' : 1, 'ignoreLines' : -1, 'includeFileSources' : False, 'limit' : -1, 'linesPerRow' : 1, 'processQuotes' : True, 'separator' : ',', 'skipDataLines' : 0, 'storeBlankCellsAsNulls' : True, 'storeBlankRows' : True, 'storeEmptyStrings' : True, 'trimStrings' : False }
|
||||
# These aren't used yet but are included for reference
|
||||
new_project_defaults = {
|
||||
'text/line-based/*sv': {
|
||||
'encoding': '',
|
||||
'separator': ',',
|
||||
'ignore_lines': -1,
|
||||
'header_lines': 1,
|
||||
'skip_data_lines': 0,
|
||||
'limit': -1,
|
||||
'store_blank_rows': True,
|
||||
'guess_cell_value_types': True,
|
||||
'process_quotes': True,
|
||||
'store_blank_cells_as_nulls': True,
|
||||
'include_file_sources': False},
|
||||
'text/line-based': {
|
||||
'encoding': '',
|
||||
'lines_per_row': 1,
|
||||
'ignore_lines': -1,
|
||||
'limit': -1,
|
||||
'skip_data_lines': -1,
|
||||
'store_blank_rows': True,
|
||||
'store_blank_cells_as_nulls': True,
|
||||
'include_file_sources': False},
|
||||
'text/line-based/fixed-width': {
|
||||
'encoding': '',
|
||||
'column_widths': [20],
|
||||
'ignore_lines': -1,
|
||||
'header_lines': 0,
|
||||
'skip_data_lines': 0,
|
||||
'limit': -1,
|
||||
'guess_cell_value_types': False,
|
||||
'store_blank_rows': True,
|
||||
'store_blank_cells_as_nulls': True,
|
||||
'include_file_sources': False},
|
||||
'text/line-based/pc-axis': {
|
||||
'encoding': '',
|
||||
'limit': -1,
|
||||
'skip_data_lines': -1,
|
||||
'include_file_sources': False},
|
||||
'text/rdf+n3': {'encoding': ''},
|
||||
'text/xml/ods': {
|
||||
'sheets': [],
|
||||
'ignore_lines': -1,
|
||||
'header_lines': 1,
|
||||
'skip_data_lines': 0,
|
||||
'limit': -1,
|
||||
'store_blank_rows': True,
|
||||
'store_blank_cells_as_nulls': True,
|
||||
'include_file_sources': False},
|
||||
'binary/xls': {
|
||||
'xml_based': False,
|
||||
'sheets': [],
|
||||
'ignore_lines': -1,
|
||||
'header_lines': 1,
|
||||
'skip_data_lines': 0,
|
||||
'limit': -1,
|
||||
'store_blank_rows': True,
|
||||
'store_blank_cells_as_nulls': True,
|
||||
'include_file_sources': False}
|
||||
}
|
||||
|
||||
def new_project(self, project_file=None, project_url=None, project_name=None,
|
||||
project_format=None, **kwargs):
|
||||
"""Create a Refine project."""
|
||||
|
||||
if (project_file and project_url) or (not project_file and not project_url):
|
||||
raise ValueError('One (only) of project_file and project_url must be set')
|
||||
|
||||
defaults = {'guessCellValueTypes': False,
|
||||
'headerLines': 1,
|
||||
'ignoreLines': -1,
|
||||
'includeFileSources': False,
|
||||
'limit': -1,
|
||||
'linesPerRow': 1,
|
||||
'processQuotes': True,
|
||||
'project_format': 'text/line-based/*sv',
|
||||
'separator': ',',
|
||||
'skipDataLines': 0,
|
||||
'storeBlankCellsAsNulls': True,
|
||||
'storeBlankRows': True,
|
||||
'storeEmptyStrings': True,
|
||||
'trimStrings': False}
|
||||
|
||||
# options
|
||||
options = { 'format': project_format }
|
||||
if project_file is not None:
|
||||
options = {
|
||||
'format': project_format
|
||||
}
|
||||
if project_url is not None:
|
||||
options['url'] = project_url
|
||||
elif project_file is not None:
|
||||
options['project-file'] = {
|
||||
'fd': open(project_file),
|
||||
'filename': project_file,
|
||||
|
@ -165,10 +247,10 @@ class Refine:
|
|||
project_name = os.path.basename(project_name)
|
||||
options['project-name'] = project_name
|
||||
|
||||
# params (the API requires a json in the 'option' POST argument)
|
||||
params = defaults
|
||||
params.update(kwargs)
|
||||
params = { 'options': json.dumps(params) }
|
||||
# params (the API requires a json in the 'options' POST argument)
|
||||
params_dict = dict(defaults)
|
||||
params_dict.update(kwargs)
|
||||
params = { 'options': json.dumps(params_dict) }
|
||||
|
||||
# submit
|
||||
response = self.server.urlopen(
|
||||
|
@ -179,17 +261,11 @@ class Refine:
|
|||
urlparse.urlparse(response.geturl()).query)
|
||||
if 'project' in url_params:
|
||||
project_id = url_params['project'][0]
|
||||
# check number of rows
|
||||
rows = RefineProject(RefineServer(),project_id).do_json('get-rows')['total']
|
||||
if rows > 0:
|
||||
print('{0}: {1}'.format('id', project_id))
|
||||
print('{0}: {1}'.format('rows', rows))
|
||||
return RefineProject(self.server, project_id)
|
||||
else:
|
||||
raise Exception('Project contains 0 rows. Please check --help for mandatory arguments for xml, json, xlsx and ods')
|
||||
else:
|
||||
raise Exception('Project not created')
|
||||
|
||||
|
||||
def RowsResponseFactory(column_index):
|
||||
"""Factory for parsing the output from get_rows().
|
||||
|
||||
|
@ -251,7 +327,7 @@ class RefineProject:
|
|||
server = RefineServer(server)
|
||||
self.server = server
|
||||
if not project_id:
|
||||
raise Exception('Missing OpenRefine project ID')
|
||||
raise Exception('Missing Refine project ID')
|
||||
self.project_id = project_id
|
||||
self.engine = facet.Engine()
|
||||
self.sorting = facet.Sorting()
|
||||
|
@ -342,11 +418,17 @@ class RefineProject:
|
|||
export_format)
|
||||
return self.do_raw(url, data={'format': export_format})
|
||||
|
||||
def export_templating(self, export_format='txt', engine='', prefix='', template='', rowSeparator='', suffix=''):
|
||||
"""Return a fileobject of a project's data."""
|
||||
def export_templating(self, export_format='txt', engine='', prefix='',
|
||||
template='', rowSeparator='', suffix=''):
|
||||
"""Return a fileobject of a project's data in templating mode."""
|
||||
url = ('export-rows/' + urllib.quote(self.project_name()) + '.' +
|
||||
export_format)
|
||||
return self.do_raw(url, data={'format': 'template', 'template': template, 'engine': engine, 'prefix': prefix, 'suffix': suffix, 'separator': rowSeparator } )
|
||||
return self.do_raw(url, data={'format': 'template',
|
||||
'template': template,
|
||||
'engine': engine,
|
||||
'prefix': prefix,
|
||||
'suffix': suffix,
|
||||
'separator': rowSeparator})
|
||||
|
||||
def export_rows(self, **kwargs):
|
||||
"""Return an iterable of parsed rows of a project's data."""
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#! /usr/bin/env python
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
Script to provide a command line interface to a OpenRefine server.
|
||||
Script to provide a command line interface to a Refine server.
|
||||
"""
|
||||
|
||||
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
||||
|
|
|
@ -37,7 +37,7 @@ class RefineServerTest(refinetest.RefineTestCase):
|
|||
self.assertTrue(item in version_info)
|
||||
|
||||
def test_version(self):
|
||||
self.assertTrue(self.server.version in ('2.0', '2.1', '2.5', '2.6', '2.7', '2.8'))
|
||||
self.assertTrue(self.server.version in ('2.0', '2.1', '2.5'))
|
||||
|
||||
|
||||
class RefineTest(refinetest.RefineTestCase):
|
||||
|
|
|
@ -22,7 +22,7 @@ from tests import refinetest
|
|||
|
||||
class TutorialTestFacets(refinetest.RefineTestCase):
|
||||
project_file = 'louisiana-elected-officials.csv'
|
||||
project_options = {'guessCellValueTypes': True}
|
||||
project_options = {'guess_cell_value_types': True}
|
||||
|
||||
def test_get_rows(self):
|
||||
# Section "2. Exploration using Facets": {3}
|
||||
|
@ -130,7 +130,7 @@ class TutorialTestFacets(refinetest.RefineTestCase):
|
|||
|
||||
class TutorialTestEditing(refinetest.RefineTestCase):
|
||||
project_file = 'louisiana-elected-officials.csv'
|
||||
project_options = {'guessCellValueTypes': True}
|
||||
project_options = {'guess_cell_value_types': True}
|
||||
|
||||
def test_editing(self):
|
||||
# Section "3. Cell Editing": {1}
|
||||
|
@ -138,7 +138,7 @@ class TutorialTestEditing(refinetest.RefineTestCase):
|
|||
# {2}
|
||||
self.project.text_transform(column='Zip Code 2',
|
||||
expression='value.toString()[0, 5]')
|
||||
self.assertInResponse('transform on 6958 cells in column Zip Code 2')
|
||||
self.assertInResponse('transform on 6067 cells in column Zip Code 2')
|
||||
# {3} - XXX history
|
||||
# {4}
|
||||
office_title_facet = facet.TextFacet('Office Title')
|
||||
|
@ -162,8 +162,8 @@ class TutorialTestEditing(refinetest.RefineTestCase):
|
|||
self.assertEqual(len(clusters), 7)
|
||||
first_cluster = clusters[0]
|
||||
self.assertEqual(len(first_cluster), 2)
|
||||
self.assertEqual(first_cluster[0]['value'], 'DPEC Member at Large')
|
||||
self.assertEqual(first_cluster[0]['count'], 6)
|
||||
self.assertEqual(first_cluster[0]['value'], 'RSCC Member at Large')
|
||||
self.assertEqual(first_cluster[0]['count'], 233)
|
||||
# Not strictly necessary to repeat 'Council Member' but a test
|
||||
# of mass_edit, and it's also what the front end sends.
|
||||
self.project.mass_edit('Office Title', [{
|
||||
|
@ -194,9 +194,9 @@ class TutorialTestEditing(refinetest.RefineTestCase):
|
|||
# {5}, {6}, {7}
|
||||
response = self.project.compute_facets(facet.StarredFacet(True))
|
||||
self.assertEqual(len(response.facets[0].choices), 2) # true & false
|
||||
self.assertEqual(response.facets[0].choices[True].count, 2)
|
||||
self.assertEqual(response.facets[0].choices[True].count, 3)
|
||||
self.project.remove_rows()
|
||||
self.assertInResponse('2 rows')
|
||||
self.assertInResponse('3 rows')
|
||||
|
||||
|
||||
class TutorialTestDuplicateDetection(refinetest.RefineTestCase):
|
||||
|
@ -286,7 +286,7 @@ class TutorialTestTransposeFixedNumberOfRowsIntoColumns(
|
|||
refinetest.RefineTestCase):
|
||||
project_file = 'fixed-rows.csv'
|
||||
project_format = 'text/line-based'
|
||||
project_options = {'headerLines': 0}
|
||||
project_options = {'header_lines': 0}
|
||||
|
||||
def test_transpose_fixed_number_of_rows_into_columns(self):
|
||||
if self.server.version not in ('2.0', '2.1'):
|
||||
|
@ -360,7 +360,7 @@ class TutorialTestTransposeVariableNumberOfRowsIntoColumns(
|
|||
refinetest.RefineTestCase):
|
||||
project_file = 'variable-rows.csv'
|
||||
project_format = 'text/line-based'
|
||||
project_options = {'headerLines': 0}
|
||||
project_options = {'header_lines': 0}
|
||||
|
||||
def test_transpose_variable_number_of_rows_into_columns(self):
|
||||
# {20}, {21}
|
||||
|
|
Loading…
Reference in New Issue