Add get_rows() and supporting row classes. Add len() to engine for number of facets. Add test for Engine.add_facet()
This commit is contained in:
parent
0586e55ea8
commit
26bc2030a2
|
@ -44,6 +44,7 @@ class Facet(object):
|
|||
'invert': self.invert,
|
||||
}
|
||||
|
||||
|
||||
class FacetResponse(object):
|
||||
def __init__(self, facet):
|
||||
self.name = facet['name']
|
||||
|
@ -75,19 +76,23 @@ class Engine(object):
|
|||
facets = [facets]
|
||||
self.facets = facets
|
||||
self.mode = mode
|
||||
|
||||
|
||||
def as_dict(self):
    """Return the engine configuration as a plain dict.

    The result holds a 'facets' list (each facet's own as_dict() output,
    in order) and the engine's 'mode'.
    """
    facet_dicts = []
    for facet in self.facets:
        facet_dicts.append(facet.as_dict())
    # XXX how with json?
    return {
        'facets': facet_dicts,
        'mode': self.mode,
    }
|
||||
|
||||
def __len__(self):
    """Return the number of facets held by this engine."""
    facets = self.facets
    return len(facets)
|
||||
|
||||
def as_json(self):
    """Return the engine configuration (see as_dict) as a JSON string."""
    config = self.as_dict()
    return json.dumps(config)
|
||||
|
||||
def add_facet(self, facet):
    """Append *facet* to the end of this engine's facet list."""
    current = self.facets
    current.append(facet)
|
||||
|
||||
|
||||
class RefineServer(object):
|
||||
"""Communicate with a Refine server."""
|
||||
|
||||
|
@ -103,7 +108,7 @@ class RefineServer(object):
|
|||
if 'delete' in command:
|
||||
data['project'] = project_id
|
||||
else:
|
||||
url += '?project=' + project_id
|
||||
url += '?project=' + project_id
|
||||
req = urllib2.Request(url)
|
||||
if data:
|
||||
req.add_data(data) # data = urllib.urlencode(data)
|
||||
|
@ -115,7 +120,7 @@ class RefineServer(object):
|
|||
# XXX Monkey patch response's filehandle. Better way?
|
||||
urllib.addbase.__init__(response, gzip_fp)
|
||||
return response
|
||||
|
||||
|
||||
def urlopen_json(self, *args, **kwargs):
|
||||
"""Open a Refine URL, optionally POST data, and return parsed JSON."""
|
||||
response = self.urlopen(*args, **kwargs)
|
||||
|
@ -136,14 +141,14 @@ class Refine:
|
|||
|
||||
def get_version(self):
    """Ask the server for its version information.

    The parsed JSON reply to the 'get-version' command is returned as is.
    An example reply:

    {"revision":"r1836","full_version":"2.0 [r1836]",
    "full_name":"Google Refine 2.0 [r1836]","version":"2.0"}
    """
    version_info = self.server.urlopen_json('get-version')
    return version_info
|
||||
|
||||
|
||||
def list_projects(self):
|
||||
"""Return a dict of projects indexed by id & name.
|
||||
|
||||
|
||||
{u'1877818633188': {
|
||||
'id': u'1877818633188', u'name': u'akg',
|
||||
u'modified': u'2011-04-07T12:30:07Z',
|
||||
|
@ -193,7 +198,7 @@ class Refine:
|
|||
'split-into-columns': s(split_into_columns), 'separator': s(separator),
|
||||
'ignore': s(ignore_initial_non_blank_lines), 'header-lines': s(header_lines),
|
||||
'skip': s(skip_initial_data_rows), 'limit': s(limit),
|
||||
'guess-value-type': s(guess_value_type),
|
||||
'guess-value-type': s(guess_value_type),
|
||||
'ignore-quotes': s(ignore_quotes),
|
||||
}
|
||||
if project_url is not None:
|
||||
|
@ -218,6 +223,32 @@ class Refine:
|
|||
raise Exception('Project not created')
|
||||
|
||||
|
||||
class RowsResponse(object):
    """Wrap the parsed JSON reply of a 'get-rows' request.

    The paging/summary fields of the reply are copied onto the instance
    and the raw row list is wrapped in a RefineRows container.
    """

    class RefineRows(object):
        """Sized, iterable view over the raw row dicts of a reply."""

        class RefineRow(object):
            """A single row: its flagged/starred state and cell values."""

            def __init__(self, row_response):
                self.flagged = row_response['flagged']
                self.starred = row_response['starred']
                values = []
                for cell in row_response['cells']:
                    # A falsy cell entry (e.g. None) stands for "no value".
                    if cell:
                        values.append(cell['v'])
                    else:
                        values.append(None)
                self.row = values

        def __init__(self, rows_response):
            self.rows_response = rows_response

        def __iter__(self):
            # Rows are wrapped lazily, one RefineRow per raw row dict.
            for raw_row in self.rows_response:
                yield self.RefineRow(raw_row)

        def __len__(self):
            raw_rows = self.rows_response
            return len(raw_rows)

    def __init__(self, response):
        # 'pool' looks like {"reconCandidates": {},"recons": {}}
        for key in ('mode', 'filtered', 'start', 'limit', 'total', 'pool'):
            setattr(self, key, response[key])
        self.rows = self.RefineRows(response['rows'])
|
||||
|
||||
|
||||
class RefineProject:
|
||||
"""A Google Refine project."""
|
||||
def __init__(self, server, project_id=None, project_name=None):
|
||||
|
@ -239,6 +270,7 @@ class RefineProject:
|
|||
self.columns = [] # columns & column_index filled in by get_models()
|
||||
self.column_index = {}
|
||||
self.get_models()
|
||||
self.engine = Engine()
|
||||
|
||||
def do_raw(self, command, data):
|
||||
"""Issue a command to the server & return a response object."""
|
||||
|
@ -253,7 +285,7 @@ class RefineProject:
|
|||
response = self.do_json('get-models')
|
||||
column_model = response['columnModel']
|
||||
columns = column_model['columns']
|
||||
# Pre-extend the list in python
|
||||
# Pre-extend the list in python
|
||||
self.columns = [None] * (1 + max(c['cellIndex'] for c in columns))
|
||||
for column in columns:
|
||||
cell_index, name = column['cellIndex'], column['name']
|
||||
|
@ -278,7 +310,7 @@ class RefineProject:
|
|||
self.wait_until_idle()
|
||||
return 'ok'
|
||||
return response_json['code'] # can be 'ok' or 'pending'
|
||||
|
||||
|
||||
def export(self, export_format='tsv'):
|
||||
"""Return a fileobject of a project's data."""
|
||||
data = {
|
||||
|
@ -295,9 +327,15 @@ class RefineProject:
|
|||
def delete(self):
    """Issue 'delete-project' and report whether the reply code is 'ok'.

    Returns False when the reply carries no 'code' entry at all.
    """
    reply = self.do_json('delete-project')
    if 'code' not in reply:
        return False
    return reply['code'] == 'ok'
|
||||
|
||||
def text_facet(self, facets=None, engine=None, mode='row-based'):
    """Compute facets on the server and return a FacetsResponse.

    facets -- passed to Engine() when no engine is supplied.
    engine -- optional ready-made Engine; used as is when given.
    mode -- engine mode used only when an Engine has to be built here.
    """
    # Compare against None rather than truth-testing: Engine defines
    # __len__, so an engine that holds no facets is falsy and would
    # otherwise be thrown away together with its mode.
    if engine is None:
        engine = Engine(facets, mode)
    response = self.do_json('compute-facets', {'engine': engine.as_json()})
    return FacetsResponse(response)
|
||||
|
||||
def text_facet(self, facets=None):
    """Compute facets on the server and return a FacetsResponse.

    A truthy *facets* replaces self.engine with a new Engine built from
    it; otherwise the project's current engine is sent unchanged.
    """
    if facets:
        self.engine = Engine(facets)
    engine_json = self.engine.as_json()
    response = self.do_json('compute-facets', {'engine': engine_json})
    return FacetsResponse(response)
|
||||
|
||||
def get_rows(self, engine=None, start=0, limit=10):
    """Request rows from the server and return them as a RowsResponse.

    engine -- optional Engine; when given, its JSON form is sent as the
        'engine' request parameter. When omitted, no 'engine' parameter
        is added here.
    start, limit -- forwarded unchanged as the 'start' and 'limit'
        request parameters.
    """
    data = {'start': start, 'limit': limit}
    # Test against None, not truthiness: Engine defines __len__, so an
    # engine without facets is falsy but must still be honoured.
    if engine is not None:
        data['engine'] = engine.as_json()
    response = self.do_json('get-rows', data)
    return RowsResponse(response)
|
||||
|
||||
|
|
|
@ -22,12 +22,19 @@ class FacetTest(unittest.TestCase):
|
|||
self.assertTrue(str(engine))
|
||||
facet2 = Facet('Ethnicity')
|
||||
engine.add_facet(facet2)
|
||||
print engine.as_json()
|
||||
self.assertEqual(len(engine.facets), 2)
|
||||
self.assertEqual(len(engine), 2)
|
||||
|
||||
def test_serialize(self):
    """An engine created without arguments serializes to the default JSON."""
    expected = '{"facets": [], "mode": "row-based"}'
    self.assertEqual(Engine().as_json(), expected)
|
||||
|
||||
def test_add_facet(self):
    """Adding a facet to a one-facet engine leaves it with two facets."""
    engine = Engine(Facet(column='Party Code'))
    ethnicity = Facet(column='Ethnicity')
    engine.add_facet(ethnicity)
    self.assertEqual(len(engine.facets), 2)
|
||||
|
||||
def test_facets_response(self):
|
||||
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""
|
||||
|
|
|
@ -15,7 +15,7 @@ from google.refine import Facet, Engine
|
|||
from google.refine import RefineServer, Refine, RefineProject
|
||||
|
||||
PATH_TO_TEST_DATA = os.path.join('google', 'test', 'data')
|
||||
|
||||
|
||||
class RefineTestCase(unittest.TestCase):
|
||||
project_file = None
|
||||
project = None
|
||||
|
@ -30,8 +30,8 @@ class RefineTestCase(unittest.TestCase):
|
|||
if self.project:
|
||||
self.project.delete()
|
||||
self.project = None
|
||||
|
||||
|
||||
|
||||
|
||||
class RefineServerTest(RefineTestCase):
|
||||
def test_init(self):
|
||||
self.assertEqual(self.server.server, 'http://%s:%s' % (REFINE_HOST, REFINE_PORT))
|
||||
|
@ -58,7 +58,7 @@ class RefineTest(RefineTestCase):
|
|||
self.assertEqual(self.project.key_column, 'email')
|
||||
self.assertTrue('email' in self.project.columns)
|
||||
self.assertEqual(self.project.column_index['name'], 1)
|
||||
|
||||
|
||||
def test_delete_project(self):
    """Deleting the fixture project reports success."""
    deleted = self.project.delete()
    self.assertTrue(deleted)
|
||||
|
||||
|
@ -66,6 +66,15 @@ class RefineTest(RefineTestCase):
|
|||
class TutorialTestFacets(RefineTestCase):
|
||||
project_file = 'louisiana-elected-officials.csv'
|
||||
|
||||
def test_get_rows(self):
    """Fetch the first ten rows and check paging fields and row flags."""
    page = self.project.get_rows(limit=10)
    self.assertEqual(len(page.rows), 10)
    self.assertEqual(page.limit, 10)
    self.assertEqual(page.total, 6958)
    # None of the returned rows is expected to be flagged or starred.
    for fetched in page.rows:
        self.assertFalse(fetched.flagged)
        self.assertFalse(fetched.starred)
|
||||
|
||||
def test_basic_facet(self):
|
||||
facet = Facet(column='Party Code')
|
||||
facets = self.project.text_facet(facet)
|
||||
|
@ -74,10 +83,10 @@ class TutorialTestFacets(RefineTestCase):
|
|||
self.assertEqual(pc.choices['D'].count, 3700)
|
||||
self.assertEqual(pc.choices['N'].count, 15)
|
||||
self.assertEqual(pc.blank_choice.count, 1446)
|
||||
|
||||
engine = Engine(facet)
|
||||
engine.add_facet(Facet(column='Ethnicity'))
|
||||
facets = self.project.text_facet(engine=engine)
|
||||
self.project.engine = engine
|
||||
facets = self.project.text_facet()
|
||||
e = facets.facets[1]
|
||||
self.assertEqual(e.choices['B'].count, 1255)
|
||||
self.assertEqual(e.choices['W'].count, 4469)
|
||||
|
|
Loading…
Reference in New Issue