Add get_rows() and supporting row classes. Add __len__() to Engine so len(engine) returns the number of facets. Add test for Engine.add_facet()

This commit is contained in:
Paul Makepeace 2011-04-24 12:43:11 -04:00
parent 0586e55ea8
commit 26bc2030a2
3 changed files with 76 additions and 22 deletions

View File

@ -44,6 +44,7 @@ class Facet(object):
'invert': self.invert,
}
class FacetResponse(object):
def __init__(self, facet):
self.name = facet['name']
@ -75,19 +76,23 @@ class Engine(object):
facets = [facets]
self.facets = facets
self.mode = mode
def as_dict(self):
    """Return the engine configuration as a plain dict.

    The 'facets' entry holds each facet's own as_dict() result, in list
    order; the 'mode' entry is self.mode passed through unchanged.
    """
    facet_dicts = []
    for facet in self.facets:
        # XXX how with json?
        facet_dicts.append(facet.as_dict())
    return {'facets': facet_dicts, 'mode': self.mode}
def __len__(self):
    """Return how many facets this engine currently holds."""
    facet_count = len(self.facets)
    return facet_count
def as_json(self):
    """Return the as_dict() representation serialized as a JSON string."""
    engine_config = self.as_dict()
    return json.dumps(engine_config)
def add_facet(self, facet):
    """Append `facet` to the end of this engine's facet list."""
    current_facets = self.facets
    current_facets.append(facet)
class RefineServer(object):
"""Communicate with a Refine server."""
@ -103,7 +108,7 @@ class RefineServer(object):
if 'delete' in command:
data['project'] = project_id
else:
url += '?project=' + project_id
url += '?project=' + project_id
req = urllib2.Request(url)
if data:
req.add_data(data) # data = urllib.urlencode(data)
@ -115,7 +120,7 @@ class RefineServer(object):
# XXX Monkey patch response's filehandle. Better way?
urllib.addbase.__init__(response, gzip_fp)
return response
def urlopen_json(self, *args, **kwargs):
"""Open a Refine URL, optionally POST data, and return parsed JSON."""
response = self.urlopen(*args, **kwargs)
@ -136,14 +141,14 @@ class Refine:
def get_version(self):
    """Ask the server for its version data.

    Issues the 'get-version' command through the server's urlopen_json()
    and returns whatever that call returns, e.g.:

    {"revision":"r1836","full_version":"2.0 [r1836]",
     "full_name":"Google Refine 2.0 [r1836]","version":"2.0"}
    """
    version_info = self.server.urlopen_json('get-version')
    return version_info
def list_projects(self):
"""Return a dict of projects indexed by id & name.
{u'1877818633188': {
'id': u'1877818633188', u'name': u'akg',
u'modified': u'2011-04-07T12:30:07Z',
@ -193,7 +198,7 @@ class Refine:
'split-into-columns': s(split_into_columns), 'separator': s(separator),
'ignore': s(ignore_initial_non_blank_lines), 'header-lines': s(header_lines),
'skip': s(skip_initial_data_rows), 'limit': s(limit),
'guess-value-type': s(guess_value_type),
'guess-value-type': s(guess_value_type),
'ignore-quotes': s(ignore_quotes),
}
if project_url is not None:
@ -218,6 +223,32 @@ class Refine:
raise Exception('Project not created')
class RowsResponse(object):
    """Wrap the parsed response dict of a rows request.

    The summary fields of the response are copied onto the instance and
    the raw row list is exposed through a sized, iterable RefineRows.
    """

    class RefineRows(object):
        """Sized, iterable view over the raw list of row dicts."""

        class RefineRow(object):
            """One row: its flagged/starred state plus its cell values."""

            def __init__(self, row_response):
                self.flagged = row_response['flagged']
                self.starred = row_response['starred']
                cell_values = []
                for cell in row_response['cells']:
                    # A falsy cell entry carries no value; record None.
                    cell_values.append(cell['v'] if cell else None)
                self.row = cell_values

        def __init__(self, rows_response):
            # Keep the raw rows; RefineRow objects are built on iteration.
            self.rows_response = rows_response

        def __iter__(self):
            """Yield a RefineRow for each raw row, in order."""
            for raw_row in self.rows_response:
                yield self.RefineRow(raw_row)

        def __len__(self):
            """Return the number of raw rows held."""
            return len(self.rows_response)

    def __init__(self, response):
        # Fields copied verbatim from the response, in this order.
        # 'pool' looks like {"reconCandidates": {},"recons": {}}
        for field in ('mode', 'filtered', 'start', 'limit', 'total', 'pool'):
            setattr(self, field, response[field])
        self.rows = self.RefineRows(response['rows'])
class RefineProject:
"""A Google Refine project."""
def __init__(self, server, project_id=None, project_name=None):
@ -239,6 +270,7 @@ class RefineProject:
self.columns = [] # columns & column_index filled in by get_models()
self.column_index = {}
self.get_models()
self.engine = Engine()
def do_raw(self, command, data):
"""Issue a command to the server & return a response object."""
@ -253,7 +285,7 @@ class RefineProject:
response = self.do_json('get-models')
column_model = response['columnModel']
columns = column_model['columns']
# Pre-extend the list in python
# Pre-extend the list in python
self.columns = [None] * (1 + max(c['cellIndex'] for c in columns))
for column in columns:
cell_index, name = column['cellIndex'], column['name']
@ -278,7 +310,7 @@ class RefineProject:
self.wait_until_idle()
return 'ok'
return response_json['code'] # can be 'ok' or 'pending'
def export(self, export_format='tsv'):
"""Return a fileobject of a project's data."""
data = {
@ -295,9 +327,15 @@ class RefineProject:
def delete(self):
    """Issue 'delete-project'; return True only if the reply's code is 'ok'.

    A reply without a 'code' entry counts as failure and yields False.
    """
    reply = self.do_json('delete-project')
    if 'code' not in reply:
        return False
    return reply['code'] == 'ok'
def text_facet(self, facets=None, engine=None, mode='row-based'):
    """Compute facets on the server and return a FacetsResponse.

    facets -- handed to Engine() when no engine is supplied
    engine -- engine to use as-is; one is built from facets/mode when None
    mode   -- engine mode, used only when the engine is built here
    """
    # Test against None rather than truthiness: Engine defines __len__, so
    # a caller-supplied engine holding no facets is falsy and would
    # otherwise be silently replaced.
    if engine is None:
        engine = Engine(facets, mode)
    response = self.do_json('compute-facets', {'engine': engine.as_json()})
    return FacetsResponse(response)
def text_facet(self, facets=None):
    """Compute facets using the project's engine.

    When `facets` is truthy, self.engine is first replaced by a new Engine
    built from it; otherwise the existing self.engine is used unchanged.
    Returns a FacetsResponse wrapping the 'compute-facets' reply.
    """
    if facets:
        self.engine = Engine(facets)
    request_data = {'engine': self.engine.as_json()}
    return FacetsResponse(self.do_json('compute-facets', request_data))
def get_rows(self, engine=None, start=0, limit=10):
    """Fetch a window of rows and return it as a RowsResponse.

    engine -- optional engine; when supplied, its JSON form is sent as the
              'engine' request parameter. When None the request carries
              only 'start' and 'limit'.
    start  -- value sent as the 'start' parameter
    limit  -- value sent as the 'limit' parameter
    """
    data = {'start': start, 'limit': limit}
    # `is not None` rather than truthiness: Engine defines __len__, so an
    # engine with no facets is falsy yet is still a deliberate argument.
    if engine is not None:
        data['engine'] = engine.as_json()
    response = self.do_json('get-rows', data)
    return RowsResponse(response)

View File

@ -22,12 +22,19 @@ class FacetTest(unittest.TestCase):
self.assertTrue(str(engine))
facet2 = Facet('Ethnicity')
engine.add_facet(facet2)
print engine.as_json()
self.assertEqual(len(engine.facets), 2)
self.assertEqual(len(engine), 2)
def test_serialize(self):
    """A default-constructed Engine serializes to the expected JSON."""
    expected_json = '{"facets": [], "mode": "row-based"}'
    self.assertEqual(Engine().as_json(), expected_json)
def test_add_facet(self):
    """Adding a facet to a one-facet engine leaves it holding two."""
    engine = Engine(Facet(column='Party Code'))
    ethnicity = Facet(column='Ethnicity')
    engine.add_facet(ethnicity)
    self.assertEqual(len(engine.facets), 2)
def test_facets_response(self):
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""

View File

@ -15,7 +15,7 @@ from google.refine import Facet, Engine
from google.refine import RefineServer, Refine, RefineProject
PATH_TO_TEST_DATA = os.path.join('google', 'test', 'data')
class RefineTestCase(unittest.TestCase):
project_file = None
project = None
@ -30,8 +30,8 @@ class RefineTestCase(unittest.TestCase):
if self.project:
self.project.delete()
self.project = None
class RefineServerTest(RefineTestCase):
def test_init(self):
self.assertEqual(self.server.server, 'http://%s:%s' % (REFINE_HOST, REFINE_PORT))
@ -58,7 +58,7 @@ class RefineTest(RefineTestCase):
self.assertEqual(self.project.key_column, 'email')
self.assertTrue('email' in self.project.columns)
self.assertEqual(self.project.column_index['name'], 1)
def test_delete_project(self):
self.assertTrue(self.project.delete())
@ -66,6 +66,15 @@ class RefineTest(RefineTestCase):
class TutorialTestFacets(RefineTestCase):
project_file = 'louisiana-elected-officials.csv'
def test_get_rows(self):
    """get_rows(limit=10) yields ten rows, none flagged or starred."""
    wanted = 10
    response = self.project.get_rows(limit=wanted)
    self.assertEqual(len(response.rows), wanted)
    self.assertEqual(response.limit, wanted)
    self.assertEqual(response.total, 6958)
    for fetched_row in response.rows:
        self.assertFalse(fetched_row.flagged)
        self.assertFalse(fetched_row.starred)
def test_basic_facet(self):
facet = Facet(column='Party Code')
facets = self.project.text_facet(facet)
@ -74,10 +83,10 @@ class TutorialTestFacets(RefineTestCase):
self.assertEqual(pc.choices['D'].count, 3700)
self.assertEqual(pc.choices['N'].count, 15)
self.assertEqual(pc.blank_choice.count, 1446)
engine = Engine(facet)
engine.add_facet(Facet(column='Ethnicity'))
facets = self.project.text_facet(engine=engine)
self.project.engine = engine
facets = self.project.text_facet()
e = facets.facets[1]
self.assertEqual(e.choices['B'].count, 1255)
self.assertEqual(e.choices['W'].count, 4469)