From 26bc2030a27f1abef968e9b43768e87ff935c49a Mon Sep 17 00:00:00 2001 From: Paul Makepeace Date: Sun, 24 Apr 2011 12:43:11 -0400 Subject: [PATCH] Add get_rows() and supporting row classes. Add len() to engine for number of facets. Add test for Engine.add_facet() --- google/refine.py | 68 +++++++++++++++++++++++++++++--------- google/test/test_engine.py | 9 ++++- google/test/test_refine.py | 21 ++++++++---- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/google/refine.py b/google/refine.py index 2810eb4..a49c1eb 100644 --- a/google/refine.py +++ b/google/refine.py @@ -44,6 +44,7 @@ class Facet(object): 'invert': self.invert, } + class FacetResponse(object): def __init__(self, facet): self.name = facet['name'] @@ -75,19 +76,23 @@ class Engine(object): facets = [facets] self.facets = facets self.mode = mode - + def as_dict(self): return { 'facets': [f.as_dict() for f in self.facets], # XXX how with json? 'mode': self.mode, } + def __len__(self): + return len(self.facets) + def as_json(self): return json.dumps(self.as_dict()) def add_facet(self, facet): self.facets.append(facet) + class RefineServer(object): """Communicate with a Refine server.""" @@ -103,7 +108,7 @@ class RefineServer(object): if 'delete' in command: data['project'] = project_id else: - url += '?project=' + project_id + url += '?project=' + project_id req = urllib2.Request(url) if data: req.add_data(data) # data = urllib.urlencode(data) @@ -115,7 +120,7 @@ class RefineServer(object): # XXX Monkey patch response's filehandle. Better way? urllib.addbase.__init__(response, gzip_fp) return response - + def urlopen_json(self, *args, **kwargs): """Open a Refine URL, optionally POST data, and return parsed JSON.""" response = self.urlopen(*args, **kwargs) @@ -136,14 +141,14 @@ class Refine: def get_version(self): """Return version data. - + {"revision":"r1836","full_version":"2.0 [r1836]", "full_name":"Google Refine 2.0 [r1836]","version":"2.0"}""" return self.server.urlopen_json('get-version') - + def list_projects(self): """Return a dict of projects indexed by id & name. - + {u'1877818633188': { 'id': u'1877818633188', u'name': u'akg', u'modified': u'2011-04-07T12:30:07Z', @@ -193,7 +198,7 @@ class Refine: 'split-into-columns': s(split_into_columns), 'separator': s(separator), 'ignore': s(ignore_initial_non_blank_lines), 'header-lines': s(header_lines), 'skip': s(skip_initial_data_rows), 'limit': s(limit), - 'guess-value-type': s(guess_value_type), + 'guess-value-type': s(guess_value_type), 'ignore-quotes': s(ignore_quotes), } if project_url is not None: @@ -218,6 +223,32 @@ class Refine: raise Exception('Project not created') +class RowsResponse(object): + class RefineRows(object): + class RefineRow(object): + def __init__(self, row_response): + self.flagged = row_response['flagged'] + self.starred = row_response['starred'] + self.row = [c['v'] if c else None for c in row_response['cells']] + + def __init__(self, rows_response): + self.rows_response = rows_response + def __iter__(self): + for row_response in self.rows_response: + yield self.RefineRow(row_response) + def __len__(self): + return len(self.rows_response) + + def __init__(self, response): + self.mode = response['mode'] + self.filtered = response['filtered'] + self.start = response['start'] + self.limit = response['limit'] + self.total = response['total'] + self.pool = response['pool'] # {"reconCandidates": {},"recons": {}} + self.rows = self.RefineRows(response['rows']) + + class RefineProject: """A Google Refine project.""" def __init__(self, server, project_id=None, project_name=None): @@ -239,6 +270,7 @@ class RefineProject: self.columns = [] # columns & column_index filled in by get_models() self.column_index = {} self.get_models() + self.engine = Engine() def do_raw(self, command, data): """Issue a command to the server & return a response object.""" @@ -253,7 +285,7 @@ class RefineProject: response = self.do_json('get-models') column_model = response['columnModel'] columns = column_model['columns'] - # Pre-extend the list in python + # Pre-extend the list in python self.columns = [None] * (1 + max(c['cellIndex'] for c in columns)) for column in columns: cell_index, name = column['cellIndex'], column['name'] @@ -278,7 +310,7 @@ class RefineProject: self.wait_until_idle() return 'ok' return response_json['code'] # can be 'ok' or 'pending' - + def export(self, export_format='tsv'): """Return a fileobject of a project's data.""" data = { @@ -295,9 +327,15 @@ class RefineProject: def delete(self): response_json = self.do_json('delete-project') return 'code' in response_json and response_json['code'] == 'ok' - - def text_facet(self, facets=None, engine=None, mode='row-based'): - if not engine: - engine = Engine(facets, mode) - response = self.do_json('compute-facets', {'engine': engine.as_json()}) - return FacetsResponse(response) \ No newline at end of file + + def text_facet(self, facets=None): + if facets: + self.engine = Engine(facets) + response = self.do_json('compute-facets', + {'engine': self.engine.as_json()}) + return FacetsResponse(response) + + def get_rows(self, engine=None, start=0, limit=10): + response = self.do_json('get-rows', {'start': start, 'limit': limit}) + return RowsResponse(response) + diff --git a/google/test/test_engine.py b/google/test/test_engine.py index 187d091..71c6075 100644 --- a/google/test/test_engine.py +++ b/google/test/test_engine.py @@ -22,12 +22,19 @@ class FacetTest(unittest.TestCase): self.assertTrue(str(engine)) facet2 = Facet('Ethnicity') engine.add_facet(facet2) - print engine.as_json() + self.assertEqual(len(engine.facets), 2) + self.assertEqual(len(engine), 2) def test_serialize(self): engine = Engine() engine_json = engine.as_json() self.assertEqual(engine_json, '{"facets": [], "mode": "row-based"}') + + def test_add_facet(self): + facet = Facet(column='Party Code') + engine = Engine(facet) + engine.add_facet(Facet(column='Ethnicity')) + self.assertEqual(len(engine.facets), 2) def test_facets_response(self): response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}""" diff --git a/google/test/test_refine.py b/google/test/test_refine.py index f464452..8a69491 100644 --- a/google/test/test_refine.py +++ b/google/test/test_refine.py @@ -15,7 +15,7 @@ from google.refine import Facet, Engine from google.refine import RefineServer, Refine, RefineProject PATH_TO_TEST_DATA = os.path.join('google', 'test', 'data') - + class RefineTestCase(unittest.TestCase): project_file = None project = None @@ -30,8 +30,8 @@ class RefineTestCase(unittest.TestCase): if self.project: self.project.delete() self.project = None - - + + class RefineServerTest(RefineTestCase): def test_init(self): self.assertEqual(self.server.server, 'http://%s:%s' % (REFINE_HOST, REFINE_PORT)) @@ -58,7 +58,7 @@ class RefineTest(RefineTestCase): self.assertEqual(self.project.key_column, 'email') self.assertTrue('email' in self.project.columns) self.assertEqual(self.project.column_index['name'], 1) - + def test_delete_project(self): self.assertTrue(self.project.delete()) @@ -66,6 +66,15 @@ class RefineTest(RefineTestCase): class TutorialTestFacets(RefineTestCase): project_file = 'louisiana-elected-officials.csv' + def test_get_rows(self): + response = self.project.get_rows(limit=10) + self.assertEqual(len(response.rows), 10) + self.assertEqual(response.limit, 10) + self.assertEqual(response.total, 6958) + for row in response.rows: + self.assertFalse(row.flagged) + self.assertFalse(row.starred) + def test_basic_facet(self): facet = Facet(column='Party Code') facets = self.project.text_facet(facet) @@ -74,10 +83,10 @@ class TutorialTestFacets(RefineTestCase): self.assertEqual(pc.choices['D'].count, 3700) self.assertEqual(pc.choices['N'].count, 15) self.assertEqual(pc.blank_choice.count, 1446) - engine = Engine(facet) engine.add_facet(Facet(column='Ethnicity')) - facets = self.project.text_facet(engine=engine) + self.project.engine = engine + facets = self.project.text_facet() e = facets.facets[1] self.assertEqual(e.choices['B'].count, 1255) self.assertEqual(e.choices['W'].count, 4469)