Add get_rows() and supporting row classes. Add len() to engine for number of facets. Add test for Engine.add_facet()
This commit is contained in:
parent
0586e55ea8
commit
26bc2030a2
|
@ -44,6 +44,7 @@ class Facet(object):
|
||||||
'invert': self.invert,
|
'invert': self.invert,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class FacetResponse(object):
|
class FacetResponse(object):
|
||||||
def __init__(self, facet):
|
def __init__(self, facet):
|
||||||
self.name = facet['name']
|
self.name = facet['name']
|
||||||
|
@ -75,19 +76,23 @@ class Engine(object):
|
||||||
facets = [facets]
|
facets = [facets]
|
||||||
self.facets = facets
|
self.facets = facets
|
||||||
self.mode = mode
|
self.mode = mode
|
||||||
|
|
||||||
def as_dict(self):
|
def as_dict(self):
|
||||||
return {
|
return {
|
||||||
'facets': [f.as_dict() for f in self.facets], # XXX how with json?
|
'facets': [f.as_dict() for f in self.facets], # XXX how with json?
|
||||||
'mode': self.mode,
|
'mode': self.mode,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def __len__(self):
|
||||||
|
return len(self.facets)
|
||||||
|
|
||||||
def as_json(self):
|
def as_json(self):
|
||||||
return json.dumps(self.as_dict())
|
return json.dumps(self.as_dict())
|
||||||
|
|
||||||
def add_facet(self, facet):
|
def add_facet(self, facet):
|
||||||
self.facets.append(facet)
|
self.facets.append(facet)
|
||||||
|
|
||||||
|
|
||||||
class RefineServer(object):
|
class RefineServer(object):
|
||||||
"""Communicate with a Refine server."""
|
"""Communicate with a Refine server."""
|
||||||
|
|
||||||
|
@ -103,7 +108,7 @@ class RefineServer(object):
|
||||||
if 'delete' in command:
|
if 'delete' in command:
|
||||||
data['project'] = project_id
|
data['project'] = project_id
|
||||||
else:
|
else:
|
||||||
url += '?project=' + project_id
|
url += '?project=' + project_id
|
||||||
req = urllib2.Request(url)
|
req = urllib2.Request(url)
|
||||||
if data:
|
if data:
|
||||||
req.add_data(data) # data = urllib.urlencode(data)
|
req.add_data(data) # data = urllib.urlencode(data)
|
||||||
|
@ -115,7 +120,7 @@ class RefineServer(object):
|
||||||
# XXX Monkey patch response's filehandle. Better way?
|
# XXX Monkey patch response's filehandle. Better way?
|
||||||
urllib.addbase.__init__(response, gzip_fp)
|
urllib.addbase.__init__(response, gzip_fp)
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def urlopen_json(self, *args, **kwargs):
|
def urlopen_json(self, *args, **kwargs):
|
||||||
"""Open a Refine URL, optionally POST data, and return parsed JSON."""
|
"""Open a Refine URL, optionally POST data, and return parsed JSON."""
|
||||||
response = self.urlopen(*args, **kwargs)
|
response = self.urlopen(*args, **kwargs)
|
||||||
|
@ -136,14 +141,14 @@ class Refine:
|
||||||
|
|
||||||
def get_version(self):
|
def get_version(self):
|
||||||
"""Return version data.
|
"""Return version data.
|
||||||
|
|
||||||
{"revision":"r1836","full_version":"2.0 [r1836]",
|
{"revision":"r1836","full_version":"2.0 [r1836]",
|
||||||
"full_name":"Google Refine 2.0 [r1836]","version":"2.0"}"""
|
"full_name":"Google Refine 2.0 [r1836]","version":"2.0"}"""
|
||||||
return self.server.urlopen_json('get-version')
|
return self.server.urlopen_json('get-version')
|
||||||
|
|
||||||
def list_projects(self):
|
def list_projects(self):
|
||||||
"""Return a dict of projects indexed by id & name.
|
"""Return a dict of projects indexed by id & name.
|
||||||
|
|
||||||
{u'1877818633188': {
|
{u'1877818633188': {
|
||||||
'id': u'1877818633188', u'name': u'akg',
|
'id': u'1877818633188', u'name': u'akg',
|
||||||
u'modified': u'2011-04-07T12:30:07Z',
|
u'modified': u'2011-04-07T12:30:07Z',
|
||||||
|
@ -193,7 +198,7 @@ class Refine:
|
||||||
'split-into-columns': s(split_into_columns), 'separator': s(separator),
|
'split-into-columns': s(split_into_columns), 'separator': s(separator),
|
||||||
'ignore': s(ignore_initial_non_blank_lines), 'header-lines': s(header_lines),
|
'ignore': s(ignore_initial_non_blank_lines), 'header-lines': s(header_lines),
|
||||||
'skip': s(skip_initial_data_rows), 'limit': s(limit),
|
'skip': s(skip_initial_data_rows), 'limit': s(limit),
|
||||||
'guess-value-type': s(guess_value_type),
|
'guess-value-type': s(guess_value_type),
|
||||||
'ignore-quotes': s(ignore_quotes),
|
'ignore-quotes': s(ignore_quotes),
|
||||||
}
|
}
|
||||||
if project_url is not None:
|
if project_url is not None:
|
||||||
|
@ -218,6 +223,32 @@ class Refine:
|
||||||
raise Exception('Project not created')
|
raise Exception('Project not created')
|
||||||
|
|
||||||
|
|
||||||
|
class RowsResponse(object):
    """Wrap the parsed JSON dict returned for a 'get-rows' request.

    The paging/summary fields of the response are copied onto the instance
    as attributes; the list under ``response['rows']`` is wrapped in a
    ``RefineRows`` container so rows can be iterated, counted and indexed.
    """

    class RefineRows(object):
        """Lazy sequence-like view over the raw list of row dicts."""

        class RefineRow(object):
            """One row: its flagged/starred markers and a flat list of values."""

            def __init__(self, row_response):
                self.flagged = row_response['flagged']
                self.starred = row_response['starred']
                # A falsy cell entry (e.g. None) becomes None; otherwise the
                # entry's 'v' key is used (presumably the cell value).
                self.row = [c['v'] if c else None
                            for c in row_response['cells']]

        def __init__(self, rows_response):
            # Keep the raw row dicts; RefineRow objects are built on access.
            self.rows_response = rows_response

        def __iter__(self):
            for row_response in self.rows_response:
                yield self.RefineRow(row_response)

        def __getitem__(self, index):
            """Return the RefineRow at ``index``, or a list for a slice."""
            if isinstance(index, slice):
                return [self.RefineRow(r) for r in self.rows_response[index]]
            return self.RefineRow(self.rows_response[index])

        def __len__(self):
            return len(self.rows_response)

    def __init__(self, response):
        self.mode = response['mode']
        self.filtered = response['filtered']
        self.start = response['start']
        self.limit = response['limit']
        self.total = response['total']
        self.pool = response['pool']    # {"reconCandidates": {},"recons": {}}
        self.rows = self.RefineRows(response['rows'])
|
||||||
|
|
||||||
|
|
||||||
class RefineProject:
|
class RefineProject:
|
||||||
"""A Google Refine project."""
|
"""A Google Refine project."""
|
||||||
def __init__(self, server, project_id=None, project_name=None):
|
def __init__(self, server, project_id=None, project_name=None):
|
||||||
|
@ -239,6 +270,7 @@ class RefineProject:
|
||||||
self.columns = [] # columns & column_index filled in by get_models()
|
self.columns = [] # columns & column_index filled in by get_models()
|
||||||
self.column_index = {}
|
self.column_index = {}
|
||||||
self.get_models()
|
self.get_models()
|
||||||
|
self.engine = Engine()
|
||||||
|
|
||||||
def do_raw(self, command, data):
|
def do_raw(self, command, data):
|
||||||
"""Issue a command to the server & return a response object."""
|
"""Issue a command to the server & return a response object."""
|
||||||
|
@ -253,7 +285,7 @@ class RefineProject:
|
||||||
response = self.do_json('get-models')
|
response = self.do_json('get-models')
|
||||||
column_model = response['columnModel']
|
column_model = response['columnModel']
|
||||||
columns = column_model['columns']
|
columns = column_model['columns']
|
||||||
# Pre-extend the list in python
|
# Pre-extend the list in python
|
||||||
self.columns = [None] * (1 + max(c['cellIndex'] for c in columns))
|
self.columns = [None] * (1 + max(c['cellIndex'] for c in columns))
|
||||||
for column in columns:
|
for column in columns:
|
||||||
cell_index, name = column['cellIndex'], column['name']
|
cell_index, name = column['cellIndex'], column['name']
|
||||||
|
@ -278,7 +310,7 @@ class RefineProject:
|
||||||
self.wait_until_idle()
|
self.wait_until_idle()
|
||||||
return 'ok'
|
return 'ok'
|
||||||
return response_json['code'] # can be 'ok' or 'pending'
|
return response_json['code'] # can be 'ok' or 'pending'
|
||||||
|
|
||||||
def export(self, export_format='tsv'):
|
def export(self, export_format='tsv'):
|
||||||
"""Return a fileobject of a project's data."""
|
"""Return a fileobject of a project's data."""
|
||||||
data = {
|
data = {
|
||||||
|
@ -295,9 +327,15 @@ class RefineProject:
|
||||||
def delete(self):
|
def delete(self):
|
||||||
response_json = self.do_json('delete-project')
|
response_json = self.do_json('delete-project')
|
||||||
return 'code' in response_json and response_json['code'] == 'ok'
|
return 'code' in response_json and response_json['code'] == 'ok'
|
||||||
|
|
||||||
def text_facet(self, facets=None, engine=None, mode='row-based'):
|
def text_facet(self, facets=None):
|
||||||
if not engine:
|
if facets:
|
||||||
engine = Engine(facets, mode)
|
self.engine = Engine(facets)
|
||||||
response = self.do_json('compute-facets', {'engine': engine.as_json()})
|
response = self.do_json('compute-facets',
|
||||||
return FacetsResponse(response)
|
{'engine': self.engine.as_json()})
|
||||||
|
return FacetsResponse(response)
|
||||||
|
|
||||||
|
def get_rows(self, engine=None, start=0, limit=10):
    """Request a page of rows and return it wrapped in a RowsResponse.

    engine -- optional engine object; when given, its as_json() output is
              sent under the 'engine' key (the same key text_facet uses
              for 'compute-facets'). When omitted the request is unchanged.
    start  -- value sent as the 'start' request field.
    limit  -- value sent as the 'limit' request field.
    """
    data = {'start': start, 'limit': limit}
    if engine is not None:
        # The parameter used to be accepted but silently ignored, so any
        # filter the caller supplied never reached the server.
        # NOTE(review): assumes 'get-rows' honours an 'engine' field the way
        # 'compute-facets' does -- confirm against the server API.
        data['engine'] = engine.as_json()
    response = self.do_json('get-rows', data)
    return RowsResponse(response)
|
||||||
|
|
||||||
|
|
|
@ -22,12 +22,19 @@ class FacetTest(unittest.TestCase):
|
||||||
self.assertTrue(str(engine))
|
self.assertTrue(str(engine))
|
||||||
facet2 = Facet('Ethnicity')
|
facet2 = Facet('Ethnicity')
|
||||||
engine.add_facet(facet2)
|
engine.add_facet(facet2)
|
||||||
print engine.as_json()
|
self.assertEqual(len(engine.facets), 2)
|
||||||
|
self.assertEqual(len(engine), 2)
|
||||||
|
|
||||||
def test_serialize(self):
|
def test_serialize(self):
|
||||||
engine = Engine()
|
engine = Engine()
|
||||||
engine_json = engine.as_json()
|
engine_json = engine.as_json()
|
||||||
self.assertEqual(engine_json, '{"facets": [], "mode": "row-based"}')
|
self.assertEqual(engine_json, '{"facets": [], "mode": "row-based"}')
|
||||||
|
|
||||||
|
def test_add_facet(self):
    """An engine built from one facet holds two after add_facet()."""
    engine = Engine(Facet(column='Party Code'))
    engine.add_facet(Facet(column='Ethnicity'))
    facet_count = len(engine.facets)
    self.assertEqual(facet_count, 2)
|
||||||
|
|
||||||
def test_facets_response(self):
|
def test_facets_response(self):
|
||||||
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""
|
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""
|
||||||
|
|
|
@ -15,7 +15,7 @@ from google.refine import Facet, Engine
|
||||||
from google.refine import RefineServer, Refine, RefineProject
|
from google.refine import RefineServer, Refine, RefineProject
|
||||||
|
|
||||||
PATH_TO_TEST_DATA = os.path.join('google', 'test', 'data')
|
PATH_TO_TEST_DATA = os.path.join('google', 'test', 'data')
|
||||||
|
|
||||||
class RefineTestCase(unittest.TestCase):
|
class RefineTestCase(unittest.TestCase):
|
||||||
project_file = None
|
project_file = None
|
||||||
project = None
|
project = None
|
||||||
|
@ -30,8 +30,8 @@ class RefineTestCase(unittest.TestCase):
|
||||||
if self.project:
|
if self.project:
|
||||||
self.project.delete()
|
self.project.delete()
|
||||||
self.project = None
|
self.project = None
|
||||||
|
|
||||||
|
|
||||||
class RefineServerTest(RefineTestCase):
|
class RefineServerTest(RefineTestCase):
|
||||||
def test_init(self):
|
def test_init(self):
|
||||||
self.assertEqual(self.server.server, 'http://%s:%s' % (REFINE_HOST, REFINE_PORT))
|
self.assertEqual(self.server.server, 'http://%s:%s' % (REFINE_HOST, REFINE_PORT))
|
||||||
|
@ -58,7 +58,7 @@ class RefineTest(RefineTestCase):
|
||||||
self.assertEqual(self.project.key_column, 'email')
|
self.assertEqual(self.project.key_column, 'email')
|
||||||
self.assertTrue('email' in self.project.columns)
|
self.assertTrue('email' in self.project.columns)
|
||||||
self.assertEqual(self.project.column_index['name'], 1)
|
self.assertEqual(self.project.column_index['name'], 1)
|
||||||
|
|
||||||
def test_delete_project(self):
|
def test_delete_project(self):
|
||||||
self.assertTrue(self.project.delete())
|
self.assertTrue(self.project.delete())
|
||||||
|
|
||||||
|
@ -66,6 +66,15 @@ class RefineTest(RefineTestCase):
|
||||||
class TutorialTestFacets(RefineTestCase):
|
class TutorialTestFacets(RefineTestCase):
|
||||||
project_file = 'louisiana-elected-officials.csv'
|
project_file = 'louisiana-elected-officials.csv'
|
||||||
|
|
||||||
|
def test_get_rows(self):
    """get_rows(limit=10) returns ten unflagged, unstarred rows."""
    rows_page = self.project.get_rows(limit=10)
    self.assertEqual(len(rows_page.rows), 10)
    self.assertEqual(rows_page.limit, 10)
    self.assertEqual(rows_page.total, 6958)
    # Check flagged first, then starred, for each row in turn.
    for refine_row in rows_page.rows:
        self.assertFalse(refine_row.flagged)
        self.assertFalse(refine_row.starred)
|
||||||
|
|
||||||
def test_basic_facet(self):
|
def test_basic_facet(self):
|
||||||
facet = Facet(column='Party Code')
|
facet = Facet(column='Party Code')
|
||||||
facets = self.project.text_facet(facet)
|
facets = self.project.text_facet(facet)
|
||||||
|
@ -74,10 +83,10 @@ class TutorialTestFacets(RefineTestCase):
|
||||||
self.assertEqual(pc.choices['D'].count, 3700)
|
self.assertEqual(pc.choices['D'].count, 3700)
|
||||||
self.assertEqual(pc.choices['N'].count, 15)
|
self.assertEqual(pc.choices['N'].count, 15)
|
||||||
self.assertEqual(pc.blank_choice.count, 1446)
|
self.assertEqual(pc.blank_choice.count, 1446)
|
||||||
|
|
||||||
engine = Engine(facet)
|
engine = Engine(facet)
|
||||||
engine.add_facet(Facet(column='Ethnicity'))
|
engine.add_facet(Facet(column='Ethnicity'))
|
||||||
facets = self.project.text_facet(engine=engine)
|
self.project.engine = engine
|
||||||
|
facets = self.project.text_facet()
|
||||||
e = facets.facets[1]
|
e = facets.facets[1]
|
||||||
self.assertEqual(e.choices['B'].count, 1255)
|
self.assertEqual(e.choices['B'].count, 1255)
|
||||||
self.assertEqual(e.choices['W'].count, 4469)
|
self.assertEqual(e.choices['W'].count, 4469)
|
||||||
|
|
Loading…
Reference in New Issue