Add compute_clusters(). Rename its testcase. Add stack trace to error reporting.
This commit is contained in:
parent
1371911c94
commit
140413e72a
|
@ -179,7 +179,9 @@ class RefineServer(object):
|
||||||
"""Open a Refine URL, optionally POST data, and return parsed JSON."""
|
"""Open a Refine URL, optionally POST data, and return parsed JSON."""
|
||||||
response = json.loads(self.urlopen(*args, **kwargs).read())
|
response = json.loads(self.urlopen(*args, **kwargs).read())
|
||||||
if 'code' in response and response['code'] != 'ok':
|
if 'code' in response and response['code'] != 'ok':
|
||||||
raise Exception(response['code'] + ': ' + response['message'])
|
raise Exception(
|
||||||
|
response['code'] + ': ' +
|
||||||
|
response.get('message', response.get('stack', response)))
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
@ -415,3 +417,34 @@ class RefineProject:
|
||||||
'engine': self.engine.as_json(), 'columnName': column,
|
'engine': self.engine.as_json(), 'columnName': column,
|
||||||
'expression': expression, 'edits': edits})
|
'expression': expression, 'edits': edits})
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
# Default clusterer configurations, keyed by clusterer type. These are
# shared templates: compute_clusters() copies an entry before customising
# it, so the values here must never be modified in place.
clusterer_defaults = {
    'binning': {
        'type': 'binning',
        'function': 'fingerprint',
        'params': {},
    },
    'knn': {
        'type': 'knn',
        'function': 'levenshtein',
        'params': {
            'radius': 1,
            'blocking-ngram-size': 6,
        },
    },
}


def compute_clusters(self, column, clusterer_type='binning',
                     function=None, params=None):
    """Returns a list of clusters of {'value': ..., 'count': ...}.

    column -- name of the column to cluster; sent as the clusterer's
        'column' entry.
    clusterer_type -- key into clusterer_defaults ('binning' or 'knn');
        an unknown key raises KeyError.
    function -- optional replacement for the default 'function' entry.
    params -- optional replacement for the default 'params' entry.

    The request is issued through self.do_json('compute-clusters', ...)
    with the engine JSON and the JSON-encoded clusterer. Each cluster in
    the response is a list of items whose 'v' and 'c' fields are renamed
    to 'value' and 'count'.
    """
    # Work on a copy: assigning into the defaults entry itself would leak
    # this call's column/function/params into every later call and every
    # other instance sharing clusterer_defaults. A shallow copy suffices
    # because only top-level keys are assigned below.
    clusterer = dict(self.clusterer_defaults[clusterer_type])
    if params is not None:
        clusterer['params'] = params
    if function is not None:
        clusterer['function'] = function
    clusterer['column'] = column
    response = self.do_json('compute-clusters', {
        'engine': self.engine.as_json(),
        'clusterer': json.dumps(clusterer)})
    return [[{'value': x['v'], 'count': x['c']} for x in cluster]
            for cluster in response]
|
||||||
|
|
||||||
|
|
|
@ -190,10 +190,10 @@ class TutorialTestFacets(RefineTestCase):
|
||||||
self.assertEqual(cd.numeric_count, 548)
|
self.assertEqual(cd.numeric_count, 548)
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestTransformAndClustering(RefineTestCase):
|
class TutorialTestEditing(RefineTestCase):
|
||||||
project_file = 'louisiana-elected-officials.csv'
|
project_file = 'louisiana-elected-officials.csv'
|
||||||
|
|
||||||
def test_transform(self):
|
def test_editing(self):
|
||||||
# Section "3. Cell Editing": {1}
|
# Section "3. Cell Editing": {1}
|
||||||
self.project.engine.remove_all() # redundant due to setUp
|
self.project.engine.remove_all() # redundant due to setUp
|
||||||
# {2}
|
# {2}
|
||||||
|
@ -206,15 +206,34 @@ class TutorialTestTransformAndClustering(RefineTestCase):
|
||||||
self.project.engine.add_facet(office_title_facet)
|
self.project.engine.add_facet(office_title_facet)
|
||||||
response = self.project.compute_facets()
|
response = self.project.compute_facets()
|
||||||
self.assertEqual(len(response.facets[0].choices), 76)
|
self.assertEqual(len(response.facets[0].choices), 76)
|
||||||
response = self.project.text_transform(column='Office Title',
|
response = self.project.text_transform('Office Title', 'value.trim()')
|
||||||
expression='value.trim()')
|
|
||||||
self.assertTrue('6895' in response['historyEntry']['description'])
|
self.assertTrue('6895' in response['historyEntry']['description'])
|
||||||
response = self.project.compute_facets()
|
response = self.project.compute_facets()
|
||||||
self.assertEqual(len(response.facets[0].choices), 67)
|
self.assertEqual(len(response.facets[0].choices), 67)
|
||||||
# {5}
|
# {5}
|
||||||
response = self.project.edit(column='Office Title',
|
response = self.project.edit('Office Title',
|
||||||
'Councilmen', 'Councilman')
|
'Councilmen', 'Councilman')
|
||||||
self.assertTrue('13' in response['historyEntry']['description'])
|
self.assertTrue('13' in response['historyEntry']['description'])
|
||||||
|
response = self.project.compute_facets()
|
||||||
|
self.assertEqual(len(response.facets[0].choices), 66)
|
||||||
|
# {6}
|
||||||
|
response = self.project.compute_clusters('Office Title')
|
||||||
|
self.assertTrue(not response)
|
||||||
|
# {7}
|
||||||
|
clusters = self.project.compute_clusters('Office Title', 'knn')
|
||||||
|
self.assertEqual(len(clusters), 7)
|
||||||
|
self.assertEqual(len(clusters[0]), 2)
|
||||||
|
self.assertEqual(clusters[0][0]['value'], 'RSCC Member')
|
||||||
|
self.assertEqual(clusters[0][0]['count'], 233)
|
||||||
|
# Not strictly necessary to repeat 'Council Member' but a test
|
||||||
|
# of mass_edit, and it's also what the front end sends.
|
||||||
|
response = self.project.mass_edit('Office Title', [{
|
||||||
|
'from': ['Council Member', 'Councilmember'],
|
||||||
|
'to': 'Council Member'
|
||||||
|
}])
|
||||||
|
self.assertTrue('372' in response['historyEntry']['description'])
|
||||||
|
response = self.project.compute_facets()
|
||||||
|
self.assertEqual(len(response.facets[0].choices), 65)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue