class RefineProject:
    # Excerpt: only the members added to RefineProject by this hunk are
    # shown; the rest of the class lies outside the visible diff.

    # Default clusterer configuration, keyed by clusterer type.  This dict
    # is a class attribute shared by every instance, so it must be treated
    # as read-only; compute_clusters() works on a per-call copy.
    clusterer_defaults = {
        'binning': {
            'type': 'binning',
            'function': 'fingerprint',
            'params': {},
        },
        'knn': {
            'type': 'knn',
            'function': 'levenshtein',
            'params': {
                'radius': 1,
                'blocking-ngram-size': 6,
            },
        },
    }

    def compute_clusters(self, column, clusterer_type='binning',
                         function=None, params=None):
        """Return a list of clusters of {'value': ..., 'count': ...}.

        Sends the 'compute-clusters' command through self.do_json with the
        current engine configuration and a JSON-encoded clusterer
        description, then renames the 'v'/'c' keys of each returned entry
        to 'value'/'count'.

        Args:
            column: name of the column to cluster.
            clusterer_type: key into clusterer_defaults ('binning' or
                'knn'); selects the default 'type', 'function' and
                'params'.
            function: optional replacement for the default 'function'.
            params: optional replacement for the default 'params' dict
                (it replaces the defaults wholesale; it is not merged).

        Returns:
            A list of clusters, each a list of
            {'value': ..., 'count': ...} dicts.

        Raises:
            KeyError: if clusterer_type is not a key of clusterer_defaults.
        """
        # Shallow-copy the defaults: only top-level keys are (re)assigned
        # below, so a shallow copy is enough to keep per-call overrides
        # ('params', 'function', 'column') out of the shared class-level
        # dict, where they would otherwise persist into later calls.
        clusterer = dict(self.clusterer_defaults[clusterer_type])
        if params is not None:
            clusterer['params'] = params
        if function is not None:
            clusterer['function'] = function
        clusterer['column'] = column
        response = self.do_json('compute-clusters', {
            'engine': self.engine.as_json(),
            'clusterer': json.dumps(clusterer),
        })
        return [[{'value': x['v'], 'count': x['c']} for x in cluster]
                for cluster in response]
Section "3. Cell Editing": {1} self.project.engine.remove_all() # redundant due to setUp # {2} @@ -206,15 +206,34 @@ class TutorialTestTransformAndClustering(RefineTestCase): self.project.engine.add_facet(office_title_facet) response = self.project.compute_facets() self.assertEqual(len(response.facets[0].choices), 76) - response = self.project.text_transform(column='Office Title', - expression='value.trim()') + response = self.project.text_transform('Office Title', 'value.trim()') self.assertTrue('6895' in response['historyEntry']['description']) response = self.project.compute_facets() self.assertEqual(len(response.facets[0].choices), 67) # {5} - response = self.project.edit(column='Office Title', + response = self.project.edit('Office Title', 'Councilmen', 'Councilman') self.assertTrue('13' in response['historyEntry']['description']) + response = self.project.compute_facets() + self.assertEqual(len(response.facets[0].choices), 66) + # {6} + response = self.project.compute_clusters('Office Title') + self.assertTrue(not response) + # {7} + clusters = self.project.compute_clusters('Office Title', 'knn') + self.assertEqual(len(clusters), 7) + self.assertEqual(len(clusters[0]), 2) + self.assertEqual(clusters[0][0]['value'], 'RSCC Member') + self.assertEqual(clusters[0][0]['count'], 233) + # Not strictly necessary to repeat 'Council Member' but a test + # of mass_edit, and it's also what the front end sends. + response = self.project.mass_edit('Office Title', [{ + 'from': ['Council Member', 'Councilmember'], + 'to': 'Council Member' + }]) + self.assertTrue('372' in response['historyEntry']['description']) + response = self.project.compute_facets() + self.assertEqual(len(response.facets[0].choices), 65) if __name__ == '__main__':