From a03212ca948b42eaf95d441265816278ebfb3a77 Mon Sep 17 00:00:00 2001
From: Paul Makepeace
Date: Sun, 24 Apr 2011 20:19:45 -0400
Subject: [PATCH] Add text_transform(). Remove dead engine param from get_rows(). Make error reporting in urlopen_json show returned 'code'.

---
 google/refine.py           | 29 ++++++++++++++++++-----------
 google/test/test_refine.py | 23 +++++++++++++++++++++++
 2 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/google/refine.py b/google/refine.py
index 42c0a04..33de914 100644
--- a/google/refine.py
+++ b/google/refine.py
@@ -177,12 +177,10 @@ class RefineServer(object):
 
     def urlopen_json(self, *args, **kwargs):
         """Open a Refine URL, optionally POST data, and return parsed JSON."""
-        response = self.urlopen(*args, **kwargs)
-        data = response.read()
-        response_json = json.loads(data)
-        if 'code' in response_json and response_json['code'] == 'error':
-            raise Exception(response_json['message'])
-        return response_json
+        response = json.loads(self.urlopen(*args, **kwargs).read())
+        if 'code' in response and response['code'] != 'ok':
+            raise Exception(response['code'] + ': ' + response['message'])
+        return response
 
 
 class Refine:
@@ -249,8 +247,10 @@ class Refine:
                 return ''
             return str(opt)
         options = {
-            'split-into-columns': s(split_into_columns), 'separator': s(separator),
-            'ignore': s(ignore_initial_non_blank_lines), 'header-lines': s(header_lines),
+            'split-into-columns': s(split_into_columns),
+            'separator': s(separator),
+            'ignore': s(ignore_initial_non_blank_lines),
+            'header-lines': s(header_lines),
             'skip': s(skip_initial_data_rows), 'limit': s(limit),
             'guess-value-type': s(guess_value_type),
             'ignore-quotes': s(ignore_quotes),
@@ -318,8 +318,8 @@ class RefineProject:
         if not project_id and not project_name:
             raise Exception('Missing Refine project ID and name; need at least one of those')
         if not project_name or not project_id:
-            project_id, project_name = Refine(server).get_project_id_name(project_name or
-                                                                          project_id)
+            project_id, project_name = Refine(server).get_project_id_name(
+                project_name or project_id)
         self.project_id = project_id
         self.project_name = project_name
         self.columns = []   # columns & column_index filled in by get_models()
@@ -390,9 +390,16 @@ class RefineProject:
             {'engine': self.engine.as_json()})
         return FacetsResponse(response)
 
-    def get_rows(self, engine=None, start=0, limit=10):
+    def get_rows(self, start=0, limit=10):
         response = self.do_json('get-rows', {
             'sorting': "{'criteria': []}", 'engine': self.engine.as_json(),
             'start': start, 'limit': limit})
         return RowsResponse(response)
 
+    def text_transform(self, column, expression, on_error='set-to-blank',
+                       repeat=False, repeat_count=10):
+        response = self.do_json('text-transform', {
+            'engine': self.engine.as_json(), 'columnName': column,
+            'expression': expression, 'onError': on_error, 'repeat': repeat,
+            'repeatCount': repeat_count})
+        return response
\ No newline at end of file
diff --git a/google/test/test_refine.py b/google/test/test_refine.py
index b3895f1..57d0f85 100644
--- a/google/test/test_refine.py
+++ b/google/test/test_refine.py
@@ -190,5 +190,28 @@ class TutorialTestFacets(RefineTestCase):
         self.assertEqual(cd.numeric_count, 548)
 
 
+class TutorialTestTransformAndClustering(RefineTestCase):
+    project_file = 'louisiana-elected-officials.csv'
+
+    def test_transform(self):
+        # Section "3. Cell Editing": {1}
+        self.project.engine.remove_all()    # redundant due to setUp
+        # {2}
+        response = self.project.text_transform(column='Zip Code 2',
+            expression='value.toString()[0, 5]')
+        self.assertTrue('6067' in response['historyEntry']['description'])
+        # {3} - XXX history
+        # {4}
+        office_title_facet = TextFacet('Office Title')
+        self.project.engine.add_facet(office_title_facet)
+        response = self.project.compute_facets()
+        self.assertEqual(len(response.facets[0].choices), 76)
+        response = self.project.text_transform(column='Office Title',
+            expression='value.trim()')
+        self.assertTrue('6895' in response['historyEntry']['description'])
+        response = self.project.compute_facets()
+        self.assertEqual(len(response.facets[0].choices), 67)
+
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file