From d769f761b9a3361166df49fcac67b069b975de18 Mon Sep 17 00:00:00 2001 From: Paul Makepeace Date: Mon, 25 Apr 2011 10:32:11 -0400 Subject: [PATCH] Correct misunderstanding about how columnModel's cellIndex is working. --- google/refine.py | 24 ++++++++++++++++-------- google/test/test_refine.py | 8 +++++++- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/google/refine.py b/google/refine.py index 395370b..b19b6e4 100644 --- a/google/refine.py +++ b/google/refine.py @@ -340,7 +340,8 @@ class RowsResponse(object): self.flagged = row_response['flagged'] self.starred = row_response['starred'] self.index = row_response['i'] - self.row = [c['v'] if c else None for c in row_response['cells']] + self.row = [c['v'] if c else None + for c in row_response['cells']] def __init__(self, rows_response): self.rows_response = rows_response @@ -380,7 +381,8 @@ class RefineProject: self.project_id = project_id self.project_name = project_name self.columns = [] # columns & column_index filled in by get_models() - self.column_index = {} + self.column_index = {} # index into data from get_rows() + self.column_order = {} # order of column in UI self.get_models() self.engine = Engine() self.sorting = Sorting() @@ -399,23 +401,28 @@ class RefineProject: data=data) def get_models(self): - """Fill out column metadata.""" + """Fill out column metadata. + + column structure is sent in a list of columns in their order. + The cellIndex is used to find that column's data when returned from + get_rows().""" response = self.do_json('get-models', include_engine=False) column_model = response['columnModel'] columns = column_model['columns'] # Pre-extend the list in python - self.columns = [None] * (1 + max(c['cellIndex'] for c in columns)) - for column in columns: + self.columns = [None] * len(columns) + for i, column in enumerate(columns): cell_index, name = column['cellIndex'], column['name'] + self.column_order[name] = i self.column_index[name] = cell_index - self.columns[cell_index] = name + self.columns[i] = name self.key_column = column_model['keyColumnName'] # TODO: implement rest def wait_until_idle(self, polling_delay=0.5): while True: - response_json = self.do('get-processes') - if 'processes' in response_json and len(response_json['processes']) > 0: + response = self.do('get-processes') + if 'processes' in response and len(response['processes']) > 0: time.sleep(polling_delay) else: return @@ -537,4 +544,5 @@ class RefineProject: response = self.do_json('add-column', {'baseColumnName': column, 'newColumnName': new_column, 'expression': expression, 'columnInsertIndex': column_insert_index, 'onError': on_error}) + self.get_models() return response diff --git a/google/test/test_refine.py b/google/test/test_refine.py index d9e7238..cba4b01 100644 --- a/google/test/test_refine.py +++ b/google/test/test_refine.py @@ -283,7 +283,13 @@ class TutorialTestDuplicateDetection(RefineTestCase): 'facetCount(value, "value", "email")') self.assertTrue('column email by filling 10 rows' in response['historyEntry']['description']) - + response = self.project.get_rows() + self.assertEqual(self.project.column_order['count'], 1) # i.e. 2nd + # XXX retrieving column data is pretty vile + counts = [r.row[self.project.column_index['count']] + for r in response.rows] + self.assertEqual(counts, [2, 2, 1, 1, 3, 3, 3, 1, 2, 2]) + if __name__ == '__main__': unittest.main() \ No newline at end of file