From 478fae7d9172a52e1296094151b41d2a509be3e0 Mon Sep 17 00:00:00 2001 From: Paul Makepeace Date: Tue, 26 Apr 2011 00:30:20 -0400 Subject: [PATCH] Add split_column(), move_column(). Allow RefineRow to have missing data and return None. This seems to happen when column data at the end of a row is all null. --- google/refine.py | 23 ++++++++++++++++++- google/test/test_refine.py | 47 +++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/google/refine.py b/google/refine.py index aaab918..ef03f7f 100644 --- a/google/refine.py +++ b/google/refine.py @@ -354,7 +354,11 @@ def RowsResponseFactory(column_index): self.row = [c['v'] if c else None for c in row_response['cells']] def __getitem__(self, column): - return self.row[column_index[column]] + # Trailing nulls seem to be stripped from row data + try: + return self.row[column_index[column]] + except IndexError: + return None def __init__(self, rows_response): self.rows_response = rows_response @@ -567,6 +571,16 @@ class RefineProject: self.get_models() return response + def split_column(self, column, separator=',', mode='separator', + regex=False, guess_cell_type=True, + remove_original_column=True): + response = self.do_json('split-column', {'columnName': column, + 'separator': separator, 'mode': mode, 'regex': regex, + 'guessCellType': guess_cell_type, + 'removeOriginalColumn': remove_original_column}) + self.get_models() + return response + def rename_column(self, column, new_column): response = self.do_json('rename-column', {'oldColumnName': column, 'newColumnName': new_column}) @@ -580,6 +594,13 @@ class RefineProject: self.get_models() return response + def move_column(self, column, index): + """Move column to a new position.""" + response = self.do_json('move-column', {'columnName': column, + 'index': index}) + self.get_models() + return response + def blank_down(self, column): response = self.do_json('blank-down', {'columnName': column}) self.get_models() diff --git a/google/test/test_refine.py b/google/test/test_refine.py index a8c0358..36524ab 100644 --- a/google/test/test_refine.py +++ b/google/test/test_refine.py @@ -430,9 +430,54 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(RefineTestCase): class TutorialTestTransposeVariableNumbeOfRowsIntoColumns(RefineTestCase): project_file = 'variable-rows.csv' + project_file_options = {'split_into_columns': False, + 'header_lines': 0} def test_transpose_variable_number_of_rows_into_columns(self): - pass + # {20}, {21} + response = self.project.add_column('Column', 'First Line', + 'if(value.contains(" on "), value, null)') + self.assertTrue('Column by filling 4 rows' in + response['historyEntry']['description']) + response = self.project.get_rows() + first_names = [row['First Line'][0:10] if row['First Line'] else None + for row in response.rows] + self.assertEqual(first_names, ['Tom Dalton', None, None, None, + 'Morgan Law', None, None, None, None, 'Eric Batem']) + # {22} + response = self.project.move_column('First Line', 0) + self.assertTrue('Move column First Line to position 0' in + response['historyEntry']['description']) + self.assertEqual(self.project.column_order['First Line'], 0) + # {23} + self.project.engine.mode = 'record-based' + response = self.project.get_rows() + self.assertEqual(response.mode, 'record-based') + self.assertEqual(response.filtered, 4) + # {24} + response = self.project.add_column('Column', 'Status', + 'row.record.cells["Column"].value[-1]') + self.assertTrue('filling 18 rows' in + response['historyEntry']['description']) + # {25} + response = self.project.text_transform('Column', + 'row.record.cells["Column"].value[1, -1].join("|")') + self.assertTrue('18 cells' in + response['historyEntry']['description']) + # {26} + self.project.engine.mode = 'row-based' + # {27} + blank_facet = TextFacet('First Line', expression='isBlank(value)', + selection=True) + response = self.project.remove_rows(blank_facet) + self.assertEqual('Remove 14 rows', + response['historyEntry']['description']) + self.project.engine.remove_all() + # {28} + 'Split 4 cell(s) in column Column into several columns by separator' + response = self.project.split_column('Column', separator='|') + self.assertTrue('Split 4 cell(s) in column Column' in + response['historyEntry']['description']) if __name__ == '__main__':