Add reorder_rows(), and supporting Sorting class.

This commit is contained in:
Paul Makepeace 2011-04-25 02:49:19 -04:00
parent ca2b690a17
commit 191d93e33f
3 changed files with 90 additions and 12 deletions

View File

@ -171,6 +171,35 @@ class Engine(object):
facet.reset() facet.reset()
class Sorting(object):
    """Class representing the current sorting order for a project.

    Used in RefineProject.get_rows() and RefineProject.reorder_rows().

    The order is held in ``self.criteria``, a list of criterion dicts.
    Each criterion carries at least 'reverse', 'errorPosition' and
    'blankPosition'; criteria given as plain column names additionally get
    'column', 'valueType' ('string') and 'caseSensitive' (False).
    """

    def __init__(self, criteria=None):
        """Normalise *criteria* into a list of criterion dicts.

        criteria may be None (no sorting), a single column name, a single
        criterion dict, or a list mixing column names and criterion dicts.
        Dicts passed in are copied, so the caller's objects are never
        modified when the defaults are filled in.
        """
        try:
            string_types = basestring  # Python 2: matches str and unicode
        except NameError:
            string_types = str  # Python 3 has no basestring
        self.criteria = []
        if criteria is None:
            criteria = []
        if not isinstance(criteria, list):
            criteria = [criteria]
        for criterion in criteria:
            if isinstance(criterion, string_types):
                # A bare column name means a case-insensitive string sort.
                criterion = {
                    'column': criterion,
                    'valueType': 'string',
                    'caseSensitive': False,
                }
            else:
                # Copy so the defaults below are not written into the
                # caller's dict.
                criterion = dict(criterion)
            criterion.setdefault('reverse', False)
            criterion.setdefault('errorPosition', 1)
            criterion.setdefault('blankPosition', 2)
            self.criteria.append(criterion)

    def as_json(self):
        """Return the criteria serialised as a JSON object string."""
        return json.dumps({'criteria': self.criteria})

    def __len__(self):
        """Return the number of sort criteria."""
        return len(self.criteria)
class RefineServer(object): class RefineServer(object):
"""Communicate with a Refine server.""" """Communicate with a Refine server."""
@ -333,6 +362,7 @@ class RowsResponse(object):
class RefineProject: class RefineProject:
"""A Google Refine project.""" """A Google Refine project."""
def __init__(self, server, project_id=None, project_name=None): def __init__(self, server, project_id=None, project_name=None):
if not isinstance(server, RefineServer): if not isinstance(server, RefineServer):
url = urlparse.urlparse(server) url = urlparse.urlparse(server)
@ -353,6 +383,7 @@ class RefineProject:
self.column_index = {} self.column_index = {}
self.get_models() self.get_models()
self.engine = Engine() self.engine = Engine()
self.sorting = Sorting()
def do_raw(self, command, data): def do_raw(self, command, data):
"""Issue a command to the server & return a response object.""" """Issue a command to the server & return a response object."""
@ -364,7 +395,8 @@ class RefineProject:
if data is None: if data is None:
data = {} data = {}
data['engine'] = self.engine.as_json() data['engine'] = self.engine.as_json()
return self.server.urlopen_json(command, project_id=self.project_id, data=data) return self.server.urlopen_json(command, project_id=self.project_id,
data=data)
def get_models(self): def get_models(self):
"""Fill out column metadata.""" """Fill out column metadata."""
@ -391,8 +423,7 @@ class RefineProject:
def apply_operations(self, file_path, wait=True): def apply_operations(self, file_path, wait=True):
json = open(file_path).read() json = open(file_path).read()
response_json = self.do('apply-operations', {'operations': json}) response_json = self.do('apply-operations', {'operations': json})
if response_json['code'] == 'pending': if response_json['code'] == 'pending' and wait:
if wait:
self.wait_until_idle() self.wait_until_idle()
return 'ok' return 'ok'
return response_json['code'] # can be 'ok' or 'pending' return response_json['code'] # can be 'ok' or 'pending'
@ -417,13 +448,22 @@ class RefineProject:
response = self.do_json('compute-facets') response = self.do_json('compute-facets')
return FacetsResponse(response) return FacetsResponse(response)
def get_rows(self, facets=None, start=0, limit=10): def get_rows(self, facets=None, sort_by=None, start=0, limit=10):
if facets: if facets:
self.engine = Engine(facets) self.engine = Engine(facets)
response = self.do_json('get-rows', { if sort_by is not None:
'sorting': "{'criteria': []}", 'start': start, 'limit': limit}) self.sorting = Sorting(sort_by)
response = self.do_json('get-rows', {'sorting': self.sorting.as_json(),
'start': start, 'limit': limit})
return RowsResponse(response) return RowsResponse(response)
def reorder_rows(self, sort_by=None):
    """Send the 'reorder-rows' command using the project's sorting.

    If sort_by is given (anything other than None) it first replaces
    self.sorting with Sorting(sort_by); otherwise the sorting already
    stored on the project is used. Returns whatever do_json() returns.
    """
    if sort_by is not None:
        self.sorting = Sorting(sort_by)
    payload = {'sorting': self.sorting.as_json()}
    return self.do_json('reorder-rows', payload)
def remove_rows(self, facets=None): def remove_rows(self, facets=None):
if facets: if facets:
self.engine = Engine(facets) self.engine = Engine(facets)

View File

@ -7,14 +7,15 @@ Created by Paul Makepeace on 2011-04-22.
Copyright (c) 2011 Real Programmers. All rights reserved. Copyright (c) 2011 Real Programmers. All rights reserved.
""" """
import json
import json
import os import os
import sys import sys
import unittest import unittest
import urllib import urllib
from google.refine import TextFacet, NumericFacet, StarredFacet, FlaggedFacet from google.refine import TextFacet, NumericFacet, StarredFacet, FlaggedFacet
from google.refine import Engine, FacetsResponse from google.refine import Engine, Sorting, FacetsResponse
class FacetTest(unittest.TestCase): class FacetTest(unittest.TestCase):
def test_init(self): def test_init(self):
@ -42,6 +43,25 @@ class FacetTest(unittest.TestCase):
facet = NumericFacet(column='column', From=1, to=5) facet = NumericFacet(column='column', From=1, to=5)
self.assertEqual(facet.as_dict(), {'from': 1, 'to': 5, 'selectBlank': True, 'name': 'column', 'selectError': True, 'expression': 'value', 'selectNumeric': True, 'columnName': 'column', 'selectNonNumeric': True, 'type': 'range'}) self.assertEqual(facet.as_dict(), {'from': 1, 'to': 5, 'selectBlank': True, 'name': 'column', 'selectError': True, 'expression': 'value', 'selectNumeric': True, 'columnName': 'column', 'selectNonNumeric': True, 'type': 'range'})
def test_sorting(self):
    """Sorting accepts nothing, a column name, or a list of criteria."""
    # With no criteria the JSON form holds an empty list.
    self.assertEqual(Sorting().as_json(), '{"criteria": []}')

    # A bare column name is expanded with the default settings.
    criterion = Sorting('email').criteria[0]
    expected_defaults = [
        ('column', 'email'),
        ('valueType', 'string'),
        ('reverse', False),
        ('caseSensitive', False),
        ('errorPosition', 1),
        ('blankPosition', 2),
    ]
    for key, expected in expected_defaults:
        self.assertEqual(criterion[key], expected)

    # A list produces one criterion per entry.
    self.assertEqual(len(Sorting(['email', 'gender'])), 2)

    # Column names and criterion dicts can be mixed in one list.
    mixed = Sorting(['email', {'column': 'date', 'valueType': 'date'}])
    self.assertEqual(len(mixed), 2)
    criterion = mixed.criteria[1]
    self.assertEqual(criterion['column'], 'date')
    self.assertEqual(criterion['valueType'], 'date')
def test_add_facet(self): def test_add_facet(self):
facet = TextFacet(column='Party Code') facet = TextFacet(column='Party Code')
engine = Engine(facet) engine = Engine(facet)
@ -74,7 +94,6 @@ class FacetTest(unittest.TestCase):
engine.remove_all() engine.remove_all()
self.assertEqual(len(engine), 0) self.assertEqual(len(engine), 0)
def test_facets_response(self): def test_facets_response(self):
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}""" response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""
response = FacetsResponse(json.loads(response)) response = FacetsResponse(json.loads(response))

View File

@ -235,7 +235,7 @@ class TutorialTestEditing(RefineTestCase):
response = self.project.compute_facets() response = self.project.compute_facets()
self.assertEqual(len(response.facets[0].choices), 65) self.assertEqual(len(response.facets[0].choices), 65)
# Section "4. Row and Column Editing" # Section "4. Row and Column Editing, Batched Row Deletion"
# Test doesn't strictly follow the tutorial as the "Browse this # Test doesn't strictly follow the tutorial as the "Browse this
# cluster" performs a text facet which the server can't complete # cluster" performs a text facet which the server can't complete
# as it busts its max facet count. The useful work is done with # as it busts its max facet count. The useful work is done with
@ -261,5 +261,24 @@ class TutorialTestEditing(RefineTestCase):
self.assertTrue('3 rows' in response['historyEntry']['description']) self.assertTrue('3 rows' in response['historyEntry']['description'])
class TutorialTestDuplicateDetection(RefineTestCase):
    """Sort the duplicates.csv project by email, then reorder its rows."""
    project_file = 'duplicates.csv'

    def test_duplicate_detection(self):
        # Section "4. Row and Column Editing,
        #          Duplicate Row Detection and Deletion"
        # {7}, {8}
        response = self.project.get_rows(sort_by='email')
        indexes = [r.index for r in response.rows]
        self.assertEqual(indexes, [4, 9, 8, 3, 0, 2, 5, 6, 1, 7])
        # {9}
        response = self.project.reorder_rows()
        self.assertEqual('Reorder rows',
                         response['historyEntry']['description'])
        # After reordering, the same sort returns the rows in index order.
        response = self.project.get_rows(sort_by='email')
        indexes = [r.index for r in response.rows]
        # list() keeps the comparison valid where range() is not a list.
        self.assertEqual(indexes, list(range(10)))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()