Add reorder_rows(), and supporting Sorting class.

This commit is contained in:
Paul Makepeace 2011-04-25 02:49:19 -04:00
parent ca2b690a17
commit 191d93e33f
3 changed files with 90 additions and 12 deletions

View File

@ -171,6 +171,35 @@ class Engine(object):
facet.reset()
class Sorting(object):
    """Class representing the current sorting order for a project.

    Used in RefineProject.get_rows() and RefineProject.reorder_rows().

    ``criteria`` may be None (no sorting), a single criterion, or a list or
    tuple of criteria. Each criterion is either a column name, which is
    expanded to a case-insensitive string sort on that column, or a dict of
    sort options (e.g. 'column', 'valueType'). In both cases any missing
    'reverse', 'errorPosition' and 'blankPosition' keys are filled in with
    defaults of False, 1 and 2 respectively.
    """

    # Python 2 spells the common string base type ``basestring``; fall back
    # to ``str`` where that name does not exist.
    try:
        _string_types = basestring  # noqa: F821
    except NameError:
        _string_types = str

    def __init__(self, criteria=None):
        self.criteria = []
        if criteria is None:
            criteria = []
        elif not isinstance(criteria, (list, tuple)):
            # A lone column name or dict is treated as a one-item list.
            criteria = [criteria]
        for criterion in criteria:
            if isinstance(criterion, self._string_types):
                criterion = {
                    'column': criterion,
                    'valueType': 'string',
                    'caseSensitive': False,
                }
            else:
                # Work on a copy so the defaults added below never leak
                # into the dict the caller handed us.
                criterion = dict(criterion)
            criterion.setdefault('reverse', False)
            criterion.setdefault('errorPosition', 1)
            criterion.setdefault('blankPosition', 2)
            self.criteria.append(criterion)

    def as_json(self):
        """Return the criteria serialised as a JSON object string."""
        return json.dumps({'criteria': self.criteria})

    def __len__(self):
        """Return the number of sort criteria."""
        return len(self.criteria)
class RefineServer(object):
"""Communicate with a Refine server."""
@ -333,6 +362,7 @@ class RowsResponse(object):
class RefineProject:
"""A Google Refine project."""
def __init__(self, server, project_id=None, project_name=None):
if not isinstance(server, RefineServer):
url = urlparse.urlparse(server)
@ -353,6 +383,7 @@ class RefineProject:
self.column_index = {}
self.get_models()
self.engine = Engine()
self.sorting = Sorting()
def do_raw(self, command, data):
"""Issue a command to the server & return a response object."""
@ -364,7 +395,8 @@ class RefineProject:
if data is None:
data = {}
data['engine'] = self.engine.as_json()
return self.server.urlopen_json(command, project_id=self.project_id, data=data)
return self.server.urlopen_json(command, project_id=self.project_id,
data=data)
def get_models(self):
"""Fill out column metadata."""
@ -391,10 +423,9 @@ class RefineProject:
def apply_operations(self, file_path, wait=True):
json = open(file_path).read()
response_json = self.do('apply-operations', {'operations': json})
if response_json['code'] == 'pending':
if wait:
self.wait_until_idle()
return 'ok'
if response_json['code'] == 'pending' and wait:
self.wait_until_idle()
return 'ok'
return response_json['code'] # can be 'ok' or 'pending'
def export(self, export_format='tsv'):
@ -417,13 +448,22 @@ class RefineProject:
response = self.do_json('compute-facets')
return FacetsResponse(response)
def get_rows(self, facets=None, start=0, limit=10):
def get_rows(self, facets=None, sort_by=None, start=0, limit=10):
if facets:
self.engine = Engine(facets)
response = self.do_json('get-rows', {
'sorting': "{'criteria': []}", 'start': start, 'limit': limit})
if sort_by is not None:
self.sorting = Sorting(sort_by)
response = self.do_json('get-rows', {'sorting': self.sorting.as_json(),
'start': start, 'limit': limit})
return RowsResponse(response)
def reorder_rows(self, sort_by=None):
    """Issue the 'reorder-rows' command using the project's sorting.

    If sort_by is given (not None) it first replaces self.sorting with
    Sorting(sort_by); otherwise the sorting already stored on the project
    is sent. Returns the response from do_json().
    """
    if sort_by is not None:
        self.sorting = Sorting(sort_by)
    params = {'sorting': self.sorting.as_json()}
    return self.do_json('reorder-rows', params)
def remove_rows(self, facets=None):
if facets:
self.engine = Engine(facets)

View File

@ -7,14 +7,15 @@ Created by Paul Makepeace on 2011-04-22.
Copyright (c) 2011 Real Programmers. All rights reserved.
"""
import json
import json
import os
import sys
import unittest
import urllib
from google.refine import TextFacet, NumericFacet, StarredFacet, FlaggedFacet
from google.refine import Engine, FacetsResponse
from google.refine import Engine, Sorting, FacetsResponse
class FacetTest(unittest.TestCase):
def test_init(self):
@ -42,6 +43,25 @@ class FacetTest(unittest.TestCase):
facet = NumericFacet(column='column', From=1, to=5)
self.assertEqual(facet.as_dict(), {'from': 1, 'to': 5, 'selectBlank': True, 'name': 'column', 'selectError': True, 'expression': 'value', 'selectNumeric': True, 'columnName': 'column', 'selectNonNumeric': True, 'type': 'range'})
def test_sorting(self):
    """Sorting accepts nothing, a column name, or a list of names/dicts."""
    # No criteria serialises to an empty criteria list.
    self.assertEqual(Sorting().as_json(), '{"criteria": []}')

    # A bare column name expands to a full criterion with defaults.
    email = Sorting('email').criteria[0]
    expected_defaults = (
        ('column', 'email'),
        ('valueType', 'string'),
        ('reverse', False),
        ('caseSensitive', False),
        ('errorPosition', 1),
        ('blankPosition', 2),
    )
    for key, value in expected_defaults:
        self.assertEqual(email[key], value)

    # A list of column names yields one criterion per name.
    self.assertEqual(len(Sorting(['email', 'gender'])), 2)

    # Names and explicit dicts can be mixed; dict values are kept.
    mixed = Sorting(['email', {'column': 'date', 'valueType': 'date'}])
    self.assertEqual(len(mixed), 2)
    by_date = mixed.criteria[1]
    self.assertEqual(by_date['column'], 'date')
    self.assertEqual(by_date['valueType'], 'date')
def test_add_facet(self):
facet = TextFacet(column='Party Code')
engine = Engine(facet)
@ -74,7 +94,6 @@ class FacetTest(unittest.TestCase):
engine.remove_all()
self.assertEqual(len(engine), 0)
def test_facets_response(self):
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""
response = FacetsResponse(json.loads(response))

View File

@ -235,7 +235,7 @@ class TutorialTestEditing(RefineTestCase):
response = self.project.compute_facets()
self.assertEqual(len(response.facets[0].choices), 65)
# Section "4. Row and Column Editing"
# Section "4. Row and Column Editing, Batched Row Deletion"
# Test doesn't strictly follow the tutorial as the "Browse this
# cluster" performs a text facet which the server can't complete
# as it busts its max facet count. The useful work is done with
@ -261,5 +261,24 @@ class TutorialTestEditing(RefineTestCase):
self.assertTrue('3 rows' in response['historyEntry']['description'])
class TutorialTestDuplicateDetection(RefineTestCase):
    """Tutorial section 4: duplicate row detection and deletion."""
    project_file = 'duplicates.csv'

    def test_duplicate_detection(self):
        # Section "4. Row and Column Editing,
        #             Duplicate Row Detection and Deletion"
        # {7}, {8}: fetch the rows sorted by email and check their order.
        by_email = self.project.get_rows(sort_by='email')
        self.assertEqual([row.index for row in by_email.rows],
                         [4, 9, 8, 3, 0, 2, 5, 6, 1, 7])
        # {9}: reorder the rows using the sorting set by the call above.
        reordered = self.project.reorder_rows()
        self.assertEqual('Reorder rows',
                         reordered['historyEntry']['description'])
        # The same sorted query now returns the rows in index order.
        after = self.project.get_rows(sort_by='email')
        self.assertEqual([row.index for row in after.rows], range(10))
if __name__ == '__main__':
unittest.main()