Add reorder_rows(), and supporting Sorting class.
This commit is contained in:
parent
ca2b690a17
commit
191d93e33f
|
@ -171,6 +171,35 @@ class Engine(object):
|
|||
facet.reset()
|
||||
|
||||
|
||||
class Sorting(object):
    """Class representing the current sorting order for a project.

    Used in RefineProject.get_rows() and RefineProject.reorder_rows().

    ``criteria`` may be None (no sorting), a single criterion, or a list or
    tuple of criteria. Each criterion is either a column name, which is
    expanded to a case-insensitive string sort on that column, or a dict
    of sort options. Missing 'reverse', 'errorPosition' and 'blankPosition'
    options are filled in with defaults.
    """

    # basestring only exists on Python 2; fall back to str elsewhere so a
    # bare column name is recognised on either interpreter.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, criteria=None):
        self.criteria = []
        if criteria is None:
            criteria = []
        if not isinstance(criteria, (list, tuple)):
            # A lone column name or dict is shorthand for a one-item list.
            criteria = [criteria]
        for criterion in criteria:
            if isinstance(criterion, self._string_types):
                criterion = {
                    'column': criterion,
                    'valueType': 'string',
                    'caseSensitive': False,
                }
            else:
                # Copy so that filling in defaults below never modifies
                # the dict the caller passed in.
                criterion = dict(criterion)
            criterion.setdefault('reverse', False)
            criterion.setdefault('errorPosition', 1)
            criterion.setdefault('blankPosition', 2)
            self.criteria.append(criterion)

    def as_json(self):
        """Return the criteria serialised as a JSON object string."""
        return json.dumps({'criteria': self.criteria})

    def __len__(self):
        """Return the number of sort criteria."""
        return len(self.criteria)
|
||||
|
||||
|
||||
class RefineServer(object):
|
||||
"""Communicate with a Refine server."""
|
||||
|
||||
|
@ -333,6 +362,7 @@ class RowsResponse(object):
|
|||
|
||||
class RefineProject:
|
||||
"""A Google Refine project."""
|
||||
|
||||
def __init__(self, server, project_id=None, project_name=None):
|
||||
if not isinstance(server, RefineServer):
|
||||
url = urlparse.urlparse(server)
|
||||
|
@ -353,6 +383,7 @@ class RefineProject:
|
|||
self.column_index = {}
|
||||
self.get_models()
|
||||
self.engine = Engine()
|
||||
self.sorting = Sorting()
|
||||
|
||||
def do_raw(self, command, data):
|
||||
"""Issue a command to the server & return a response object."""
|
||||
|
@ -364,7 +395,8 @@ class RefineProject:
|
|||
if data is None:
|
||||
data = {}
|
||||
data['engine'] = self.engine.as_json()
|
||||
return self.server.urlopen_json(command, project_id=self.project_id, data=data)
|
||||
return self.server.urlopen_json(command, project_id=self.project_id,
|
||||
data=data)
|
||||
|
||||
def get_models(self):
|
||||
"""Fill out column metadata."""
|
||||
|
@ -391,8 +423,7 @@ class RefineProject:
|
|||
def apply_operations(self, file_path, wait=True):
|
||||
json = open(file_path).read()
|
||||
response_json = self.do('apply-operations', {'operations': json})
|
||||
if response_json['code'] == 'pending':
|
||||
if wait:
|
||||
if response_json['code'] == 'pending' and wait:
|
||||
self.wait_until_idle()
|
||||
return 'ok'
|
||||
return response_json['code'] # can be 'ok' or 'pending'
|
||||
|
@ -417,13 +448,22 @@ class RefineProject:
|
|||
response = self.do_json('compute-facets')
|
||||
return FacetsResponse(response)
|
||||
|
||||
def get_rows(self, facets=None, sort_by=None, start=0, limit=10):
    """Fetch rows from the server, returned as a RowsResponse.

    facets  -- if truthy, replaces the project's engine with
               Engine(facets) before the request.
    sort_by -- if not None, replaces the project's sorting with
               Sorting(sort_by); otherwise the current sorting is used.
    start, limit -- passed through as the 'start' and 'limit'
               parameters of the 'get-rows' command.
    """
    if facets:
        self.engine = Engine(facets)
    if sort_by is not None:
        self.sorting = Sorting(sort_by)
    # Serialise the current sorting rather than sending a hard-coded
    # empty-criteria string.
    response = self.do_json('get-rows', {
        'sorting': self.sorting.as_json(),
        'start': start,
        'limit': limit,
    })
    return RowsResponse(response)
|
||||
|
||||
def reorder_rows(self, sort_by=None):
|
||||
if sort_by is not None:
|
||||
self.sorting = Sorting(sort_by)
|
||||
response = self.do_json('reorder-rows',
|
||||
{'sorting': self.sorting.as_json()})
|
||||
return response
|
||||
|
||||
def remove_rows(self, facets=None):
|
||||
if facets:
|
||||
self.engine = Engine(facets)
|
||||
|
|
|
@ -7,14 +7,15 @@ Created by Paul Makepeace on 2011-04-22.
|
|||
Copyright (c) 2011 Real Programmers. All rights reserved.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
import urllib
|
||||
from google.refine import TextFacet, NumericFacet, StarredFacet, FlaggedFacet
|
||||
from google.refine import Engine, FacetsResponse
|
||||
from google.refine import Engine, Sorting, FacetsResponse
|
||||
|
||||
|
||||
class FacetTest(unittest.TestCase):
|
||||
def test_init(self):
|
||||
|
@ -42,6 +43,25 @@ class FacetTest(unittest.TestCase):
|
|||
facet = NumericFacet(column='column', From=1, to=5)
|
||||
self.assertEqual(facet.as_dict(), {'from': 1, 'to': 5, 'selectBlank': True, 'name': 'column', 'selectError': True, 'expression': 'value', 'selectNumeric': True, 'columnName': 'column', 'selectNonNumeric': True, 'type': 'range'})
|
||||
|
||||
def test_sorting(self):
    """Sorting accepts nothing, a column name, or a list of criteria."""
    # No criteria serialises to an empty criteria list.
    self.assertEqual(Sorting().as_json(), '{"criteria": []}')

    # A bare column name is expanded to a full criterion with defaults.
    by_email = Sorting('email').criteria[0]
    expected_defaults = [
        ('column', 'email'),
        ('valueType', 'string'),
        ('reverse', False),
        ('caseSensitive', False),
        ('errorPosition', 1),
        ('blankPosition', 2),
    ]
    for option, value in expected_defaults:
        self.assertEqual(by_email[option], value)

    # A list of column names gives one criterion per name.
    self.assertEqual(len(Sorting(['email', 'gender'])), 2)

    # Column names and explicit dicts may be mixed in one list.
    mixed = Sorting(['email', {'column': 'date', 'valueType': 'date'}])
    self.assertEqual(len(mixed), 2)
    by_date = mixed.criteria[1]
    self.assertEqual(by_date['column'], 'date')
    self.assertEqual(by_date['valueType'], 'date')
|
||||
|
||||
def test_add_facet(self):
|
||||
facet = TextFacet(column='Party Code')
|
||||
engine = Engine(facet)
|
||||
|
@ -74,7 +94,6 @@ class FacetTest(unittest.TestCase):
|
|||
engine.remove_all()
|
||||
self.assertEqual(len(engine), 0)
|
||||
|
||||
|
||||
def test_facets_response(self):
|
||||
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""
|
||||
response = FacetsResponse(json.loads(response))
|
||||
|
|
|
@ -235,7 +235,7 @@ class TutorialTestEditing(RefineTestCase):
|
|||
response = self.project.compute_facets()
|
||||
self.assertEqual(len(response.facets[0].choices), 65)
|
||||
|
||||
# Section "4. Row and Column Editing"
|
||||
# Section "4. Row and Column Editing, Batched Row Deletion"
|
||||
# Test doesn't strictly follow the tutorial as the "Browse this
|
||||
# cluster" performs a text facet which the server can't complete
|
||||
# as it busts its max facet count. The useful work is done with
|
||||
|
@ -261,5 +261,24 @@ class TutorialTestEditing(RefineTestCase):
|
|||
self.assertTrue('3 rows' in response['historyEntry']['description'])
|
||||
|
||||
|
||||
class TutorialTestDuplicateDetection(RefineTestCase):
    """Tutorial walkthrough: sort rows by email, then reorder them."""
    project_file = 'duplicates.csv'

    def test_duplicate_detection(self):
        # Section "4. Row and Column Editing,
        #             Duplicate Row Detection and Deletion"
        # {7}, {8}
        response = self.project.get_rows(sort_by='email')
        indexes = [r.index for r in response.rows]
        self.assertEqual(indexes, [4, 9, 8, 3, 0, 2, 5, 6, 1, 7])
        # {9}
        # No sort_by here: reorder_rows() reuses the sorting set by the
        # get_rows() call above.
        response = self.project.reorder_rows()
        self.assertEqual('Reorder rows',
                         response['historyEntry']['description'])
        response = self.project.get_rows(sort_by='email')
        indexes = [r.index for r in response.rows]
        # list(...) keeps the comparison list-to-list even where range()
        # does not return a list.
        self.assertEqual(indexes, list(range(10)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Loading…
Reference in New Issue