Add reorder_rows(), and supporting Sorting class.
This commit is contained in:
parent
ca2b690a17
commit
191d93e33f
|
@ -171,6 +171,35 @@ class Engine(object):
|
||||||
facet.reset()
|
facet.reset()
|
||||||
|
|
||||||
|
|
||||||
|
class Sorting(object):
    """Class representing the current sorting order for a project.

    Used in RefineProject.get_rows()

    ``criteria`` may be ``None`` (no sorting), a single criterion, or a list
    of criteria. Each criterion is either a column name (string) or a dict.
    A column name is expanded to a case-insensitive string sort on that
    column. Every stored criterion is given ``reverse``, ``errorPosition``
    and ``blankPosition`` entries when it does not already carry them.
    """

    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, criteria=None):
        self.criteria = []
        if criteria is None:
            criteria = []
        if not isinstance(criteria, list):
            # A lone criterion is treated as a one-element list.
            criteria = [criteria]
        for criterion in criteria:
            if isinstance(criterion, self._string_types):
                criterion = {
                    'column': criterion,
                    'valueType': 'string',
                    'caseSensitive': False,
                }
            else:
                # Work on a copy so the caller's dict is not modified by the
                # defaults filled in below.
                criterion = dict(criterion)
            criterion.setdefault('reverse', False)
            criterion.setdefault('errorPosition', 1)
            criterion.setdefault('blankPosition', 2)
            self.criteria.append(criterion)

    def as_json(self):
        """Return the criteria as a JSON string under the key 'criteria'."""
        return json.dumps({'criteria': self.criteria})

    def __len__(self):
        """Return the number of sort criteria."""
        return len(self.criteria)
|
||||||
|
|
||||||
|
|
||||||
class RefineServer(object):
|
class RefineServer(object):
|
||||||
"""Communicate with a Refine server."""
|
"""Communicate with a Refine server."""
|
||||||
|
|
||||||
|
@ -333,6 +362,7 @@ class RowsResponse(object):
|
||||||
|
|
||||||
class RefineProject:
|
class RefineProject:
|
||||||
"""A Google Refine project."""
|
"""A Google Refine project."""
|
||||||
|
|
||||||
def __init__(self, server, project_id=None, project_name=None):
|
def __init__(self, server, project_id=None, project_name=None):
|
||||||
if not isinstance(server, RefineServer):
|
if not isinstance(server, RefineServer):
|
||||||
url = urlparse.urlparse(server)
|
url = urlparse.urlparse(server)
|
||||||
|
@ -353,6 +383,7 @@ class RefineProject:
|
||||||
self.column_index = {}
|
self.column_index = {}
|
||||||
self.get_models()
|
self.get_models()
|
||||||
self.engine = Engine()
|
self.engine = Engine()
|
||||||
|
self.sorting = Sorting()
|
||||||
|
|
||||||
def do_raw(self, command, data):
|
def do_raw(self, command, data):
|
||||||
"""Issue a command to the server & return a response object."""
|
"""Issue a command to the server & return a response object."""
|
||||||
|
@ -364,7 +395,8 @@ class RefineProject:
|
||||||
if data is None:
|
if data is None:
|
||||||
data = {}
|
data = {}
|
||||||
data['engine'] = self.engine.as_json()
|
data['engine'] = self.engine.as_json()
|
||||||
return self.server.urlopen_json(command, project_id=self.project_id, data=data)
|
return self.server.urlopen_json(command, project_id=self.project_id,
|
||||||
|
data=data)
|
||||||
|
|
||||||
def get_models(self):
|
def get_models(self):
|
||||||
"""Fill out column metadata."""
|
"""Fill out column metadata."""
|
||||||
|
@ -391,10 +423,9 @@ class RefineProject:
|
||||||
def apply_operations(self, file_path, wait=True):
    """Send the operations stored in a file to the server.

    The contents of ``file_path`` are submitted as the 'operations'
    parameter of the 'apply-operations' command.

    Returns 'ok' when the server answered 'pending' and ``wait`` is true
    (after waiting for the project to become idle); otherwise returns the
    server's response code, which can be 'ok' or 'pending'.
    """
    # Close the file promptly, and avoid naming the contents ``json``, which
    # would shadow the json module inside this method.
    with open(file_path) as operations_file:
        operations = operations_file.read()
    # NOTE(review): only do_raw/do_json are visible nearby -- confirm that
    # ``self.do`` exists on this class.
    response_json = self.do('apply-operations', {'operations': operations})
    if response_json['code'] == 'pending' and wait:
        self.wait_until_idle()
        return 'ok'
    return response_json['code']  # can be 'ok' or 'pending'
|
||||||
|
|
||||||
def export(self, export_format='tsv'):
|
def export(self, export_format='tsv'):
|
||||||
|
@ -417,13 +448,22 @@ class RefineProject:
|
||||||
response = self.do_json('compute-facets')
|
response = self.do_json('compute-facets')
|
||||||
return FacetsResponse(response)
|
return FacetsResponse(response)
|
||||||
|
|
||||||
def get_rows(self, facets=None, sort_by=None, start=0, limit=10):
    """Issue a 'get-rows' command and wrap the result in a RowsResponse.

    A truthy ``facets`` replaces the project's engine; a ``sort_by`` other
    than None replaces the project's sorting. Both replacements stay on the
    project after the call. ``start`` and ``limit`` are passed through to
    the server unchanged.
    """
    if facets:
        self.engine = Engine(facets)
    if sort_by is not None:
        self.sorting = Sorting(sort_by)
    params = {
        'sorting': self.sorting.as_json(),
        'start': start,
        'limit': limit,
    }
    return RowsResponse(self.do_json('get-rows', params))
|
||||||
|
|
||||||
|
def reorder_rows(self, sort_by=None):
    """Issue a 'reorder-rows' command using the project's sorting.

    When ``sort_by`` is not None it first replaces the project's sorting.
    Returns the response from ``do_json`` as-is.
    """
    if sort_by is not None:
        self.sorting = Sorting(sort_by)
    params = {'sorting': self.sorting.as_json()}
    return self.do_json('reorder-rows', params)
|
||||||
|
|
||||||
def remove_rows(self, facets=None):
|
def remove_rows(self, facets=None):
|
||||||
if facets:
|
if facets:
|
||||||
self.engine = Engine(facets)
|
self.engine = Engine(facets)
|
||||||
|
|
|
@ -7,14 +7,15 @@ Created by Paul Makepeace on 2011-04-22.
|
||||||
Copyright (c) 2011 Real Programmers. All rights reserved.
|
Copyright (c) 2011 Real Programmers. All rights reserved.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
import urllib
|
import urllib
|
||||||
from google.refine import TextFacet, NumericFacet, StarredFacet, FlaggedFacet
|
from google.refine import TextFacet, NumericFacet, StarredFacet, FlaggedFacet
|
||||||
from google.refine import Engine, FacetsResponse
|
from google.refine import Engine, Sorting, FacetsResponse
|
||||||
|
|
||||||
|
|
||||||
class FacetTest(unittest.TestCase):
|
class FacetTest(unittest.TestCase):
|
||||||
def test_init(self):
|
def test_init(self):
|
||||||
|
@ -42,6 +43,25 @@ class FacetTest(unittest.TestCase):
|
||||||
facet = NumericFacet(column='column', From=1, to=5)
|
facet = NumericFacet(column='column', From=1, to=5)
|
||||||
self.assertEqual(facet.as_dict(), {'from': 1, 'to': 5, 'selectBlank': True, 'name': 'column', 'selectError': True, 'expression': 'value', 'selectNumeric': True, 'columnName': 'column', 'selectNonNumeric': True, 'type': 'range'})
|
self.assertEqual(facet.as_dict(), {'from': 1, 'to': 5, 'selectBlank': True, 'name': 'column', 'selectError': True, 'expression': 'value', 'selectNumeric': True, 'columnName': 'column', 'selectNonNumeric': True, 'type': 'range'})
|
||||||
|
|
||||||
|
def test_sorting(self):
    # No criteria serialises to an empty criteria list.
    self.assertEqual(Sorting().as_json(), '{"criteria": []}')

    # A bare column name expands to a string sort with default settings.
    criterion = Sorting('email').criteria[0]
    expected = (
        ('column', 'email'),
        ('valueType', 'string'),
        ('reverse', False),
        ('caseSensitive', False),
        ('errorPosition', 1),
        ('blankPosition', 2),
    )
    for key, value in expected:
        self.assertEqual(criterion[key], value)

    # Several column names give one criterion each.
    self.assertEqual(len(Sorting(['email', 'gender'])), 2)

    # Column names and explicit dict criteria can be mixed.
    mixed = Sorting(['email', {'column': 'date', 'valueType': 'date'}])
    self.assertEqual(len(mixed), 2)
    date_criterion = mixed.criteria[1]
    self.assertEqual(date_criterion['column'], 'date')
    self.assertEqual(date_criterion['valueType'], 'date')
|
||||||
|
|
||||||
def test_add_facet(self):
|
def test_add_facet(self):
|
||||||
facet = TextFacet(column='Party Code')
|
facet = TextFacet(column='Party Code')
|
||||||
engine = Engine(facet)
|
engine = Engine(facet)
|
||||||
|
@ -74,7 +94,6 @@ class FacetTest(unittest.TestCase):
|
||||||
engine.remove_all()
|
engine.remove_all()
|
||||||
self.assertEqual(len(engine), 0)
|
self.assertEqual(len(engine), 0)
|
||||||
|
|
||||||
|
|
||||||
def test_facets_response(self):
|
def test_facets_response(self):
|
||||||
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""
|
response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""
|
||||||
response = FacetsResponse(json.loads(response))
|
response = FacetsResponse(json.loads(response))
|
||||||
|
|
|
@ -235,7 +235,7 @@ class TutorialTestEditing(RefineTestCase):
|
||||||
response = self.project.compute_facets()
|
response = self.project.compute_facets()
|
||||||
self.assertEqual(len(response.facets[0].choices), 65)
|
self.assertEqual(len(response.facets[0].choices), 65)
|
||||||
|
|
||||||
# Section "4. Row and Column Editing"
|
# Section "4. Row and Column Editing, Batched Row Deletion"
|
||||||
# Test doesn't strictly follow the tutorial as the "Browse this
|
# Test doesn't strictly follow the tutorial as the "Browse this
|
||||||
# cluster" performs a text facet which the server can't complete
|
# cluster" performs a text facet which the server can't complete
|
||||||
# as it busts its max facet count. The useful work is done with
|
# as it busts its max facet count. The useful work is done with
|
||||||
|
@ -261,5 +261,24 @@ class TutorialTestEditing(RefineTestCase):
|
||||||
self.assertTrue('3 rows' in response['historyEntry']['description'])
|
self.assertTrue('3 rows' in response['historyEntry']['description'])
|
||||||
|
|
||||||
|
|
||||||
|
class TutorialTestDuplicateDetection(RefineTestCase):
    # Data file used as the project for this test case.
    project_file = 'duplicates.csv'

    def test_duplicate_detection(self):
        # Section "4. Row and Column Editing,
        #             Duplicate Row Detection and Deletion"
        # {7}, {8}
        # Sorting by email only changes the order rows are returned in; the
        # row indexes still reflect the stored order.
        response = self.project.get_rows(sort_by='email')
        indexes = [r.index for r in response.rows]
        self.assertEqual(indexes, [4, 9, 8, 3, 0, 2, 5, 6, 1, 7])
        # {9}
        # reorder_rows() reuses the sorting set by the get_rows() call above.
        response = self.project.reorder_rows()
        self.assertEqual('Reorder rows',
                         response['historyEntry']['description'])
        # After reordering, the same sort returns rows in stored order.
        response = self.project.get_rows(sort_by='email')
        indexes = [r.index for r in response.rows]
        # list(...) keeps the comparison list-to-list where range() does not
        # return a list (Python 3).
        self.assertEqual(indexes, list(range(10)))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
Loading…
Reference in New Issue