delete old tests (will be reimplemented)
This commit is contained in:
parent
cca067a87c
commit
bb9792fd15
|
@ -1,4 +1,2 @@
|
||||||
include README.md
|
include README.md
|
||||||
include COPYING.txt
|
include COPYING.txt
|
||||||
recursive-include tests/data *.csv
|
|
||||||
recursive-include tests *.py
|
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
email,name,state,gender,purchase,count,date
|
|
||||||
danny.baron@example1.com,Danny Baron,CA,M,TV (UTF-8: 📺),1,"Wed, 4 Jul 2001"
|
|
||||||
melanie.white@example2.edu,Melanie White,NC,F,<iPhone>,1,2001-07-04T12:08:56
|
|
||||||
danny.baron@example1.com, D. ("Tab") Baron,CA,M,Winter jacket,1,2001-07-04
|
|
||||||
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight,1,2001/07/04
|
|
||||||
arthur.duff@example4.com,Arthur Duff,OR,M,Dining table,1,2001-07
|
|
||||||
danny.baron@example1.com,Daniel Baron,,,Bike,1,2001
|
|
||||||
jean.griffith@example5.org,Jean Griffith,WA,F,Power drill,1,2000
|
|
||||||
melanie.white@example2.edu,Melanie White,NC,F,'iPad',1,1999
|
|
||||||
ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier,1,1998
|
|
||||||
arthur.duff@example4.com,Arthur Duff,OR,M,Night table,1,1997
|
|
Can't render this file because it contains an unexpected character in line 4 and column 33.
|
|
@ -1,11 +1,11 @@
|
||||||
email,name,state,gender,purchase
|
email,name,state,gender,purchase,count,date
|
||||||
danny.baron@example1.com,Danny Baron,CA,M,TV
|
danny.baron@example1.com,Danny Baron,CA,M,TV (UTF-8: 📺),1,"Wed, 4 Jul 2001"
|
||||||
melanie.white@example2.edu,Melanie White,NC,F,iPhone
|
melanie.white@example2.edu,Melanie White,NC,F,<iPhone>,1,2001-07-04T12:08:56
|
||||||
danny.baron@example1.com,D. Baron,CA,M,Winter jacket
|
danny.baron@example1.com, D. ("Tab") Baron,CA,M,Winter jacket,1,2001-07-04
|
||||||
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight
|
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight,1,2001/07/04
|
||||||
arthur.duff@example4.com,Arthur Duff,OR,M,Dining table
|
arthur.duff@example4.com,Arthur Duff,OR,M,Dining table,1,2001-07
|
||||||
danny.baron@example1.com,Daniel Baron,CA,M,Bike
|
danny.baron@example1.com,Daniel Baron,,,Bike,1,2001
|
||||||
jean.griffith@example5.org,Jean Griffith,WA,F,Power drill
|
jean.griffith@example5.org,Jean Griffith,WA,F,Power drill,1,2000
|
||||||
melanie.white@example2.edu,Melanie White,NC,F,iPad
|
melanie.white@example2.edu,Melanie White,NC,F,'iPad',1,1999
|
||||||
ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier
|
ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier,1,1998
|
||||||
arthur.duff@example4.com,Arthur Duff,OR,M,Night table
|
arthur.duff@example4.com,Arthur Duff,OR,M,Night table,1,1997
|
||||||
|
|
Can't render this file because it contains an unexpected character in line 4 and column 33.
|
|
@ -0,0 +1,11 @@
|
||||||
|
email,name,state,gender,purchase
|
||||||
|
danny.baron@example1.com,Danny Baron,CA,M,TV
|
||||||
|
melanie.white@example2.edu,Melanie White,NC,F,iPhone
|
||||||
|
danny.baron@example1.com,D. Baron,CA,M,Winter jacket
|
||||||
|
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight
|
||||||
|
arthur.duff@example4.com,Arthur Duff,OR,M,Dining table
|
||||||
|
danny.baron@example1.com,Daniel Baron,CA,M,Bike
|
||||||
|
jean.griffith@example5.org,Jean Griffith,WA,F,Power drill
|
||||||
|
melanie.white@example2.edu,Melanie White,NC,F,iPad
|
||||||
|
ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier
|
||||||
|
arthur.duff@example4.com,Arthur Duff,OR,M,Night table
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,16 +0,0 @@
|
||||||
Tom Dalton sent $3700 to Betty Whitehead on 01/17/2009
|
|
||||||
377 El Camino Real
|
|
||||||
"San Jose, CA"
|
|
||||||
Status: received
|
|
||||||
Morgan Lawless received $10500 from Bob Henselman on 02/05/2009
|
|
||||||
2798 Lancaster Dr.
|
|
||||||
"New York, NY"
|
|
||||||
Status: deposited
|
|
||||||
Eric Bateman sent $22000 to Liz Benedict on 03/02/2009
|
|
||||||
89 Deerfield Cr.
|
|
||||||
"Springfield, WA"
|
|
||||||
Status: received
|
|
||||||
Robert Hartfort received $20000 from Ron Ingleman on 03/28/2009
|
|
||||||
198 Broadway Ave.
|
|
||||||
"Saratoga, CA"
|
|
||||||
Status: unknown
|
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,18 +0,0 @@
|
||||||
Tom Dalton sent $3700 to Betty Whitehead on 01/17/2009
|
|
||||||
377 El Camino Real
|
|
||||||
"San Jose, CA"
|
|
||||||
Status: received
|
|
||||||
Morgan Lawless received $10500 from Bob Henselman on 02/05/2009
|
|
||||||
2798 Lancaster Dr.
|
|
||||||
"New York, NY"
|
|
||||||
(000) 555-6717
|
|
||||||
Status: deposited
|
|
||||||
Eric Bateman sent $22000 to Liz Benedict on 03/02/2009
|
|
||||||
89 Deerfield Cr.
|
|
||||||
"Springfield, WA"
|
|
||||||
(000) 555-1411
|
|
||||||
Status: received
|
|
||||||
Robert Hartfort received $20000 from Ron Ingleman on 03/28/2009
|
|
||||||
198 Broadway Ave.
|
|
||||||
"Saratoga, CA"
|
|
||||||
Status: unknown
|
|
|
|
@ -1,52 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
refinetest.py
|
|
||||||
|
|
||||||
RefineTestCase is a base class that loads Refine projects specified by
|
|
||||||
the class's 'project_file' attribute and provides a 'project' object.
|
|
||||||
|
|
||||||
These tests require a connection to a Refine server either at
|
|
||||||
http://127.0.0.1:3333/ or by specifying environment variables REFINE_HOST
|
|
||||||
and REFINE_PORT.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
|
||||||
|
|
||||||
import os
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from google.refine import refine
|
|
||||||
|
|
||||||
PATH_TO_TEST_DATA = os.path.join(os.path.dirname(__file__), 'data')
|
|
||||||
|
|
||||||
|
|
||||||
#noinspection PyPep8Naming
|
|
||||||
class RefineTestCase(unittest.TestCase):
    """Base test case that creates a Refine project before each test.

    Subclasses set ``project_file`` to a file name inside the test-data
    directory.  ``setUp`` then creates the project through the Refine client
    and stores it in ``self.project``; ``tearDown`` deletes it again.  When
    ``project_file`` is left as ``None`` only ``self.server`` and
    ``self.refine`` are created.
    """

    # File name, relative to PATH_TO_TEST_DATA, of the project to load.
    project_file = None
    # Format forwarded to ``new_project``.
    project_format = 'text/line-based/*sv'
    # Extra keyword arguments forwarded to ``new_project``.  This dict is
    # shared by every subclass that does not rebind it, so treat it as
    # read-only and override it by assignment rather than by mutation.
    project_options = {}
    # Project created by setUp (None when no project_file is configured).
    project = None
    # Section "2. Exploration using Facets": {1}, {2}

    def project_path(self):
        """Return the path of ``project_file`` inside the test-data directory."""
        return os.path.join(PATH_TO_TEST_DATA, self.project_file)

    def setUp(self):
        """Create the server/client objects and, if configured, the project."""
        self.server = refine.RefineServer()
        self.refine = refine.Refine(self.server)
        if self.project_file:
            self.project = self.refine.new_project(
                project_file=self.project_path(),
                project_format=self.project_format,
                **self.project_options)

    def tearDown(self):
        """Delete the project created by setUp, if there is one."""
        if self.project:
            self.project.delete()
            self.project = None

    def assertInResponse(self, expect):
        """Fail unless ``expect`` occurs in the latest history description.

        Raises:
            AssertionError: if no project is loaded, or if ``expect`` is not
                contained in ``self.project.history_entry.description``.
        """
        if self.project is None:
            # Without a project there is no history entry to inspect; report
            # a test failure instead of an AttributeError on None.
            raise AssertionError(
                'Expecting "%s" but no project is loaded' % (expect,))
        desc = self.project.history_entry.description
        if expect not in desc:
            raise AssertionError('Expecting "%s" in "%s"' % (expect, desc))
|
|
|
@ -1,148 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
test_facet.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
|
||||||
|
|
||||||
import json
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from google.refine.facet import *
|
|
||||||
|
|
||||||
|
|
||||||
class CamelTest(unittest.TestCase):
    """Checks for the ``to_camel`` and ``from_camel`` name converters."""

    def test_to_camel(self):
        """Each snake_case input converts to the paired camelCase output."""
        expected_by_input = {
            'this': 'this',
            'this_attr': 'thisAttr',
            'From': 'from',
        }
        for snake_name, camel_name in expected_by_input.items():
            self.assertEqual(to_camel(snake_name), camel_name)

    def test_from_camel(self):
        """Each camelCase input converts to the paired snake_case output."""
        expected_by_input = {
            'this': 'this',
            'This': 'this',
            'thisAttr': 'this_attr',
            'ThisAttr': 'this_attr',
            'From': 'from',
        }
        for camel_name, snake_name in expected_by_input.items():
            self.assertEqual(from_camel(camel_name), snake_name)
|
|
||||||
|
|
||||||
|
|
||||||
class FacetTest(unittest.TestCase):
    """Construction and selection behaviour of the individual facet types."""

    def test_init(self):
        """Each facet type exposes the values it was constructed with."""
        text_facet = TextFacet('column name')
        engine = Engine(text_facet)
        self.assertEqual(text_facet.selection, [])
        self.assertTrue(str(engine))

        # 'From' is capitalised because 'from' is a reserved word.
        numeric_facet = NumericFacet('column name', From=1, to=5)
        self.assertEqual(numeric_facet.to, 5)
        self.assertEqual(numeric_facet.From, 1)

        unselected_star = StarredFacet()
        self.assertEqual(unselected_star.expression, 'row.starred')

        selected_star = StarredFacet(True)
        self.assertEqual(selected_star.selection[0]['v']['v'], True)

        unflagged = FlaggedFacet(False)
        self.assertEqual(unflagged.selection[0]['v']['v'], False)

        # no strings
        with self.assertRaises(ValueError):
            FlaggedFacet('false')

        filter_facet = TextFilterFacet('column name', 'query')
        self.assertEqual(filter_facet.query, 'query')

    def test_selections(self):
        """include/exclude/reset change the number of selected values."""
        text_facet = TextFacet('column name')

        def selected():
            return len(text_facet.selection)

        text_facet.include('element')
        self.assertEqual(selected(), 1)
        text_facet.include('element 2')
        self.assertEqual(selected(), 2)
        text_facet.exclude('element')
        self.assertEqual(selected(), 1)
        text_facet.reset()
        self.assertEqual(selected(), 0)
        # include() is chainable.
        text_facet.include('element').include('element 2')
        self.assertEqual(selected(), 2)
|
|
||||||
|
|
||||||
|
|
||||||
class EngineTest(unittest.TestCase):
    """Behaviour of ``Engine``: mode, facet bookkeeping and serialisation."""

    def test_init(self):
        """A new engine is row-based; setting facets leaves the mode alone."""
        eng = Engine()
        self.assertEqual(eng.mode, 'row-based')
        eng.mode = 'record-based'
        self.assertEqual(eng.mode, 'record-based')
        eng.set_facets(BlankFacet)
        self.assertEqual(eng.mode, 'record-based')
        eng.set_facets(BlankFacet, BlankFacet)
        self.assertEqual(len(eng), 2)

    def test_serialize(self):
        """Engine and facet objects serialise to the expected structures."""
        self.assertEqual(
            Engine().as_json(), '{"facets": [], "mode": "row-based"}')

        expected_text = {
            'selectError': False,
            'name': 'column',
            'selection': [],
            'expression': 'value',
            'invert': False,
            'columnName': 'column',
            'selectBlank': False,
            'omitBlank': False,
            'type': 'list',
            'omitError': False,
        }
        self.assertEqual(TextFacet(column='column').as_dict(), expected_text)

        expected_numeric = {
            'from': 1,
            'to': 5,
            'selectBlank': True,
            'name': 'column',
            'selectError': True,
            'expression': 'value',
            'selectNumeric': True,
            'columnName': 'column',
            'selectNonNumeric': True,
            'type': 'range',
        }
        numeric = NumericFacet(column='column', From=1, to=5)
        self.assertEqual(numeric.as_dict(), expected_numeric)

    def test_add_facet(self):
        """add_facet appends to the facets the engine was built with."""
        eng = Engine(TextFacet(column='Party Code'))
        eng.add_facet(TextFacet(column='Ethnicity'))
        self.assertEqual(len(eng.facets), 2)
        self.assertEqual(len(eng), 2)

    def test_reset_remove(self):
        """reset_all clears every selection; remove_all drops every facet."""
        first = TextFacet('column name')
        first.include('element')
        second = TextFacet('column name 2')
        second.include('element 2')

        eng = Engine(first, second)
        self.assertEqual(len(eng), 2)
        self.assertEqual(len(first.selection), 1)

        eng.reset_all()
        self.assertEqual(len(first.selection), 0)
        self.assertEqual(len(second.selection), 0)

        eng.remove_all()
        self.assertEqual(len(eng), 0)
|
|
||||||
|
|
||||||
|
|
||||||
class SortingTest(unittest.TestCase):
    """Construction of ``Sorting`` from nothing, a name, and mixed lists."""

    def test_sorting(self):
        """Sorting builds one criterion per requested column."""
        self.assertEqual(Sorting().as_json(), '{"criteria": []}')

        # A bare column name yields a single criterion with these fields.
        criterion = Sorting('email').criteria[0]
        expected_fields = (
            ('column', 'email'),
            ('valueType', 'string'),
            ('reverse', False),
            ('caseSensitive', False),
            ('errorPosition', 1),
            ('blankPosition', 2),
        )
        for key, value in expected_fields:
            self.assertEqual(criterion[key], value)

        self.assertEqual(len(Sorting(['email', 'gender'])), 2)

        # Column names and explicit criterion dicts may be mixed.
        mixed = Sorting(['email', {'column': 'date', 'valueType': 'date'}])
        self.assertEqual(len(mixed), 2)
        date_criterion = mixed.criteria[1]
        self.assertEqual(date_criterion['column'], 'date')
        self.assertEqual(date_criterion['valueType'], 'date')
|
|
||||||
|
|
||||||
|
|
||||||
class FacetsResponseTest(unittest.TestCase):
    """Parsing of a canned compute-facets response."""

    # Canned server reply for a single text facet on "Party Code".
    response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""

    def test_facet_response(self):
        """Facets can be reached by index, by facet object and by iteration."""
        party_code_facet = TextFacet('Party Code')
        payload = json.loads(self.response)
        parsed = Engine(party_code_facet).facets_response(payload).facets

        self.assertEqual(parsed[0].choices['D'].count, 3700)
        self.assertEqual(parsed[0].blank_choice.count, 1446)
        self.assertEqual(parsed[party_code_facet], parsed[0])

        # test iteration
        iterated = [item for item in parsed]
        self.assertEqual(iterated[0], parsed[0])
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
|
@ -1,31 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
test_history.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from google.refine.history import *
|
|
||||||
|
|
||||||
|
|
||||||
class HistoryTest(unittest.TestCase):
    """Construction of ``HistoryEntry`` from a server-style response."""

    def test_init(self):
        """HistoryEntry keeps the id, time and description it is given."""
        history_entry = {
            "id": 1303851435223,
            "description": "Split 4 cells",
            "time": "2011-04-26T16:45:08Z",
        }
        response = {"code": "ok", "historyEntry": history_entry}

        fields = response['historyEntry']
        entry = HistoryEntry(
            fields['id'], fields['time'], fields['description'])

        self.assertEqual(entry.id, 1303851435223)
        self.assertEqual(entry.description, 'Split 4 cells')
        self.assertEqual(entry.time, '2011-04-26T16:45:08Z')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
|
@ -1,80 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
test_refine.py
|
|
||||||
|
|
||||||
These tests require a connection to a Refine server either at
|
|
||||||
http://127.0.0.1:3333/ or by specifying environment variables
|
|
||||||
OPENREFINE_HOST and OPENREFINE_PORT.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
|
||||||
|
|
||||||
import csv
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from google.refine import refine
|
|
||||||
import refinetest
|
|
||||||
|
|
||||||
from io import StringIO
|
|
||||||
|
|
||||||
|
|
||||||
class RefineServerTest(refinetest.RefineTestCase):
    """Server-level checks; needs a reachable Refine server."""

    def test_init(self):
        """The server URL is built from REFINE_HOST/REFINE_PORT."""
        server_url = 'http://' + refine.REFINE_HOST
        # Port 80 is left out of the URL.
        if refine.REFINE_PORT != '80':
            server_url += ':' + refine.REFINE_PORT
        self.assertEqual(self.server.server, server_url)
        self.assertEqual(refine.RefineServer.url(), server_url)
        # strip trailing /
        server = refine.RefineServer('http://refine.example/')
        self.assertEqual(server.server, 'http://refine.example')

    def test_list_projects(self):
        """list_projects returns a dict."""
        projects = self.refine.list_projects()
        self.assertIsInstance(projects, dict)

    def test_get_version(self):
        """get_version reports every expected key."""
        version_info = self.server.get_version()
        for item in ('revision', 'version', 'full_version', 'full_name'):
            self.assertIn(item, version_info)

    def test_version(self):
        """The server reports one of the supported versions."""
        # The trailing comma matters: ('3.2') is just the string '3.2', which
        # would turn this into a substring test that also accepts '3' or ''.
        self.assertIn(self.server.version, ('3.2',))
|
|
||||||
|
|
||||||
|
|
||||||
class RefineTest(refinetest.RefineTestCase):
    """Project-level checks against the ``duplicates.csv`` fixture."""

    project_file = 'duplicates.csv'

    def test_new_project(self):
        """setUp produced a RefineProject instance."""
        self.assertIsInstance(self.project, refine.RefineProject)

    def test_wait_until_idle(self):
        """wait_until_idle returns when called on the fresh project."""
        self.project.wait_until_idle()  # should just return

    def test_get_models(self):
        """Column metadata is available on the project."""
        self.assertEqual(self.project.key_column, 'email')
        self.assertIn('email', self.project.columns)
        self.assertIn('email', self.project.column_order)
        self.assertEqual(self.project.column_order['name'], 1)

    def test_delete_project(self):
        """delete() reports success."""
        self.assertTrue(self.project.delete())
        # The base-class tearDown deletes self.project whenever it is truthy;
        # clear it so the already-deleted project is not deleted twice.
        self.project = None

    def test_open_export(self):
        """A project reopened by URL exports a header row plus data rows."""
        response = refine.RefineProject(self.project.project_url()).export()
        lines = response.text.splitlines()
        self.assertIn('email', lines[0])
        for line in lines[1:]:
            self.assertTrue('M' in line or 'F' in line)

    def test_open_export_csv(self):
        """The export parses with the csv 'excel-tab' dialect."""
        response = refine.RefineProject(self.project.project_url()).export()
        csv_fp = csv.reader(StringIO(response.text), dialect='excel-tab')
        header = next(csv_fp)
        self.assertEqual(header[0], 'email')
        for row in csv_fp:
            self.assertIn(row[3], ('F', 'M'))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
|
@ -1,81 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
test_refine_small.py
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from google.refine import refine
|
|
||||||
|
|
||||||
|
|
||||||
class RefineRowsTest(unittest.TestCase):
    """Parsing of a canned get-rows response; no server required."""

    def test_rows_response(self):
        """Rows support len(), iteration and indexing by column name."""
        column_index = {
            'gender': 3,
            'state': 2,
            'purchase': 4,
            'email': 0,
            'name': 1,
        }
        only_row = {
            'i': 0,
            'cells': [
                {'v': 'danny.baron@example1.com'},
                {'v': 'Danny Baron'},
                {'v': 'CA'},
                {'v': 'M'},
                {'v': 'TV'},
            ],
            'starred': False,
            'flagged': False,
        }
        payload = {
            'rows': [only_row],
            'start': 0,
            'limit': 1,
            'mode': 'row-based',
            'filtered': 10,
            'total': 10,
        }

        make_response = refine.RowsResponseFactory(column_index)
        response = make_response(payload)

        self.assertEqual(len(response.rows), 1)
        # test iteration
        iterated = [row for row in response.rows]
        self.assertEqual(iterated[0]['name'], 'Danny Baron')
        # test indexing
        self.assertEqual(response.rows[0]['name'], 'Danny Baron')
|
|
||||||
|
|
||||||
|
|
||||||
class RefineProjectTest(unittest.TestCase):
    """How ``RefineProject`` derives server URL and project id."""

    def setUp(self):
        """Stub out get_models and pin REFINE_HOST/REFINE_PORT."""
        # Mock out get_models so it doesn't attempt to connect to a server
        self._get_models = refine.RefineProject.get_models
        refine.RefineProject.get_models = lambda me: me
        # Save REFINE_{HOST,PORT} as tests overwrite it
        self._refine_host_port = (refine.REFINE_HOST, refine.REFINE_PORT)
        refine.REFINE_HOST = '127.0.0.1'
        refine.REFINE_PORT = '3333'

    def test_server_init(self):
        """Every accepted constructor form yields the same server and id."""
        make_project = refine.RefineProject
        project_id = '1658955153749'
        local_server = 'http://127.0.0.1:3333'

        # Full project URL.
        proj = make_project(
            'http://127.0.0.1:3333/project?project=1658955153749')
        self.assertEqual(proj.server.server, local_server)
        self.assertEqual(proj.project_id, project_id)

        # Server URL and project id as separate arguments.
        proj = make_project('http://127.0.0.1:3333', '1658955153749')
        self.assertEqual(proj.server.server, local_server)
        self.assertEqual(proj.project_id, project_id)

        # Project URL whose server part includes a path prefix.
        proj = make_project(
            'http://server/varnish/project?project=1658955153749')
        self.assertEqual(proj.server.server, 'http://server/varnish')
        self.assertEqual(proj.project_id, project_id)

        # Bare project id: the server comes from REFINE_HOST/REFINE_PORT.
        proj = make_project('1658955153749')
        self.assertEqual(proj.server.server, local_server)
        self.assertEqual(proj.project_id, project_id)

        # With port 80 the expected server URL carries no port.
        refine.REFINE_HOST = '10.0.0.1'
        refine.REFINE_PORT = '80'
        proj = make_project('1658955153749')
        self.assertEqual(proj.server.server, 'http://10.0.0.1')

    def tearDown(self):
        """Undo the patches applied in setUp."""
        # Restore mocked get_models
        refine.RefineProject.get_models = self._get_models
        # Restore values for REFINE_{HOST,PORT}
        saved_host, saved_port = self._refine_host_port
        refine.REFINE_HOST = saved_host
        refine.REFINE_PORT = saved_port
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
|
@ -1,490 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
test_tutorial.py
|
|
||||||
|
|
||||||
The tests here are based on David Huynh's Refine tutorial at
|
|
||||||
http://davidhuynh.net/spaces/nicar2011/tutorial.pdf The tests perform all the
|
|
||||||
Refine actions given in the tutorial (except the web scraping) and verify the
|
|
||||||
changes expected to be observed explained in the tutorial.
|
|
||||||
|
|
||||||
These tests require a connection to a Refine server either at
|
|
||||||
http://127.0.0.1:3333/ or by specifying environment variables
|
|
||||||
OPENREFINE_HOST and OPENREFINE_PORT.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
|
|
||||||
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from google.refine import facet
|
|
||||||
import refinetest
|
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestFacets(refinetest.RefineTestCase):
    """Tutorial section 2, "Exploration using Facets".

    The brace-numbered comments ({3}, {4}, ...) refer to the numbered steps
    of the tutorial.
    """

    project_file = 'louisiana-elected-officials.csv'
    project_options = {'guess_cell_value_types': True}

    def test_get_rows(self):
        """The first page of rows has the expected size and totals."""
        # Section "2. Exploration using Facets": {3}
        page = self.project.get_rows(limit=10)
        self.assertEqual(len(page.rows), 10)
        self.assertEqual(page.limit, 10)
        self.assertEqual(page.total, 6958)
        self.assertEqual(page.filtered, 6958)
        for row in page.rows:
            self.assertFalse(row.flagged)
            self.assertFalse(row.starred)

    def test_facet(self):
        """Walk through the faceting steps and check the reported counts."""
        # Section "2. Exploration using Facets": {4}
        party_code_facet = facet.TextFacet(column='Party Code')
        response = self.project.compute_facets(party_code_facet)
        party = response.facets[0]
        # test look by index same as look up by facet object
        self.assertEqual(party, response.facets[party_code_facet])
        self.assertEqual(party.name, 'Party Code')
        self.assertEqual(party.choices['D'].count, 3700)
        self.assertEqual(party.choices['N'].count, 15)
        self.assertEqual(party.blank_choice.count, 1446)

        # {5}, {6}
        engine = facet.Engine(party_code_facet)
        ethnicity_facet = facet.TextFacet(column='Ethnicity')
        engine.add_facet(ethnicity_facet)
        self.project.engine = engine
        response = self.project.compute_facets()
        ethnicity = response.facets[ethnicity_facet]
        self.assertEqual(ethnicity.choices['B'].count, 1255)
        self.assertEqual(ethnicity.choices['W'].count, 4469)

        # {7}
        ethnicity_facet.include('B')
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 1255)
        row_indexes = [row.index for row in response.rows]
        self.assertEqual(row_indexes, [1, 2, 3, 4, 6, 12, 18, 26, 28, 32])

        # {8}
        response = self.project.compute_facets()
        party = response.facets[party_code_facet]
        self.assertEqual(party.name, 'Party Code')
        self.assertEqual(party.choices['D'].count, 1179)
        self.assertEqual(party.choices['R'].count, 11)
        self.assertEqual(party.blank_choice.count, 46)

        # {9}
        party_code_facet.include('R')
        response = self.project.compute_facets()
        ethnicity = response.facets[ethnicity_facet]
        self.assertEqual(ethnicity.choices['B'].count, 11)

        # {10}
        party_code_facet.reset()
        ethnicity_facet.reset()
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 6958)

        # {11}
        office_title_facet = facet.TextFacet('Office Title')
        self.project.engine.add_facet(office_title_facet)
        response = self.project.compute_facets()
        self.assertEqual(len(response.facets[2].choices), 76)

        # {12} - XXX not sure how to interpret bins & baseBins yet
        office_level_facet = facet.NumericFacet('Office Level')
        self.project.engine.add_facet(office_level_facet)

        # {13}
        office_level_facet.From = 300   # from reserved word
        office_level_facet.to = 320
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 1907)
        response = self.project.compute_facets()
        office_title = response.facets[office_title_facet]
        self.assertEqual(len(office_title.choices), 21)
        self.assertEqual(office_title.choices['Chief of Police'].count, 2)
        self.assertEqual(office_title.choices['Chief of Police '].count, 211)

        # {14}
        self.project.engine.remove_all()
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 6958)

        # {15}
        phone_facet = facet.TextFacet('Phone', expression='value[0, 3]')
        self.project.engine.add_facet(phone_facet)
        response = self.project.compute_facets()
        phone = response.facets[phone_facet]
        self.assertEqual(phone.expression, 'value[0, 3]')
        self.assertEqual(phone.choices['318'].count, 2331)

        # {16}
        commissioned_date_facet = facet.NumericFacet(
            'Commissioned Date',
            expression='value.toDate().datePart("year")')
        self.project.engine.add_facet(commissioned_date_facet)
        response = self.project.compute_facets()
        commissioned = response.facets[commissioned_date_facet]
        self.assertEqual(commissioned.error_count, 959)
        self.assertEqual(commissioned.numeric_count, 5999)

        # {17}
        office_description_facet = facet.NumericFacet(
            'Office Description',
            expression=r'value.match(/\D*(\d+)\w\w Rep.*/)[0].toNumber()')
        self.project.engine.add_facet(office_description_facet)
        response = self.project.compute_facets()
        description = response.facets[office_description_facet]
        self.assertEqual(description.min, 0)
        self.assertEqual(description.max, 110)
        self.assertEqual(description.numeric_count, 548)
|
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestEditing(refinetest.RefineTestCase):
    """Tutorial sections 3 and 4: cell editing and batched row deletion.

    The brace-numbered comments refer to the numbered steps of the tutorial.
    """

    project_file = 'louisiana-elected-officials.csv'
    project_options = {'guess_cell_value_types': True}

    def _office_title_choice_count(self, office_title_facet):
        """Recompute facets and return how many Office Title choices exist."""
        response = self.project.compute_facets()
        return len(response.facets[office_title_facet].choices)

    def test_editing(self):
        """Apply the tutorial's edits and check each reported effect."""
        # Section "3. Cell Editing": {1}
        self.project.engine.remove_all()    # redundant due to setUp

        # {2}
        self.project.text_transform(
            column='Zip Code 2', expression='value.toString()[0, 5]')
        self.assertInResponse('transform on 6958 cells in column Zip Code 2')

        # {3} - XXX history
        # {4}
        office_title_facet = facet.TextFacet('Office Title')
        self.project.engine.add_facet(office_title_facet)
        self.assertEqual(
            self._office_title_choice_count(office_title_facet), 76)
        self.project.text_transform('Office Title', 'value.trim()')
        self.assertInResponse('6895')
        self.assertEqual(
            self._office_title_choice_count(office_title_facet), 67)

        # {5}
        self.project.edit('Office Title', 'Councilmen', 'Councilman')
        self.assertInResponse('13')
        self.assertEqual(
            self._office_title_choice_count(office_title_facet), 66)

        # {6}
        response = self.project.compute_clusters('Office Title')
        self.assertTrue(response)

        # {7}
        clusters = self.project.compute_clusters('Office Title', 'knn')
        self.assertEqual(len(clusters), 7)
        first_cluster = clusters[0]
        self.assertEqual(len(first_cluster), 2)
        self.assertEqual(first_cluster[0]['value'], 'DPEC Member at Large')
        self.assertEqual(first_cluster[0]['count'], 6)
        # Not strictly necessary to repeat 'Council Member' but a test
        # of mass_edit, and it's also what the front end sends.
        council_member_edit = {
            'from': ['Council Member', 'Councilmember'],
            'to': 'Council Member',
        }
        self.project.mass_edit('Office Title', [council_member_edit])
        self.assertInResponse('372')
        self.assertEqual(
            self._office_title_choice_count(office_title_facet), 65)

        # Section "4. Row and Column Editing, Batched Row Deletion"
        # Test doesn't strictly follow the tutorial as the "Browse this
        # cluster" performs a text facet which the server can't complete
        # as it busts its max facet count. The useful work is done with
        # get_rows(). Also, we can facet & select in one; the UI can't.
        # {1}, {2}, {3}, {4}
        clusters = self.project.compute_clusters('Candidate Name')
        for cluster in clusters[0:3]:   # just do a few
            for match in cluster:
                # {2}
                if not match['value'].endswith(', '):
                    continue
                response = self.project.get_rows(
                    facet.TextFacet('Candidate Name', match['value']))
                self.assertEqual(len(response.rows), 1)
                for row in response.rows:
                    self.project.star_row(row)
                    self.assertInResponse(str(row.index + 1))

        # {5}, {6}, {7}
        response = self.project.compute_facets(facet.StarredFacet(True))
        starred = response.facets[0]
        self.assertEqual(len(starred.choices), 2)   # true & false
        self.assertEqual(starred.choices[True].count, 2)
        self.project.remove_rows()
        self.assertInResponse('2 rows')
|
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestDuplicateDetection(refinetest.RefineTestCase):
    """Tutorial section 4: detect and delete rows with a duplicate e-mail."""

    project_file = 'duplicates.csv'

    def test_duplicate_detection(self):
        """Sort by e-mail, count occurrences, then remove the repeated rows."""
        # Section "4. Row and Column Editing,
        #             Duplicate Row Detection and Deletion"
        # {7}, {8} - rows come back sorted by e-mail but keep their
        # original indexes.
        by_email = self.project.get_rows(sort_by='email')
        self.assertEqual([row.index for row in by_email.rows],
                         [4, 9, 8, 3, 0, 2, 5, 6, 1, 7])
        # {9} - after reordering, the indexes are sequential again.
        self.project.reorder_rows()
        self.assertInResponse('Reorder rows')
        reordered = self.project.get_rows()
        self.assertEqual([row.index for row in reordered.rows],
                         list(range(10)))
        # {10} - add a 'count' column right after 'email'.
        self.project.add_column(
            'email', 'count', 'facetCount(value, "value", "email")')
        self.assertInResponse('column email by filling 10 rows')
        counted = self.project.get_rows()
        self.assertEqual(self.project.column_order['email'], 0)  # i.e. 1st
        self.assertEqual(self.project.column_order['count'], 1)  # i.e. 2nd
        self.assertEqual([row['count'] for row in counted.rows],
                         [2, 2, 1, 1, 3, 3, 3, 1, 2, 2])
        # {11}
        self.assertFalse(self.project.has_records)
        self.project.blank_down('email')
        self.assertInResponse('Blank down 4 cells')
        self.assertTrue(self.project.has_records)
        blanked = self.project.get_rows()
        # 1 where the e-mail cell is truthy, 0 otherwise.
        has_email = [int(bool(row['email'])) for row in blanked.rows]
        self.assertEqual(has_email, [1, 0, 1, 1, 1, 0, 0, 1, 1, 0])
        # {12}
        blank_emails = facet.BlankFacet('email', selection=True)
        # {13}
        self.project.remove_rows(blank_emails)
        self.assertInResponse('Remove 4 rows')
        self.project.engine.remove_all()
        remaining = self.project.get_rows()
        expected_counts = [
            (u'arthur.duff@example4.com', 2),
            (u'ben.morisson@example6.org', 1),
            (u'ben.tyler@example3.org', 1),
            (u'danny.baron@example1.com', 3),
            (u'jean.griffith@example5.org', 1),
            (u'melanie.white@example2.edu', 2),
        ]
        actual_counts = [(row['email'], row['count'])
                         for row in remaining.rows]
        self.assertEqual(actual_counts, expected_counts)
|
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestTransposeColumnsIntoRows(refinetest.RefineTestCase):
    """Tutorial section 5: transpose the fiscal-year columns into rows."""

    project_file = 'us_economic_assistance.csv'

    def test_transpose_columns_into_rows(self):
        """Transpose 64 columns into a 'pair' column and split it up."""
        # Section "5. Structural Editing, Transpose Columns into Rows"
        # {1}, {2}, {3}
        self.project.transpose_columns_into_rows('FY1946', 64, 'pair')
        self.assertInResponse('64 column(s) starting with FY1946')
        # {4}
        year_expression = 'value[2,6].toNumber()'
        self.project.add_column('pair', 'year', year_expression)
        self.assertInResponse('filling 26185 rows')
        # {5}
        amount_expression = 'value.substring(7).toNumber()'
        self.project.text_transform(
            column='pair', expression=amount_expression)
        self.assertInResponse('transform on 26185 cells')
        # {6}
        self.project.rename_column('pair', 'amount')
        self.assertInResponse('Rename column pair to amount')
        # {7} - both columns are filled down, each reporting the same count.
        for column in ('country_name', 'program_name'):
            self.project.fill_down(column)
            self.assertInResponse('Fill down 23805 cells')
        # Spot check the tenth returned row for transforms and fill down.
        tenth_row = self.project.get_rows().rows[9]
        self.assertEqual(tenth_row['country_name'], 'Afghanistan')
        self.assertEqual(tenth_row['program_name'],
                         'Department of Defense Security Assistance')
        self.assertEqual(tenth_row['amount'], 113777303)
|
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestTransposeFixedNumberOfRowsIntoColumns(
        refinetest.RefineTestCase):
    """Tutorial section 5: transpose a fixed number of rows into columns."""

    project_file = 'fixed-rows.csv'
    project_format = 'text/line-based'
    project_options = {'header_lines': 0}

    def test_transpose_fixed_number_of_rows_into_columns(self):
        """Transpose every 4 cells, then derive the transaction columns."""
        if self.server.version not in ('2.0', '2.1'):
            self.project.rename_column('Column 1', 'Column')
        # Section "5. Structural Editing,
        #             Transpose Fixed Number of Rows into Columns"
        # {1}
        self.assertTrue('Column' in self.project.column_order)
        # {8}
        self.project.transpose_rows_into_columns('Column', 4)
        self.assertInResponse('Transpose every 4 cells in column Column')
        # {9} - renaming column triggers a bug in Refine <= 2.1
        if self.server.version not in ('2.0', '2.1'):
            for old_name, new_name in (('Column 2', 'Address'),
                                       ('Column 3', 'Address 2'),
                                       ('Column 4', 'Status')):
                self.project.rename_column(old_name, new_name)
        # {10}
        self.project.add_column(
            'Column 1', 'Transaction',
            'if(value.contains(" sent "), "send", "receive")')
        self.assertInResponse('Column 1 by filling 4 rows')
        # {11}
        transaction_facet = facet.TextFacet(
            column='Transaction', selection='send')
        self.project.engine.add_facet(transaction_facet)
        self.project.compute_facets()
        # {12}, {13}, {14}
        # XXX resetting the facet shows data in rows with Transaction=receive
        #     which shouldn't have been possible with the facet.
        send_columns = (
            ('Sender', 'value.partition(" sent ")[0]'),
            ('Recipient', 'value.partition(" to ")[2].partition(" on ")[0]'),
            ('Amount', 'value.partition(" sent ")[2].partition(" to ")[0]'),
        )
        for new_column, expression in send_columns:
            self.project.add_column('Column 1', new_column, expression)
        # {15}
        transaction_facet.reset().include('receive')
        self.project.get_rows()
        # XXX there seems to be some kind of bug where the model doesn't
        #     match get_rows() output - cellIndex being returned that are
        #     out of range. The following checks are disabled for now:
        #         a_row['Sender'] is None
        #         a_row['Recipient'] is None
        #         a_row['Amount'] is None
        # {16}
        receive_fixes = (
            ('Sender',
             'cells["Column 1"].value'
             '.partition(" from ")[2].partition(" on ")[0]'),
            ('Recipient',
             'cells["Column 1"].value'
             '.partition(" received ")[0]'),
            ('Amount',
             'cells["Column 1"].value'
             '.partition(" received ")[2].partition(" from ")[0]'),
        )
        for column, expression in receive_fixes:
            self.project.text_transform(column, expression)
            self.assertInResponse('2 cells')
        # {17}
        transaction_facet.reset()
        # {18}
        self.project.text_transform('Column 1', 'value.partition(" on ")[2]')
        self.assertInResponse('4 cells')
        # {19}
        new_order = ['Transaction', 'Amount', 'Sender', 'Recipient']
        self.project.reorder_columns(new_order)
        self.assertInResponse('Reorder columns')
|
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestTransposeVariableNumberOfRowsIntoColumns(
        refinetest.RefineTestCase):
    """Tutorial section 5: transpose a variable number of rows into columns.

    The project is imported line-based with no header line; records are
    formed by moving a 'First Line' column to the front of the model.
    """

    project_file = 'variable-rows.csv'
    project_format = 'text/line-based'
    project_options = {'header_lines': 0}

    def test_transpose_variable_number_of_rows_into_columns(self):
        """Group lines into records, collapse them and split into columns."""
        # {20}, {21}
        if self.server.version not in ('2.0', '2.1'):
            self.project.rename_column('Column 1', 'Column')
        self.project.add_column(
            'Column', 'First Line', 'if(value.contains(" on "), value, null)')
        self.assertInResponse('Column by filling 4 rows')
        response = self.project.get_rows()
        # Only the first 10 characters are compared; blank cells map to None.
        first_names = [row['First Line'][0:10] if row['First Line'] else None
                       for row in response.rows]
        self.assertEqual(first_names, [
            'Tom Dalton', None, None, None,
            'Morgan Law', None, None, None, None, 'Eric Batem'])
        # {22}
        self.project.move_column('First Line', 0)
        self.assertInResponse('Move column First Line to position 0')
        self.assertEqual(self.project.column_order['First Line'], 0)
        # {23}
        self.project.engine.mode = 'record-based'
        response = self.project.get_rows()
        self.assertEqual(response.mode, 'record-based')
        self.assertEqual(response.filtered, 4)
        # {24}
        self.project.add_column(
            'Column', 'Status', 'row.record.cells["Column"].value[-1]')
        self.assertInResponse('filling 18 rows')
        # {25}
        self.project.text_transform(
            'Column', 'row.record.cells["Column"].value[1, -1].join("|")')
        self.assertInResponse('18 cells')
        # {26} - switch back to row mode so the blank facet below removes
        # individual rows. The previous value 'fd' is not an engine mode
        # name.
        self.project.engine.mode = 'row-based'
        # {27}
        blank_facet = facet.BlankFacet('First Line', selection=True)
        self.project.remove_rows(blank_facet)
        self.assertInResponse('Remove 14 rows')
        self.project.engine.remove_all()
        # {28}
        self.project.split_column('Column', separator='|')
        self.assertInResponse('Split 4 cell(s) in column Column')
|
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestWebScraping(refinetest.RefineTestCase):
    """Tutorial section 6: clean up scraped page lines."""

    project_file = 'eli-lilly.csv'

    # NOTE(review): as rendered here the replace() call below swaps a space
    # for a space; the first argument was presumably a non-breaking-space
    # entity before this text was extracted - confirm against the original.
    filter_expr_1 = """
        forEach(
            value[2,-2].replace(" ", " ").split("), ("),
            v,
            v[0,-1].partition(", '", true).join(":")
        ).join("|")
    """
    # Takes a number via %-formatting; only referenced by the disabled
    # step {9} below.
    filter_expr_2 = """
        filter(
            value.split("|"), p, p.partition(":")[0].toNumber() == %d
        )[0].partition(":")[2]
    """

    def test_web_scraping(self):
        """Split the key column, sort, trim unwanted rows and transform."""
        # Section "6. Web Scraping"
        # {1}, {2}
        self.project.split_column('key', separator=':')
        self.assertInResponse('Split 5409 cell(s) in column key')
        renames = (
            ('key 1', 'page', 'Rename column key 1 to page'),
            ('key 2', 'top', 'Rename column key 2 to top'),
        )
        for old_name, new_name, message in renames:
            self.project.rename_column(old_name, new_name)
            self.assertInResponse(message)
        self.project.move_column('line', 'end')
        self.assertInResponse('Move column line to position 2')
        # {3}
        sort_criteria = [
            {'column': 'page', 'valueType': 'number'},
            {'column': 'top', 'valueType': 'number'},
        ]
        self.project.sorting = facet.Sorting(sort_criteria)
        self.project.reorder_rows()
        self.assertInResponse('Reorder rows')
        top_row = self.project.get_rows(limit=1).rows[0]
        self.assertEqual(top_row['page'], 1)
        self.assertEqual(top_row['top'], 24)
        # {4}
        text_filter = facet.TextFilterFacet('line', 'ahman')
        matches = self.project.get_rows(text_filter).rows
        self.assertEqual(len(matches), 1)
        self.assertEqual(matches[0]['top'], 106)
        text_filter.query = 'alvarez'
        matches = self.project.get_rows().rows
        self.assertEqual(len(matches), 2)
        self.assertEqual(matches[-1]['top'], 567)
        self.project.engine.remove_all()
        # {5} - tutorial says 'line'; it means 'top'
        top_facet = facet.NumericFacet('top')
        top_facet.to = 100
        self.project.remove_rows(top_facet)
        self.assertInResponse('Remove 775 rows')
        top_facet.From = 570
        top_facet.to = 600
        self.project.remove_rows(top_facet)
        self.assertInResponse('Remove 71 rows')
        top_facet.reset()
        remaining = self.project.get_rows()
        self.assertEqual(remaining.filtered, 4563)
        # {6}
        first_page = facet.TextFacet('page', 1)  # 1 not '1'
        self.project.engine.add_facet(first_page)
        # {7}
        page_rows = self.project.get_rows().rows
        # Look for a row with a name in it by skipping HTML
        plain_rows = [row for row in page_rows if '<b>' not in row['line']]
        name_row = plain_rows[0]
        self.assertTrue('WELLNESS' in name_row['line'])
        self.assertEqual(name_row['top'], 161)
        top_facet.From = 20
        top_facet.to = 160
        self.project.remove_rows()
        self.assertInResponse('Remove 9 rows')
        self.project.engine.remove_all()
        # {8}
        self.project.text_transform('line', expression=self.filter_expr_1)
        self.assertInResponse('Text transform on 4554 cells in column line')
        # {9} - XXX following is generating Java exceptions
        #     filter_expr = self.filter_expr_2 % 16
        #     self.project.add_column('line', 'Name', expression=filter_expr)
        # {10} to the final {19} - nothing new in terms of exercising the API.
|
|
||||||
|
|
||||||
|
|
||||||
# Run the tutorial tests when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
|
Loading…
Reference in New Issue