delete old tests (will be reimplemented)

This commit is contained in:
Felix Lohmeier 2020-08-01 16:23:06 +02:00
parent cca067a87c
commit bb9792fd15
30 changed files with 22 additions and 15690 deletions

View File

@ -1,4 +1,2 @@
include README.md include README.md
include COPYING.txt include COPYING.txt
recursive-include tests/data *.csv
recursive-include tests *.py

View File

View File

@ -1,11 +0,0 @@
email,name,state,gender,purchase,count,date
danny.baron@example1.com,Danny Baron,CA,M,TV (UTF-8: 📺),1,"Wed, 4 Jul 2001"
melanie.white@example2.edu,Melanie White,NC,F,<iPhone>,1,2001-07-04T12:08:56
danny.baron@example1.com, D. ("Tab") Baron,CA,M,Winter jacket,1,2001-07-04
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight,1,2001/07/04
arthur.duff@example4.com,Arthur Duff,OR,M,Dining table,1,2001-07
danny.baron@example1.com,Daniel Baron,,,Bike,1,2001
jean.griffith@example5.org,Jean Griffith,WA,F,Power drill,1,2000
melanie.white@example2.edu,Melanie White,NC,F,'iPad',1,1999
ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier,1,1998
arthur.duff@example4.com,Arthur Duff,OR,M,Night table,1,1997
Can't render this file because it contains an unexpected character in line 4 and column 33.

View File

@ -1,11 +1,11 @@
email,name,state,gender,purchase email,name,state,gender,purchase,count,date
danny.baron@example1.com,Danny Baron,CA,M,TV danny.baron@example1.com,Danny Baron,CA,M,TV (UTF-8: 📺),1,"Wed, 4 Jul 2001"
melanie.white@example2.edu,Melanie White,NC,F,iPhone melanie.white@example2.edu,Melanie White,NC,F,<iPhone>,1,2001-07-04T12:08:56
danny.baron@example1.com,D. Baron,CA,M,Winter jacket danny.baron@example1.com, D. ("Tab") Baron,CA,M,Winter jacket,1,2001-07-04
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight,1,2001/07/04
arthur.duff@example4.com,Arthur Duff,OR,M,Dining table arthur.duff@example4.com,Arthur Duff,OR,M,Dining table,1,2001-07
danny.baron@example1.com,Daniel Baron,CA,M,Bike danny.baron@example1.com,Daniel Baron,,,Bike,1,2001
jean.griffith@example5.org,Jean Griffith,WA,F,Power drill jean.griffith@example5.org,Jean Griffith,WA,F,Power drill,1,2000
melanie.white@example2.edu,Melanie White,NC,F,iPad melanie.white@example2.edu,Melanie White,NC,F,'iPad',1,1999
ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier,1,1998
arthur.duff@example4.com,Arthur Duff,OR,M,Night table arthur.duff@example4.com,Arthur Duff,OR,M,Night table,1,1997

Can't render this file because it contains an unexpected character in line 4 and column 33.

View File

@ -0,0 +1,11 @@
email,name,state,gender,purchase
danny.baron@example1.com,Danny Baron,CA,M,TV
melanie.white@example2.edu,Melanie White,NC,F,iPhone
danny.baron@example1.com,D. Baron,CA,M,Winter jacket
ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight
arthur.duff@example4.com,Arthur Duff,OR,M,Dining table
danny.baron@example1.com,Daniel Baron,CA,M,Bike
jean.griffith@example5.org,Jean Griffith,WA,F,Power drill
melanie.white@example2.edu,Melanie White,NC,F,iPad
ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier
arthur.duff@example4.com,Arthur Duff,OR,M,Night table
1 email name state gender purchase
2 danny.baron@example1.com Danny Baron CA M TV
3 melanie.white@example2.edu Melanie White NC F iPhone
4 danny.baron@example1.com D. Baron CA M Winter jacket
5 ben.tyler@example3.org Ben Tyler NV M Flashlight
6 arthur.duff@example4.com Arthur Duff OR M Dining table
7 danny.baron@example1.com Daniel Baron CA M Bike
8 jean.griffith@example5.org Jean Griffith WA F Power drill
9 melanie.white@example2.edu Melanie White NC F iPad
10 ben.morisson@example6.org Ben Morisson FL M Amplifier
11 arthur.duff@example4.com Arthur Duff OR M Night table

File diff suppressed because it is too large Load Diff

View File

@ -1,16 +0,0 @@
Tom Dalton sent $3700 to Betty Whitehead on 01/17/2009
377 El Camino Real
"San Jose, CA"
Status: received
Morgan Lawless received $10500 from Bob Henselman on 02/05/2009
2798 Lancaster Dr.
"New York, NY"
Status: deposited
Eric Bateman sent $22000 to Liz Benedict on 03/02/2009
89 Deerfield Cr.
"Springfield, WA"
Status: received
Robert Hartfort received $20000 from Ron Ingleman on 03/28/2009
198 Broadway Ave.
"Saratoga, CA"
Status: unknown
1 Tom Dalton sent $3700 to Betty Whitehead on 01/17/2009
2 377 El Camino Real
3 San Jose, CA
4 Status: received
5 Morgan Lawless received $10500 from Bob Henselman on 02/05/2009
6 2798 Lancaster Dr.
7 New York, NY
8 Status: deposited
9 Eric Bateman sent $22000 to Liz Benedict on 03/02/2009
10 89 Deerfield Cr.
11 Springfield, WA
12 Status: received
13 Robert Hartfort received $20000 from Ron Ingleman on 03/28/2009
14 198 Broadway Ave.
15 Saratoga, CA
16 Status: unknown

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +0,0 @@
Tom Dalton sent $3700 to Betty Whitehead on 01/17/2009
377 El Camino Real
"San Jose, CA"
Status: received
Morgan Lawless received $10500 from Bob Henselman on 02/05/2009
2798 Lancaster Dr.
"New York, NY"
(000) 555-6717
Status: deposited
Eric Bateman sent $22000 to Liz Benedict on 03/02/2009
89 Deerfield Cr.
"Springfield, WA"
(000) 555-1411
Status: received
Robert Hartfort received $20000 from Ron Ingleman on 03/28/2009
198 Broadway Ave.
"Saratoga, CA"
Status: unknown
1 Tom Dalton sent $3700 to Betty Whitehead on 01/17/2009
2 377 El Camino Real
3 San Jose, CA
4 Status: received
5 Morgan Lawless received $10500 from Bob Henselman on 02/05/2009
6 2798 Lancaster Dr.
7 New York, NY
8 (000) 555-6717
9 Status: deposited
10 Eric Bateman sent $22000 to Liz Benedict on 03/02/2009
11 89 Deerfield Cr.
12 Springfield, WA
13 (000) 555-1411
14 Status: received
15 Robert Hartfort received $20000 from Ron Ingleman on 03/28/2009
16 198 Broadway Ave.
17 Saratoga, CA
18 Status: unknown

View File

@ -1,52 +0,0 @@
#!/usr/bin/env python
"""
refinetest.py
RefineTestCase is a base class that loads Refine projects specified by
the class's 'project_file' attribute and provides a 'project' object.
These tests require a connection to a Refine server either at
http://127.0.0.1:3333/ or by specifying environment variables OPENREFINE_HOST
and OPENREFINE_PORT.
"""
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
import os
import unittest
from google.refine import refine
PATH_TO_TEST_DATA = os.path.join(os.path.dirname(__file__), 'data')
#noinspection PyPep8Naming
class RefineTestCase(unittest.TestCase):
    """Base test case that creates a Refine project before each test.

    Subclasses set ``project_file`` (a file name inside the test data
    directory) and, if needed, ``project_format`` / ``project_options``.
    The created project is available as ``self.project`` and is deleted
    again in ``tearDown``.
    """

    # File name joined onto PATH_TO_TEST_DATA; a falsy value skips creation.
    project_file = None
    project_format = 'text/line-based/*sv'
    # Extra keyword arguments forwarded to new_project().
    project_options = {}
    project = None

    # Section "2. Exploration using Facets": {1}, {2}
    def project_path(self):
        """Return the path of ``project_file`` inside the test data dir."""
        return os.path.join(PATH_TO_TEST_DATA, self.project_file)

    def setUp(self):
        """Connect to the server and create the project, if one is set."""
        self.server = refine.RefineServer()
        self.refine = refine.Refine(self.server)
        if not self.project_file:
            return
        self.project = self.refine.new_project(
            project_file=self.project_path(),
            project_format=self.project_format,
            **self.project_options)

    def tearDown(self):
        """Delete the project created in ``setUp`` (if any)."""
        if not self.project:
            return
        self.project.delete()
        self.project = None

    def assertInResponse(self, expect):
        """Fail unless ``expect`` occurs in the last history description."""
        description = self.project.history_entry.description
        if expect not in description:
            raise AssertionError(
                'Expecting "%s" in "%s"' % (expect, description))

View File

@ -1,148 +0,0 @@
#!/usr/bin/env python
"""
test_facet.py
"""
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
import json
import unittest
from google.refine.facet import *
class CamelTest(unittest.TestCase):
    """Checks for the to_camel() / from_camel() name helpers."""

    def test_to_camel(self):
        """to_camel() returns the expected name for each sample input."""
        expectations = [
            ('this', 'this'),
            ('this_attr', 'thisAttr'),
            ('From', 'from'),
        ]
        for source_name, wanted in expectations:
            converted = to_camel(source_name)
            self.assertEqual(converted, wanted)

    def test_from_camel(self):
        """from_camel() returns the expected name for each sample input."""
        expectations = [
            ('this', 'this'),
            ('This', 'this'),
            ('thisAttr', 'this_attr'),
            ('ThisAttr', 'this_attr'),
            ('From', 'from'),
        ]
        for source_name, wanted in expectations:
            converted = from_camel(source_name)
            self.assertEqual(converted, wanted)
class FacetTest(unittest.TestCase):
    """Construction and selection handling of the individual facet types."""

    def test_init(self):
        """Each facet type exposes the values it was constructed with."""
        text = TextFacet('column name')
        engine = Engine(text)
        self.assertEqual(text.selection, [])
        self.assertTrue(str(engine))

        numeric = NumericFacet('column name', From=1, to=5)
        self.assertEqual(numeric.to, 5)
        self.assertEqual(numeric.From, 1)

        unselected_starred = StarredFacet()
        self.assertEqual(unselected_starred.expression, 'row.starred')

        starred = StarredFacet(True)
        self.assertEqual(starred.selection[0]['v']['v'], True)
        flagged = FlaggedFacet(False)
        self.assertEqual(flagged.selection[0]['v']['v'], False)
        # A string is rejected, even one that spells a boolean.
        with self.assertRaises(ValueError):
            FlaggedFacet('false')

        text_filter = TextFilterFacet('column name', 'query')
        self.assertEqual(text_filter.query, 'query')

    def test_selections(self):
        """include/exclude/reset change the selection length as expected."""
        text = TextFacet('column name')

        text.include('element')
        self.assertEqual(len(text.selection), 1)
        text.include('element 2')
        self.assertEqual(len(text.selection), 2)

        text.exclude('element')
        self.assertEqual(len(text.selection), 1)

        text.reset()
        self.assertEqual(len(text.selection), 0)

        # include() calls can be chained.
        text.include('element').include('element 2')
        self.assertEqual(len(text.selection), 2)
class EngineTest(unittest.TestCase):
    """Mode handling, serialisation and facet bookkeeping of Engine."""

    def test_init(self):
        """The mode defaults to row-based and survives set_facets()."""
        eng = Engine()
        self.assertEqual(eng.mode, 'row-based')

        eng.mode = 'record-based'
        self.assertEqual(eng.mode, 'record-based')

        eng.set_facets(BlankFacet)
        self.assertEqual(eng.mode, 'record-based')

        eng.set_facets(BlankFacet, BlankFacet)
        self.assertEqual(len(eng), 2)

    def test_serialize(self):
        """as_json() / as_dict() produce the expected structures."""
        empty_engine = Engine()
        serialized = empty_engine.as_json()
        self.assertEqual(serialized, '{"facets": [], "mode": "row-based"}')

        text_expected = {
            'selectError': False,
            'name': 'column',
            'selection': [],
            'expression': 'value',
            'invert': False,
            'columnName': 'column',
            'selectBlank': False,
            'omitBlank': False,
            'type': 'list',
            'omitError': False,
        }
        text = TextFacet(column='column')
        self.assertEqual(text.as_dict(), text_expected)

        numeric_expected = {
            'from': 1,
            'to': 5,
            'selectBlank': True,
            'name': 'column',
            'selectError': True,
            'expression': 'value',
            'selectNumeric': True,
            'columnName': 'column',
            'selectNonNumeric': True,
            'type': 'range',
        }
        numeric = NumericFacet(column='column', From=1, to=5)
        self.assertEqual(numeric.as_dict(), numeric_expected)

    def test_add_facet(self):
        """add_facet() grows both engine.facets and len(engine)."""
        party = TextFacet(column='Party Code')
        eng = Engine(party)
        eng.add_facet(TextFacet(column='Ethnicity'))
        self.assertEqual(len(eng.facets), 2)
        self.assertEqual(len(eng), 2)

    def test_reset_remove(self):
        """reset_all() clears selections; remove_all() drops the facets."""
        first = TextFacet('column name')
        first.include('element')
        second = TextFacet('column name 2')
        second.include('element 2')

        eng = Engine(first, second)
        self.assertEqual(len(eng), 2)
        self.assertEqual(len(first.selection), 1)

        eng.reset_all()
        self.assertEqual(len(first.selection), 0)
        self.assertEqual(len(second.selection), 0)

        eng.remove_all()
        self.assertEqual(len(eng), 0)
class SortingTest(unittest.TestCase):
    """Construction of Sorting from nothing, a name, and mixed lists."""

    def test_sorting(self):
        """Criteria get the expected defaults and honour explicit dicts."""
        empty = Sorting()
        self.assertEqual(empty.as_json(), '{"criteria": []}')

        # A bare column name is expanded into a criterion with defaults.
        by_email = Sorting('email')
        criterion = by_email.criteria[0]
        expected_defaults = (
            ('column', 'email'),
            ('valueType', 'string'),
            ('reverse', False),
            ('caseSensitive', False),
            ('errorPosition', 1),
            ('blankPosition', 2),
        )
        for key, wanted in expected_defaults:
            self.assertEqual(criterion[key], wanted)

        by_two_columns = Sorting(['email', 'gender'])
        self.assertEqual(len(by_two_columns), 2)

        # A dict entry supplies its own settings for that criterion.
        mixed = Sorting(['email', {'column': 'date', 'valueType': 'date'}])
        self.assertEqual(len(mixed), 2)
        date_criterion = mixed.criteria[1]
        self.assertEqual(date_criterion['column'], 'date')
        self.assertEqual(date_criterion['valueType'], 'date')
class FacetsResponseTest(unittest.TestCase):
    """Parsing of a canned compute-facets JSON response."""

    # Canned server reply holding a single "Party Code" facet.
    response = """{"facets":[{"name":"Party Code","expression":"value","columnName":"Party Code","invert":false,"choices":[{"v":{"v":"D","l":"D"},"c":3700,"s":false},{"v":{"v":"R","l":"R"},"c":1613,"s":false},{"v":{"v":"N","l":"N"},"c":15,"s":false},{"v":{"v":"O","l":"O"},"c":184,"s":false}],"blankChoice":{"s":false,"c":1446}}],"mode":"row-based"}"""

    def test_facet_response(self):
        """Facets are reachable by index, by facet object and by iteration."""
        party_facet = TextFacet('Party Code')
        engine = Engine(party_facet)
        payload = json.loads(self.response)
        facets = engine.facets_response(payload).facets

        self.assertEqual(facets[0].choices['D'].count, 3700)
        self.assertEqual(facets[0].blank_choice.count, 1446)
        self.assertEqual(facets[party_facet], facets[0])

        # test iteration
        iterated = []
        for item in facets:
            iterated.append(item)
        self.assertEqual(iterated[0], facets[0])


if __name__ == '__main__':
    unittest.main()

View File

@ -1,31 +0,0 @@
#!/usr/bin/env python
"""
test_history.py
"""
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
import unittest
from google.refine.history import *
class HistoryTest(unittest.TestCase):
    """HistoryEntry keeps the id, time and description it is given."""

    def test_init(self):
        """Build an entry from a sample response and read the fields back."""
        history_json = {
            u"id": 1303851435223,
            u"description": "Split 4 cells",
            u"time": "2011-04-26T16:45:08Z"
        }
        response = {u"code": "ok", u"historyEntry": history_json}

        fields = response['historyEntry']
        entry = HistoryEntry(
            fields['id'], fields['time'], fields['description'])

        self.assertEqual(entry.id, 1303851435223)
        self.assertEqual(entry.description, 'Split 4 cells')
        self.assertEqual(entry.time, '2011-04-26T16:45:08Z')


if __name__ == '__main__':
    unittest.main()

View File

@ -1,80 +0,0 @@
#!/usr/bin/env python
"""
test_refine.py
These tests require a connection to a Refine server either at
http://127.0.0.1:3333/ or by specifying environment variables
OPENREFINE_HOST and OPENREFINE_PORT.
"""
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
import csv
import unittest
from google.refine import refine
import refinetest
from io import StringIO
class RefineServerTest(refinetest.RefineTestCase):
    """Server-level checks: URL handling, project listing and version."""

    def test_init(self):
        """The server URL is built from REFINE_HOST / REFINE_PORT."""
        server_url = 'http://' + refine.REFINE_HOST
        # Port 80 is left out of the expected URL.
        if refine.REFINE_PORT != '80':
            server_url += ':' + refine.REFINE_PORT
        self.assertEqual(self.server.server, server_url)
        self.assertEqual(refine.RefineServer.url(), server_url)
        # strip trailing /
        server = refine.RefineServer('http://refine.example/')
        self.assertEqual(server.server, 'http://refine.example')

    def test_list_projects(self):
        """list_projects() returns a dict."""
        projects = self.refine.list_projects()
        self.assertIsInstance(projects, dict)

    def test_get_version(self):
        """get_version() reports all four version fields."""
        version_info = self.server.get_version()
        for item in ('revision', 'version', 'full_version', 'full_name'):
            self.assertIn(item, version_info)

    def test_version(self):
        """The server reports one of the supported versions."""
        # The trailing comma matters: ('3.2') is just the string '3.2', which
        # would turn this into a substring test that '3', '.' or '' also pass.
        self.assertIn(self.server.version, ('3.2',))
class RefineTest(refinetest.RefineTestCase):
    """Project-level checks against a project built from duplicates.csv."""

    project_file = 'duplicates.csv'

    def test_new_project(self):
        """setUp produced a RefineProject instance."""
        self.assertIsInstance(self.project, refine.RefineProject)

    def test_wait_until_idle(self):
        """wait_until_idle() returns without raising."""
        self.project.wait_until_idle()  # should just return

    def test_get_models(self):
        """Column metadata is populated for the project."""
        self.assertEqual(self.project.key_column, 'email')
        self.assertIn('email', self.project.columns)
        self.assertIn('email', self.project.column_order)
        self.assertEqual(self.project.column_order['name'], 1)

    def test_delete_project(self):
        """delete() returns a truthy value."""
        # NOTE(review): tearDown calls delete() on this project a second
        # time - confirm that the server tolerates that.
        self.assertTrue(self.project.delete())

    def test_open_export(self):
        """A project reopened by URL can be exported as text lines."""
        response = refine.RefineProject(self.project.project_url()).export()
        lines = response.text.splitlines()
        self.assertIn('email', lines[0])
        for line in lines[1:]:
            self.assertTrue('M' in line or 'F' in line)

    def test_open_export_csv(self):
        """The export parses as tab-separated rows with a header line."""
        response = refine.RefineProject(self.project.project_url()).export()
        csv_fp = csv.reader(StringIO(response.text), dialect='excel-tab')
        header = next(csv_fp)
        self.assertEqual(header[0], 'email')
        for row in csv_fp:
            self.assertIn(row[3], ('F', 'M'))


if __name__ == '__main__':
    unittest.main()

View File

@ -1,81 +0,0 @@
#!/usr/bin/env python3
"""
test_refine_small.py
"""
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
import unittest
from google.refine import refine
class RefineRowsTest(unittest.TestCase):
    """RowsResponseFactory output, checked without contacting a server."""

    def test_rows_response(self):
        """Rows built from a canned reply support len, iteration, indexing."""
        column_index = {
            u'gender': 3, u'state': 2, u'purchase': 4, u'email': 0,
            u'name': 1}
        cell_values = (
            u'danny.baron@example1.com',
            u'Danny Baron',
            u'CA',
            u'M',
            u'TV',
        )
        single_row = {
            u'i': 0,
            u'cells': [{u'v': value} for value in cell_values],
            u'starred': False,
            u'flagged': False
        }
        reply = {
            u'rows': [single_row],
            u'start': 0,
            u'limit': 1,
            u'mode': u'row-based',
            u'filtered': 10,
            u'total': 10,
        }

        make_response = refine.RowsResponseFactory(column_index)
        response = make_response(reply)
        self.assertEqual(len(response.rows), 1)

        # test iteration
        collected = []
        for row in response.rows:
            collected.append(row)
        self.assertEqual(collected[0]['name'], 'Danny Baron')

        # test indexing
        self.assertEqual(response.rows[0]['name'], 'Danny Baron')
class RefineProjectTest(unittest.TestCase):
    """RefineProject URL / id parsing, with get_models stubbed out."""

    def setUp(self):
        """Stub get_models and pin REFINE_HOST / REFINE_PORT."""
        # Mock out get_models so it doesn't attempt to connect to a server
        self._get_models = refine.RefineProject.get_models

        def _skip_models(me):
            return me

        refine.RefineProject.get_models = _skip_models
        # Save REFINE_{HOST,PORT} as tests overwrite it
        self._refine_host_port = (refine.REFINE_HOST, refine.REFINE_PORT)
        refine.REFINE_HOST = '127.0.0.1'
        refine.REFINE_PORT = '3333'

    def tearDown(self):
        """Undo everything setUp and the test changed on the module."""
        # Restore mocked get_models
        refine.RefineProject.get_models = self._get_models
        # Restore values for REFINE_{HOST,PORT}
        refine.REFINE_HOST, refine.REFINE_PORT = self._refine_host_port

    def test_server_init(self):
        """Server and project id are derived from every accepted form."""
        project_id = '1658955153749'
        # (constructor arguments, expected server URL)
        accepted_forms = (
            (('http://127.0.0.1:3333/project?project=1658955153749',),
             'http://127.0.0.1:3333'),
            (('http://127.0.0.1:3333', '1658955153749'),
             'http://127.0.0.1:3333'),
            (('http://server/varnish/project?project=1658955153749',),
             'http://server/varnish'),
            (('1658955153749',),
             'http://127.0.0.1:3333'),
        )
        for ctor_args, expected_server in accepted_forms:
            project = refine.RefineProject(*ctor_args)
            self.assertEqual(project.server.server, expected_server)
            self.assertEqual(project.project_id, project_id)

        # A bare id falls back to the module-level host and port.
        refine.REFINE_HOST = '10.0.0.1'
        refine.REFINE_PORT = '80'
        project = refine.RefineProject('1658955153749')
        self.assertEqual(project.server.server, 'http://10.0.0.1')


if __name__ == '__main__':
    unittest.main()

View File

@ -1,490 +0,0 @@
#!/usr/bin/env python
"""
test_tutorial.py
The tests here are based on David Huynh's Refine tutorial at
http://davidhuynh.net/spaces/nicar2011/tutorial.pdf The tests perform all the
Refine actions given in the tutorial (except the web scraping) and verify the
changes expected to be observed explained in the tutorial.
These tests require a connection to a Refine server either at
http://127.0.0.1:3333/ or by specifying environment variables
OPENREFINE_HOST and OPENREFINE_PORT.
"""
# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
import unittest
from google.refine import facet
import refinetest
class TutorialTestFacets(refinetest.RefineTestCase):
    """Tutorial section "2. Exploration using Facets" run against a server.

    The ``{n}`` markers in the comments are carried over unchanged; they
    appear to refer to the numbered steps of the tutorial.
    """
    project_file = 'louisiana-elected-officials.csv'
    project_options = {'guess_cell_value_types': True}

    def test_get_rows(self):
        """Fetch the first ten rows and check totals and per-row flags."""
        # Section "2. Exploration using Facets": {3}
        response = self.project.get_rows(limit=10)
        self.assertEqual(len(response.rows), 10)
        self.assertEqual(response.limit, 10)
        self.assertEqual(response.total, 6958)
        self.assertEqual(response.filtered, 6958)
        for row in response.rows:
            self.assertFalse(row.flagged)
            self.assertFalse(row.starred)

    def test_facet(self):
        """Run steps {4}-{17}: build facets, narrow them, check the counts.

        The steps share one engine and mutate the same facet objects, so
        they depend on running in exactly this order.
        """
        # Section "2. Exploration using Facets": {4}
        party_code_facet = facet.TextFacet(column='Party Code')
        response = self.project.compute_facets(party_code_facet)
        pc = response.facets[0]
        # test look by index same as look up by facet object
        self.assertEqual(pc, response.facets[party_code_facet])
        self.assertEqual(pc.name, 'Party Code')
        self.assertEqual(pc.choices['D'].count, 3700)
        self.assertEqual(pc.choices['N'].count, 15)
        self.assertEqual(pc.blank_choice.count, 1446)
        # {5}, {6}
        engine = facet.Engine(party_code_facet)
        ethnicity_facet = facet.TextFacet(column='Ethnicity')
        engine.add_facet(ethnicity_facet)
        self.project.engine = engine
        response = self.project.compute_facets()
        e = response.facets[ethnicity_facet]
        self.assertEqual(e.choices['B'].count, 1255)
        self.assertEqual(e.choices['W'].count, 4469)
        # {7}
        ethnicity_facet.include('B')
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 1255)
        indexes = [row.index for row in response.rows]
        self.assertEqual(indexes, [1, 2, 3, 4, 6, 12, 18, 26, 28, 32])
        # {8}
        response = self.project.compute_facets()
        pc = response.facets[party_code_facet]
        self.assertEqual(pc.name, 'Party Code')
        self.assertEqual(pc.choices['D'].count, 1179)
        self.assertEqual(pc.choices['R'].count, 11)
        self.assertEqual(pc.blank_choice.count, 46)
        # {9}
        party_code_facet.include('R')
        response = self.project.compute_facets()
        e = response.facets[ethnicity_facet]
        self.assertEqual(e.choices['B'].count, 11)
        # {10}
        party_code_facet.reset()
        ethnicity_facet.reset()
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 6958)
        # {11}
        office_title_facet = facet.TextFacet('Office Title')
        self.project.engine.add_facet(office_title_facet)
        response = self.project.compute_facets()
        # Index 2: third facet added to the engine, after the two above.
        self.assertEqual(len(response.facets[2].choices), 76)
        # {12} - XXX not sure how to interpret bins & baseBins yet
        office_level_facet = facet.NumericFacet('Office Level')
        self.project.engine.add_facet(office_level_facet)
        # {13}
        office_level_facet.From = 300   # from reserved word
        office_level_facet.to = 320
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 1907)
        response = self.project.compute_facets()
        ot = response.facets[office_title_facet]
        self.assertEqual(len(ot.choices), 21)
        # The two keys below differ only by a trailing space.
        self.assertEqual(ot.choices['Chief of Police'].count, 2)
        self.assertEqual(ot.choices['Chief of Police '].count, 211)
        # {14}
        self.project.engine.remove_all()
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 6958)
        # {15}
        phone_facet = facet.TextFacet('Phone', expression='value[0, 3]')
        self.project.engine.add_facet(phone_facet)
        response = self.project.compute_facets()
        p = response.facets[phone_facet]
        self.assertEqual(p.expression, 'value[0, 3]')
        self.assertEqual(p.choices['318'].count, 2331)
        # {16}
        commissioned_date_facet = facet.NumericFacet(
            'Commissioned Date',
            expression='value.toDate().datePart("year")')
        self.project.engine.add_facet(commissioned_date_facet)
        response = self.project.compute_facets()
        cd = response.facets[commissioned_date_facet]
        self.assertEqual(cd.error_count, 959)
        self.assertEqual(cd.numeric_count, 5999)
        # {17}
        office_description_facet = facet.NumericFacet(
            'Office Description',
            expression=r'value.match(/\D*(\d+)\w\w Rep.*/)[0].toNumber()')
        self.project.engine.add_facet(office_description_facet)
        response = self.project.compute_facets()
        od = response.facets[office_description_facet]
        self.assertEqual(od.min, 0)
        self.assertEqual(od.max, 110)
        self.assertEqual(od.numeric_count, 548)
class TutorialTestEditing(refinetest.RefineTestCase):
    """Tutorial sections 3 and 4 (cell editing, batched row deletion).

    The ``{n}`` markers in the comments are carried over unchanged; they
    appear to refer to the numbered steps of the tutorial.
    """
    project_file = 'louisiana-elected-officials.csv'
    project_options = {'guess_cell_value_types': True}

    def test_editing(self):
        """Apply the edits in order and check each history description.

        Every step changes the project, so the expected counts only hold
        when the steps run in exactly this order.
        """
        # Section "3. Cell Editing": {1}
        self.project.engine.remove_all()    # redundant due to setUp
        # {2}
        self.project.text_transform(column='Zip Code 2',
                                    expression='value.toString()[0, 5]')
        self.assertInResponse('transform on 6958 cells in column Zip Code 2')
        # {3} - XXX history
        # {4}
        office_title_facet = facet.TextFacet('Office Title')
        self.project.engine.add_facet(office_title_facet)
        response = self.project.compute_facets()
        self.assertEqual(len(response.facets[office_title_facet].choices), 76)
        self.project.text_transform('Office Title', 'value.trim()')
        self.assertInResponse('6895')
        response = self.project.compute_facets()
        self.assertEqual(len(response.facets[office_title_facet].choices), 67)
        # {5}
        self.project.edit('Office Title', 'Councilmen', 'Councilman')
        self.assertInResponse('13')
        response = self.project.compute_facets()
        self.assertEqual(len(response.facets[office_title_facet].choices), 66)
        # {6}
        response = self.project.compute_clusters('Office Title')
        self.assertTrue(response)
        # {7}
        clusters = self.project.compute_clusters('Office Title', 'knn')
        self.assertEqual(len(clusters), 7)
        first_cluster = clusters[0]
        self.assertEqual(len(first_cluster), 2)
        self.assertEqual(first_cluster[0]['value'], 'DPEC Member at Large')
        self.assertEqual(first_cluster[0]['count'], 6)
        # Not strictly necessary to repeat 'Council Member' but a test
        # of mass_edit, and it's also what the front end sends.
        self.project.mass_edit('Office Title', [{
            'from': ['Council Member', 'Councilmember'],
            'to': 'Council Member'
        }])
        self.assertInResponse('372')
        response = self.project.compute_facets()
        self.assertEqual(len(response.facets[office_title_facet].choices), 65)
        # Section "4. Row and Column Editing, Batched Row Deletion"
        # Test doesn't strictly follow the tutorial as the "Browse this
        # cluster" performs a text facet which the server can't complete
        # as it busts its max facet count. The useful work is done with
        # get_rows(). Also, we can facet & select in one; the UI can't.
        # {1}, {2}, {3}, {4}
        clusters = self.project.compute_clusters('Candidate Name')
        for cluster in clusters[0:3]:   # just do a few
            for match in cluster:
                # {2}
                # Only values ending in ", " are looked up and starred.
                if match['value'].endswith(', '):
                    response = self.project.get_rows(
                        facet.TextFacet('Candidate Name', match['value']))
                    self.assertEqual(len(response.rows), 1)
                    for row in response.rows:
                        self.project.star_row(row)
                        self.assertInResponse(str(row.index + 1))
        # {5}, {6}, {7}
        response = self.project.compute_facets(facet.StarredFacet(True))
        self.assertEqual(len(response.facets[0].choices), 2)    # true & false
        self.assertEqual(response.facets[0].choices[True].count, 2)
        self.project.remove_rows()
        self.assertInResponse('2 rows')
class TutorialTestDuplicateDetection(refinetest.RefineTestCase):
    """Tutorial section 4, "Duplicate Row Detection and Deletion".

    The ``{n}`` markers in the comments are carried over unchanged; they
    appear to refer to the numbered steps of the tutorial.
    """
    project_file = 'duplicates.csv'

    def test_duplicate_detection(self):
        """Sort by email, count duplicates, blank them down, remove them.

        Each step changes the project, so the order is significant.
        """
        # Section "4. Row and Column Editing,
        #             Duplicate Row Detection and Deletion"
        # {7}, {8}
        response = self.project.get_rows(sort_by='email')
        indexes = [row.index for row in response.rows]
        self.assertEqual(indexes, [4, 9, 8, 3, 0, 2, 5, 6, 1, 7])
        # {9}
        self.project.reorder_rows()
        self.assertInResponse('Reorder rows')
        response = self.project.get_rows()
        indexes = [row.index for row in response.rows]
        self.assertEqual(indexes, list(range(10)))
        # {10}
        self.project.add_column(
            'email', 'count', 'facetCount(value, "value", "email")')
        self.assertInResponse('column email by filling 10 rows')
        response = self.project.get_rows()
        self.assertEqual(self.project.column_order['email'], 0)  # i.e. 1st
        self.assertEqual(self.project.column_order['count'], 1)  # i.e. 2nd
        counts = [row['count'] for row in response.rows]
        self.assertEqual(counts, [2, 2, 1, 1, 3, 3, 3, 1, 2, 2])
        # {11}
        self.assertFalse(self.project.has_records)
        self.project.blank_down('email')
        self.assertInResponse('Blank down 4 cells')
        self.assertTrue(self.project.has_records)
        response = self.project.get_rows()
        # 1 where the email cell still has a value, 0 where it is now blank.
        emails = [1 if row['email'] else 0 for row in response.rows]
        self.assertEqual(emails, [1, 0, 1, 1, 1, 0, 0, 1, 1, 0])
        # {12}
        blank_facet = facet.BlankFacet('email', selection=True)
        # {13}
        self.project.remove_rows(blank_facet)
        self.assertInResponse('Remove 4 rows')
        self.project.engine.remove_all()
        response = self.project.get_rows()
        email_counts = [(row['email'], row['count']) for row in response.rows]
        self.assertEqual(email_counts, [
            (u'arthur.duff@example4.com', 2),
            (u'ben.morisson@example6.org', 1),
            (u'ben.tyler@example3.org', 1),
            (u'danny.baron@example1.com', 3),
            (u'jean.griffith@example5.org', 1),
            (u'melanie.white@example2.edu', 2)
        ])
class TutorialTestTransposeColumnsIntoRows(refinetest.RefineTestCase):
    """Tutorial section 5, "Transpose Columns into Rows".

    The ``{n}`` markers in the comments are carried over unchanged; they
    appear to refer to the numbered steps of the tutorial.
    """
    project_file = 'us_economic_assistance.csv'

    def test_transpose_columns_into_rows(self):
        """Transpose the FY columns, derive year/amount, then fill down.

        Each step changes the project, so the order is significant.
        """
        # Section "5. Structural Editing, Transpose Columns into Rows"
        # {1}, {2}, {3}
        self.project.transpose_columns_into_rows('FY1946', 64, 'pair')
        self.assertInResponse('64 column(s) starting with FY1946')
        # {4}
        self.project.add_column('pair', 'year', 'value[2,6].toNumber()')
        self.assertInResponse('filling 26185 rows')
        # {5}
        self.project.text_transform(
            column='pair', expression='value.substring(7).toNumber()')
        self.assertInResponse('transform on 26185 cells')
        # {6}
        self.project.rename_column('pair', 'amount')
        self.assertInResponse('Rename column pair to amount')
        # {7}
        self.project.fill_down('country_name')
        self.assertInResponse('Fill down 23805 cells')
        self.project.fill_down('program_name')
        self.assertInResponse('Fill down 23805 cells')
        # spot check of last row for transforms and fill down
        response = self.project.get_rows()
        row10 = response.rows[9]
        self.assertEqual(row10['country_name'], 'Afghanistan')
        self.assertEqual(row10['program_name'],
                         'Department of Defense Security Assistance')
        self.assertEqual(row10['amount'], 113777303)
class TutorialTestTransposeFixedNumberOfRowsIntoColumns(
        refinetest.RefineTestCase):
    """Tutorial section 5, "Transpose Fixed Number of Rows into Columns".

    The ``{n}`` markers in the comments are carried over unchanged; they
    appear to refer to the numbered steps of the tutorial.
    """
    project_file = 'fixed-rows.csv'
    project_format = 'text/line-based'
    project_options = {'header_lines': 0}

    def test_transpose_fixed_number_of_rows_into_columns(self):
        """Transpose every 4 cells, then split the first column into fields.

        Each step changes the project, so the order is significant.
        """
        # Servers other than 2.0/2.1 get the first column renamed to
        # 'Column' before the steps below refer to it by that name.
        if self.server.version not in ('2.0', '2.1'):
            self.project.rename_column('Column 1', 'Column')
        # Section "5. Structural Editing,
        #             Transpose Fixed Number of Rows into Columns"
        # {1}
        self.assertTrue('Column' in self.project.column_order)
        # {8}
        self.project.transpose_rows_into_columns('Column', 4)
        self.assertInResponse('Transpose every 4 cells in column Column')
        # {9} - renaming column triggers a bug in Refine <= 2.1
        if self.server.version not in ('2.0', '2.1'):
            self.project.rename_column('Column 2', 'Address')
            self.project.rename_column('Column 3', 'Address 2')
            self.project.rename_column('Column 4', 'Status')
        # {10}
        self.project.add_column(
            'Column 1', 'Transaction',
            'if(value.contains(" sent "), "send", "receive")')
        self.assertInResponse('Column 1 by filling 4 rows')
        # {11}
        transaction_facet = facet.TextFacet(column='Transaction',
                                            selection='send')
        self.project.engine.add_facet(transaction_facet)
        self.project.compute_facets()
        # {12}, {13}, {14}
        self.project.add_column(
            'Column 1', 'Sender',
            'value.partition(" sent ")[0]')
        # XXX resetting the facet shows data in rows with Transaction=receive
        #     which shouldn't have been possible with the facet.
        self.project.add_column(
            'Column 1', 'Recipient',
            'value.partition(" to ")[2].partition(" on ")[0]')
        self.project.add_column(
            'Column 1', 'Amount',
            'value.partition(" sent ")[2].partition(" to ")[0]')
        # {15}
        transaction_facet.reset().include('receive')
        self.project.get_rows()
        # XXX there seems to be some kind of bug where the model doesn't
        #     match get_rows() output - cellIndex being returned that are
        #     out of range.
        #self.assertTrue(a_row['Sender'] is None)
        #self.assertTrue(a_row['Recipient'] is None)
        #self.assertTrue(a_row['Amount'] is None)
        # {16}
        # Run with the facet set to 'receive' in step {15}.
        for column, expression in (
            ('Sender',
             'cells["Column 1"].value.partition(" from ")[2].partition(" on ")[0]'),
            ('Recipient',
             'cells["Column 1"].value.partition(" received ")[0]'),
            ('Amount',
             'cells["Column 1"].value.partition(" received ")[2].partition(" from ")[0]')
        ):
            self.project.text_transform(column, expression)
            self.assertInResponse('2 cells')
        # {17}
        transaction_facet.reset()
        # {18}
        self.project.text_transform('Column 1', 'value.partition(" on ")[2]')
        self.assertInResponse('4 cells')
        # {19}
        self.project.reorder_columns(['Transaction', 'Amount', 'Sender',
                                      'Recipient'])
        self.assertInResponse('Reorder columns')
class TutorialTestTransposeVariableNumberOfRowsIntoColumns(
        refinetest.RefineTestCase):
    """Turn variable-length groups of lines into one row per group.

    The ``{n}`` markers in the comments are carried over unchanged; they
    appear to refer to the numbered steps of the tutorial.
    """
    project_file = 'variable-rows.csv'
    project_format = 'text/line-based'
    project_options = {'header_lines': 0}

    def test_transpose_variable_number_of_rows_into_columns(self):
        """Mark each group's first line, join the group, drop the rest.

        Each step changes the project, so the order is significant.
        """
        # {20}, {21}
        # Servers other than 2.0/2.1 get the first column renamed to
        # 'Column' before the steps below refer to it by that name.
        if self.server.version not in ('2.0', '2.1') :
            self.project.rename_column('Column 1', 'Column')
        self.project.add_column(
            'Column', 'First Line', 'if(value.contains(" on "), value, null)')
        self.assertInResponse('Column by filling 4 rows')
        response = self.project.get_rows()
        # First 10 characters of 'First Line', or None where it is empty.
        first_names = [row['First Line'][0:10] if row['First Line'] else None
                       for row in response.rows]
        self.assertEqual(first_names, [
            'Tom Dalton', None, None, None,
            'Morgan Law', None, None, None, None, 'Eric Batem'])
        # {22}
        self.project.move_column('First Line', 0)
        self.assertInResponse('Move column First Line to position 0')
        self.assertEqual(self.project.column_order['First Line'], 0)
        # {23}
        self.project.engine.mode = 'record-based'
        response = self.project.get_rows()
        self.assertEqual(response.mode, 'record-based')
        self.assertEqual(response.filtered, 4)
        # {24}
        self.project.add_column(
            'Column', 'Status', 'row.record.cells["Column"].value[-1]')
        self.assertInResponse('filling 18 rows')
        # {25}
        self.project.text_transform(
            'Column', 'row.record.cells["Column"].value[1, -1].join("|")')
        self.assertInResponse('18 cells')
        # {26}
        # NOTE(review): 'fd' is not one of the mode names used elsewhere in
        # these tests ('row-based', 'record-based') - confirm it is intended.
        self.project.engine.mode = 'fd'
        # {27}
        blank_facet = facet.BlankFacet('First Line', selection=True)
        self.project.remove_rows(blank_facet)
        self.assertInResponse('Remove 14 rows')
        self.project.engine.remove_all()
        # {28}
        self.project.split_column('Column', separator='|')
        self.assertInResponse('Split 4 cell(s) in column Column')
class TutorialTestWebScraping(refinetest.RefineTestCase):
    """Tutorial section "6. Web Scraping", run on the eli-lilly.csv project.

    The ``{n}`` markers in the comments are carried over unchanged; they
    appear to refer to the numbered steps of the tutorial.
    """
    project_file = 'eli-lilly.csv'
    # Expression passed to text_transform() in step {8}.
    filter_expr_1 = """
forEach(
value[2,-2].replace("&#160;", " ").split("), ("),
v,
v[0,-1].partition(", '", true).join(":")
).join("|")
"""
    # Template with a %d placeholder; only referenced by the commented-out
    # step {9} below.
    filter_expr_2 = """
filter(
value.split("|"), p, p.partition(":")[0].toNumber() == %d
)[0].partition(":")[2]
"""

    def test_web_scraping(self):
        """Split and sort the scraped lines, then prune unwanted rows.

        Each step changes the project, so the order is significant.
        """
        # Section "6. Web Scraping"
        # {1}, {2}
        self.project.split_column('key', separator=':')
        self.assertInResponse('Split 5409 cell(s) in column key')
        self.project.rename_column('key 1', 'page')
        self.assertInResponse('Rename column key 1 to page')
        self.project.rename_column('key 2', 'top')
        self.assertInResponse('Rename column key 2 to top')
        self.project.move_column('line', 'end')
        self.assertInResponse('Move column line to position 2')
        # {3}
        self.project.sorting = facet.Sorting([
            {'column': 'page', 'valueType': 'number'},
            {'column': 'top', 'valueType': 'number'},
        ])
        self.project.reorder_rows()
        self.assertInResponse('Reorder rows')
        first_row = self.project.get_rows(limit=1).rows[0]
        self.assertEqual(first_row['page'], 1)
        self.assertEqual(first_row['top'], 24)
        # {4}
        filter_facet = facet.TextFilterFacet('line', 'ahman')
        rows = self.project.get_rows(filter_facet).rows
        self.assertEqual(len(rows), 1)
        self.assertEqual(rows[0]['top'], 106)
        # Changing the query on the same facet object affects the next
        # get_rows() call, which is made without passing the facet again.
        filter_facet.query = 'alvarez'
        rows = self.project.get_rows().rows
        self.assertEqual(len(rows), 2)
        self.assertEqual(rows[-1]['top'], 567)
        self.project.engine.remove_all()
        # {5} - tutorial says 'line'; it means 'top'
        line_facet = facet.NumericFacet('top')
        line_facet.to = 100
        self.project.remove_rows(line_facet)
        self.assertInResponse('Remove 775 rows')
        line_facet.From = 570
        line_facet.to = 600
        self.project.remove_rows(line_facet)
        self.assertInResponse('Remove 71 rows')
        line_facet.reset()
        response = self.project.get_rows()
        self.assertEqual(response.filtered, 4563)
        # {6}
        page_facet = facet.TextFacet('page', 1)   # 1 not '1'
        self.project.engine.add_facet(page_facet)
        # {7}
        rows = self.project.get_rows().rows
        # Look for a row with a name in it by skipping HTML
        name_row = [row for row in rows if '<b>' not in row['line']][0]
        self.assertTrue('WELLNESS' in name_row['line'])
        self.assertEqual(name_row['top'], 161)
        line_facet.From = 20
        line_facet.to = 160
        self.project.remove_rows()
        self.assertInResponse('Remove 9 rows')
        self.project.engine.remove_all()
        # {8}
        self.project.text_transform('line', expression=self.filter_expr_1)
        self.assertInResponse('Text transform on 4554 cells in column line')
        # {9} - XXX following is generating Java exceptions
        #filter_expr = self.filter_expr_2 % 16
        #self.project.add_column('line', 'Name', expression=filter_expr)
        # {10} to the final {19} - nothing new in terms of exercising the API.


# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()