openrefine-client/google/test/test_refine.py

221 lines
8.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# encoding: utf-8
"""
test_refine.py
Created by Paul Makepeace on 2011-04-22.
Copyright (c) 2011 Real Programmers. All rights reserved.
"""
import sys
import os
import unittest
from google.refine import REFINE_HOST, REFINE_PORT
from google.refine import NumericFacet, TextFacet, Engine
from google.refine import RefineServer, Refine, RefineProject
from google.refine import to_camel, from_camel
PATH_TO_TEST_DATA = os.path.join('google', 'test', 'data')
class CamelTest(unittest.TestCase):
def test_to_camel(self):
pairs = (
('this', 'this'),
('this_attr', 'thisAttr'),
('From', 'from'),
)
for attr, camel_attr in pairs:
self.assertEqual(to_camel(attr), camel_attr)
def test_from_camel(self):
pairs = (
('this', 'this'),
('This', 'this'),
('thisAttr', 'this_attr'),
('ThisAttr', 'this_attr'),
('From', 'from'),
)
for camel_attr, attr in pairs:
self.assertEqual(from_camel(camel_attr), attr)
class RefineTestCase(unittest.TestCase):
project_file = None
project = None
# Section "2. Exploration using Facets": {1}, {2}
def setUp(self):
self.server = RefineServer()
self.refine = Refine(self.server)
if self.project_file:
self.project = self.refine.new_project(
os.path.join(PATH_TO_TEST_DATA, self.project_file))
def tearDown(self):
if self.project:
self.project.delete()
self.project = None
class RefineServerTest(RefineTestCase):
def test_init(self):
self.assertEqual(self.server.server, 'http://%s:%s' % (REFINE_HOST, REFINE_PORT))
server = RefineServer('http://refine.example/')
self.assertEqual(server.server, 'http://refine.example')
def test_list_projects(self):
projects = self.refine.list_projects()
self.assertTrue(isinstance(projects, dict))
def test_get_version(self):
version_info = self.refine.get_version()
for item in ('revision', 'version', 'full_version', 'full_name'):
self.assertTrue(item in version_info)
class RefineTest(RefineTestCase):
project_file = 'duplicates.csv'
def test_new_project(self):
self.assertTrue(isinstance(self.project, RefineProject))
def test_get_models(self):
self.assertEqual(self.project.key_column, 'email')
self.assertTrue('email' in self.project.columns)
self.assertEqual(self.project.column_index['name'], 1)
def test_delete_project(self):
self.assertTrue(self.project.delete())
class TutorialTestFacets(RefineTestCase):
project_file = 'louisiana-elected-officials.csv'
def test_get_rows(self):
# Section "2. Exploration using Facets": {3}
response = self.project.get_rows(limit=10)
self.assertEqual(len(response.rows), 10)
self.assertEqual(response.limit, 10)
self.assertEqual(response.total, 6958)
for row in response.rows:
self.assertFalse(row.flagged)
self.assertFalse(row.starred)
def test_facet(self):
# Section "2. Exploration using Facets": {4}
party_code_facet = TextFacet(column='Party Code')
response = self.project.compute_facets(party_code_facet)
pc = response.facets[0]
self.assertEqual(pc.name, 'Party Code')
self.assertEqual(pc.choices['D'].count, 3700)
self.assertEqual(pc.choices['N'].count, 15)
self.assertEqual(pc.blank_choice.count, 1446)
# {5}, {6}
engine = Engine(party_code_facet)
ethnicity_facet = TextFacet(column='Ethnicity')
engine.add_facet(ethnicity_facet)
self.project.engine = engine
response = self.project.compute_facets()
e = response.facets[1]
self.assertEqual(e.choices['B'].count, 1255)
self.assertEqual(e.choices['W'].count, 4469)
# {7}
ethnicity_facet.include('B')
response = self.project.get_rows()
self.assertEqual(response.filtered, 1255)
indexes = [r.index for r in response.rows]
self.assertEqual(indexes, [1, 2, 3, 4, 6, 12, 18, 26, 28, 32])
# {8}
response = self.project.compute_facets()
pc = response.facets[0]
self.assertEqual(pc.name, 'Party Code')
self.assertEqual(pc.choices['D'].count, 1179)
self.assertEqual(pc.choices['R'].count, 11)
self.assertEqual(pc.blank_choice.count, 46)
# {9}
party_code_facet.include('R')
response = self.project.compute_facets()
e = response.facets[1]
self.assertEqual(e.choices['B'].count, 11)
# {10}
party_code_facet.reset()
ethnicity_facet.reset()
response = self.project.get_rows()
self.assertEqual(response.filtered, 6958)
# {11}
office_title_facet = TextFacet('Office Title')
self.project.engine.add_facet(office_title_facet)
response = self.project.compute_facets()
self.assertEqual(len(response.facets[2].choices), 76)
# {12} - XXX not sure how to interpret bins & baseBins yet
office_level_facet = NumericFacet('Office Level')
self.project.engine.add_facet(office_level_facet)
# {13}
office_level_facet.From = 300 # from reserved word
office_level_facet.to = 320
response = self.project.get_rows()
self.assertEqual(response.filtered, 1907)
response = self.project.compute_facets()
ot = response.facets[2] # Office Title
self.assertEqual(len(ot.choices), 21)
self.assertEqual(ot.choices['Chief of Police'].count, 2)
self.assertEqual(ot.choices['Chief of Police '].count, 211)
2011-04-25 00:51:21 +02:00
# {14}
self.project.engine.remove_all()
response = self.project.get_rows()
self.assertEqual(response.filtered, 6958)
# {15}
phone_facet = TextFacet('Phone', expression='value[0, 3]')
self.project.engine.add_facet(phone_facet)
response = self.project.compute_facets()
p = response.facets[0]
self.assertEqual(p.expression, 'value[0, 3]')
self.assertEqual(p.choices['318'].count, 2331)
# {16}
commissioned_date_facet = NumericFacet('Commissioned Date',
expression='value.toDate().datePart("year")')
self.project.engine.add_facet(commissioned_date_facet)
response = self.project.compute_facets()
cd = response.facets[1]
self.assertEqual(cd.error_count, 959)
self.assertEqual(cd.numeric_count, 5999)
# {17}
office_description_facet = NumericFacet('Office Description',
expression=r'value.match(/\D*(\d+)\w\w Rep.*/)[0].toNumber()')
self.project.engine.add_facet(office_description_facet)
response = self.project.compute_facets()
cd = response.facets[2]
self.assertEqual(cd.min, 0)
self.assertEqual(cd.max, 110)
self.assertEqual(cd.numeric_count, 548)
class TutorialTestTransformAndClustering(RefineTestCase):
project_file = 'louisiana-elected-officials.csv'
def test_transform(self):
# Section "3. Cell Editing": {1}
self.project.engine.remove_all() # redundant due to setUp
# {2}
response = self.project.text_transform(column='Zip Code 2',
expression='value.toString()[0, 5]')
self.assertTrue('6067' in response['historyEntry']['description'])
# {3} - XXX history
# {4}
office_title_facet = TextFacet('Office Title')
self.project.engine.add_facet(office_title_facet)
response = self.project.compute_facets()
self.assertEqual(len(response.facets[0].choices), 76)
response = self.project.text_transform(column='Office Title',
expression='value.trim()')
2011-04-25 02:45:53 +02:00
self.assertTrue('6895' in response['historyEntry']['description'])
response = self.project.compute_facets()
self.assertEqual(len(response.facets[0].choices), 67)
2011-04-25 02:45:53 +02:00
# {5}
response = self.project.edit(column='Office Title',
'Councilmen', 'Councilman')
self.assertTrue('13' in response['historyEntry']['description'])
if __name__ == '__main__':
unittest.main()