Whitespace and minor renaming to bring the code in line with PEP 8 guidelines

2025-04-13 00:00:34 +02:00 · 2013-10-09 23:04:24 +05:00 · 2013-10-09 23:04:24 +05:00 · bc0a8e7c7b
commit bc0a8e7c7b
parent e9ef9a6d56
10 changed files with 134 additions and 104 deletions
--- a/google/refine/facet.py
+++ b/google/refine/facet.py
@ -28,6 +28,7 @@ def to_camel(attr):
    return (attr[0].lower() +
            re.sub(r'_(.)', lambda x: x.group(1).upper(), attr[1:]))

+
 def from_camel(attr):
    """convert thisAttrName to this_attr_name."""
    # Don't add an underscore for capitalized first letter
@ -35,8 +36,8 @@ def from_camel(attr):


 class Facet(object):
-    def __init__(self, column, type, **options):
-        self.type = type
+    def __init__(self, column, facet_type, **options):
+        self.type = facet_type
        self.name = column
        self.column_name = column
        for k, v in options.items():
@ -50,7 +51,7 @@ class Facet(object):
 class TextFilterFacet(Facet):
    def __init__(self, column, query, **options):
        super(TextFilterFacet, self).__init__(
-            column, query=query, case_sensitive=False, type='text',
+            column, query=query, case_sensitive=False, facet_type='text',
            mode='text', **options)


@ -60,7 +61,7 @@ class TextFacet(Facet):
                 select_error=False, invert=False, **options):
        super(TextFacet, self).__init__(
            column,
-            type='list',
+            facet_type='list',
            omit_blank=omit_blank,
            omit_error=omit_error,
            select_blank=select_blank,
@ -99,37 +100,39 @@ class BoolFacet(TextFacet):
            raise ValueError('selection must be True or False.')
        if expression is None:
            raise ValueError('Missing expression')
-        super(BoolFacet, self).__init__(column,
-            expression=expression, selection=selection)
+        super(BoolFacet, self).__init__(
+            column, expression=expression, selection=selection)


 class StarredFacet(BoolFacet):
    def __init__(self, selection=None):
-        super(StarredFacet, self).__init__('',
-            expression='row.starred', selection=selection)
+        super(StarredFacet, self).__init__(
+            '', expression='row.starred', selection=selection)


 class FlaggedFacet(BoolFacet):
    def __init__(self, selection=None):
-        super(FlaggedFacet, self).__init__('',
-            expression='row.flagged', selection=selection)
+        super(FlaggedFacet, self).__init__(
+            '', expression='row.flagged', selection=selection)


 class BlankFacet(BoolFacet):
    def __init__(self, column, selection=None):
-        super(BlankFacet, self).__init__(column,
-            expression='isBlank(value)', selection=selection)
+        super(BlankFacet, self).__init__(
+            column, expression='isBlank(value)', selection=selection)


 class ReconJudgmentFacet(TextFacet):
    def __init__(self, column, **options):
-        super(ReconJudgmentFacet, self).__init__(column,
+        super(ReconJudgmentFacet, self).__init__(
+            column,
            expression=('forNonBlank(cell.recon.judgment, v, v, '
                        'if(isNonBlank(value), "(unreconciled)", "(blank)"))'),
            **options)


 # Capitalize 'From' to get around python's reserved word.
+#noinspection PyPep8Naming
 class NumericFacet(Facet):
    def __init__(self, column, From=None, to=None, expression='value',
                 select_blank=True, select_error=True, select_non_numeric=True,
@ -139,7 +142,7 @@ class NumericFacet(Facet):
            From=From,
            to=to,
            expression=expression,
-            type='range',
+            facet_type='range',
            select_blank=select_blank,
            select_error=select_error,
            select_non_numeric=select_non_numeric,
@ -155,10 +158,12 @@ class NumericFacet(Facet):
 class FacetResponse(object):
    """Class for unpacking an individual facet response."""
    def __init__(self, facet):
+        self.name = None
        for k, v in facet.items():
            if isinstance(k, bool) or isinstance(k, basestring):
                setattr(self, from_camel(k), v)
        self.choices = {}
+
        class FacetChoice(object):
            def __init__(self, c):
                self.count = c['c']
@ -188,11 +193,14 @@ class FacetsResponse(object):
    def __init__(self, engine, facets):
        class FacetResponseContainer(object):
            facets = None
+
            def __init__(self, facet_responses):
                self.facets = [FacetResponse(fr) for fr in facet_responses]
+
            def __iter__(self):
                for facet in self.facets:
                    yield facet
+
            def __getitem__(self, index):
                if not isinstance(index, int):
                    index = engine.facet_index_by_id[id(index)]
--- a/google/refine/history.py
+++ b/google/refine/history.py
@ -18,15 +18,13 @@ Google Refine history: parsing responses.
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>

-import json
-import re
-

 class HistoryEntry(object):
    # N.B. e.g. **response['historyEntry'] won't work as keys are unicode :-/
-    def __init__(self, id=None, time=None, description=None, **kwargs):
-        if id is None:
+    #noinspection PyUnusedLocal
+    def __init__(self, history_entry_id=None, time=None, description=None, **kwargs):
+        if history_entry_id is None:
            raise ValueError('History entry id must be set')
-        self.id = id
+        self.id = history_entry_id
        self.description = description
        self.time = time
--- a/google/refine/refine.py
+++ b/google/refine/refine.py
@ -50,7 +50,7 @@ class RefineServer(object):

    def __init__(self, server=None):
        if server is None:
-            server=self.url()
+            server = self.url()
        self.server = server[:-1] if server.endswith('/') else server
        self.__version = None     # see version @property below

@ -114,6 +114,7 @@ class RefineServer(object):
            self.__version = self.get_version()['version']
        return self.__version

+
 class Refine:
    """Class representing a connection to a Refine server."""
    def __init__(self, server):
@ -155,9 +156,9 @@ class Refine:
                    guess_value_type=True,  # numbers, dates, etc.
                    ignore_quotes=False):

-        if ((project_file and project_url) or
-            (not project_file and not project_url)):
+        if (project_file and project_url) or (not project_file and not project_url):
            raise ValueError('One (only) of project_file and project_url must be set')
+
        def s(opt):
            if isinstance(opt, bool):
                return 'on' if opt else ''
@ -211,6 +212,7 @@ def RowsResponseFactory(column_index):
                    self.index = row_response['i']
                    self.row = [c['v'] if c else None
                                for c in row_response['cells']]
+
                def __getitem__(self, column):
                    # Trailing nulls seem to be stripped from row data
                    try:
@ -220,11 +222,14 @@ def RowsResponseFactory(column_index):

            def __init__(self, rows_response):
                self.rows_response = rows_response
+
            def __iter__(self):
                for row_response in self.rows_response:
                    yield self.RefineRow(row_response)
+
            def __getitem__(self, index):
                return self.RefineRow(self.rows_response[index])
+
            def __len__(self):
                return len(self.rows_response)

@ -331,8 +336,8 @@ class RefineProject:
                return

    def apply_operations(self, file_path, wait=True):
-        json = open(file_path).read()
-        response_json = self.do_json('apply-operations', {'operations': json})
+        json_data = open(file_path).read()
+        response_json = self.do_json('apply-operations', {'operations': json_data})
        if response_json['code'] == 'pending' and wait:
            self.wait_until_idle()
            return 'ok'
@ -426,6 +431,7 @@ class RefineProject:
            },
        },
    }
+
    def compute_clusters(self, column, clusterer_type='binning',
                         function=None, params=None):
        """Returns a list of clusters of {'value': ..., 'count': ...}."""
@ -443,7 +449,7 @@ class RefineProject:
    def annotate_one_row(self, row, annotation, state=True):
        if annotation not in ('starred', 'flagged'):
            raise ValueError('annotation must be one of starred or flagged')
-        state = 'true' if state == True else 'false'
+        state = 'true' if state is True else 'false'
        return self.do_json('annotate-one-row', {'row': row.index,
                                                 annotation: state})

@ -457,18 +463,19 @@ class RefineProject:
                   column_insert_index=None, on_error='set-to-blank'):
        if column_insert_index is None:
            column_insert_index = self.column_order[column] + 1
-        response = self.do_json('add-column', {'baseColumnName': column,
-            'newColumnName': new_column, 'expression': expression,
-            'columnInsertIndex': column_insert_index, 'onError': on_error})
+        response = self.do_json('add-column', {
+            'baseColumnName': column, 'newColumnName': new_column,
+            'expression': expression, 'columnInsertIndex': column_insert_index,
+            'onError': on_error})
        self.get_models()
        return response

    def split_column(self, column, separator=',', mode='separator',
                     regex=False, guess_cell_type=True,
                     remove_original_column=True):
-        response = self.do_json('split-column', {'columnName': column,
-            'separator': separator, 'mode': mode, 'regex': regex,
-            'guessCellType': guess_cell_type,
+        response = self.do_json('split-column', {
+            'columnName': column, 'separator': separator, 'mode': mode,
+            'regex': regex, 'guessCellType': guess_cell_type,
            'removeOriginalColumn': remove_original_column})
        self.get_models()
        return response
@ -505,9 +512,11 @@ class RefineProject:
        self.get_models()
        return response

-    def transpose_columns_into_rows(self, start_column, column_count,
+    def transpose_columns_into_rows(
+            self, start_column, column_count,
            combined_column_name, separator=':', prepend_column_name=True,
            ignore_blank_cells=True):
+
        response = self.do_json('transpose-columns-into-rows', {
            'startColumnName': start_column, 'columnCount': column_count,
            'combinedColumnName': combined_column_name,
@ -550,7 +559,8 @@ class RefineProject:
                return recon_service
        return None

-    def reconcile(self, column, service, type=None, config=None):
+    def reconcile(self, column, service, reconciliation_type=None,
+                  reconciliation_config=None):
        """Perform a reconciliation asynchronously.

        config: {
@ -570,21 +580,21 @@ class RefineProject:
        for reconciliation to complete.
        """
        # Create a reconciliation config by looking up recon service info
-        if config is None:
+        if reconciliation_config is None:
            service = self.get_reconciliation_service_by_name_or_url(service)
-            if type is None:
+            if reconciliation_type is None:
                raise ValueError('Must have at least one of config or type')
-            config = {
+            reconciliation_config = {
                'mode': 'standard-service',
                'service': service['url'],
                'identifierSpace': service['identifierSpace'],
                'schemaSpace': service['schemaSpace'],
                'type': {
-                    'id': type['id'],
-                    'name': type['name'],
+                    'id': reconciliation_type['id'],
+                    'name': reconciliation_type['name'],
                },
                'autoMatch': True,
                'columnDetails': [],
            }
        return self.do_json('reconcile', {
-            'columnName': column, 'config': json.dumps(config)})
+            'columnName': column, 'config': json.dumps(reconciliation_config)})
--- a/refine.py
+++ b/refine.py
@ -50,16 +50,19 @@ PARSER.add_option('-E', '--export', dest='export', action='store_true',
 PARSER.add_option('-f', '--apply', dest='apply',
                  help='Apply a JSON commands file to a project')

+
 def list_projects():
    """Query the Refine server and list projects by ID: name."""
    projects = refine.Refine(refine.RefineServer()).list_projects().items()
+
    def date_to_epoch(json_dt):
-        "Convert a JSON date time into seconds-since-epoch."
+        """Convert a JSON date time into seconds-since-epoch."""
        return time.mktime(time.strptime(json_dt, '%Y-%m-%dT%H:%M:%SZ'))
    projects.sort(key=lambda v: date_to_epoch(v[1]['modified']), reverse=True)
    for project_id, project_info in projects:
        print('{0:>14}: {1}'.format(project_id, project_info['name']))

+
 def export_project(project, options):
    """Dump a project to stdout or options.output file."""
    export_format = 'tsv'
@ -73,8 +76,10 @@ def export_project(project, options):
    output.writelines(project.export(export_format=export_format))
    output.close()

+
+#noinspection PyPep8Naming
 def main():
-    "Main."
+    """Main."""
    options, args = PARSER.parse_args()

    if options.host:
@ -100,4 +105,4 @@ def main():

 if __name__ == '__main__':
    # return project so that it's available interactively, python -i refine.py
-    project = main()
+    refine_project = main()
--- a/requirements.txt
+++ b/requirements.txt
@ -1 +1 @@
-urllib2-file>=0.2.1
+urllib2_file>=0.2.1
--- a/setup.py
+++ b/setup.py
@ -20,8 +20,9 @@ import os
 from setuptools import setup
 from setuptools import find_packages

-def read(fname):
-    return open(os.path.join(os.path.dirname(__file__), fname)).read()
+
+def read(filename):
+    return open(os.path.join(os.path.dirname(__file__), filename)).read()

 setup(name='refine-client',
      version='0.2.1',
--- a/tests/refinetest.py
+++ b/tests/refinetest.py
@ -20,6 +20,7 @@ from google.refine import refine
 PATH_TO_TEST_DATA = os.path.join('tests', 'data')


+#noinspection PyPep8Naming
 class RefineTestCase(unittest.TestCase):
    project_file = None
    project_file_options = {}
@ -42,6 +43,7 @@ class RefineTestCase(unittest.TestCase):
            self.project = None

    def assertInResponse(self, expect):
+        desc = None
        try:
            desc = self.project.history_entry.description
            self.assertTrue(expect in desc)
--- a/tests/test_history.py
+++ b/tests/test_history.py
@ -13,11 +13,11 @@ from google.refine.history import *
 class HistoryTest(unittest.TestCase):
    def test_init(self):
        response = {
-            u"code":"ok",
+            u"code": "ok",
            u"historyEntry": {
-                u"id":1303851435223,
-                u"description":"Split 4 cells",
-                u"time":"2011-04-26T16:45:08Z"
+                u"id": 1303851435223,
+                u"description": "Split 4 cells",
+                u"time": "2011-04-26T16:45:08Z"
            }
        }
        he = response['historyEntry']
--- a/tests/test_refine_small.py
+++ b/tests/test_refine_small.py
@ -46,7 +46,7 @@ class RefineProjectTest(unittest.TestCase):
    def setUp(self):
        # Mock out get_models so it doesn't attempt to connect to a server
        self._get_models = refine.RefineProject.get_models
-        refine.RefineProject.get_models = lambda self: self
+        refine.RefineProject.get_models = lambda me: me
        # Save REFINE_{HOST,PORT} as tests overwrite it
        self._refine_host_port = refine.REFINE_HOST, refine.REFINE_PORT
        refine.REFINE_HOST, refine.REFINE_PORT = '127.0.0.1', '3333'
@ -65,8 +65,8 @@ class RefineProjectTest(unittest.TestCase):
        p = RP('1658955153749')
        self.assertEqual(p.server.server, 'http://127.0.0.1:3333')
        self.assertEqual(p.project_id, '1658955153749')
-        refine.REFINE_HOST='10.0.0.1'
-        refine.REFINE_PORT='80'
+        refine.REFINE_HOST = '10.0.0.1'
+        refine.REFINE_PORT = '80'
        p = RP('1658955153749')
        self.assertEqual(p.server.server, 'http://10.0.0.1')

--- a/tests/test_tutorial.py
+++ b/tests/test_tutorial.py
@ -107,7 +107,8 @@ class TutorialTestFacets(refinetest.RefineTestCase):
        self.assertEqual(p.expression, 'value[0, 3]')
        self.assertEqual(p.choices['318'].count, 2331)
        # {16}
-        commissioned_date_facet = facet.NumericFacet('Commissioned Date',
+        commissioned_date_facet = facet.NumericFacet(
+            'Commissioned Date',
            expression='value.toDate().datePart("year")')
        self.project.engine.add_facet(commissioned_date_facet)
        response = self.project.compute_facets()
@ -115,7 +116,8 @@ class TutorialTestFacets(refinetest.RefineTestCase):
        self.assertEqual(cd.error_count, 959)
        self.assertEqual(cd.numeric_count, 5999)
        # {17}
-        office_description_facet = facet.NumericFacet('Office Description',
+        office_description_facet = facet.NumericFacet(
+            'Office Description',
            expression=r'value.match(/\D*(\d+)\w\w Rep.*/)[0].toNumber()')
        self.project.engine.add_facet(office_description_facet)
        response = self.project.compute_facets()
@ -212,8 +214,8 @@ class TutorialTestDuplicateDetection(refinetest.RefineTestCase):
        indexes = [row.index for row in response.rows]
        self.assertEqual(indexes, range(10))
        # {10}
-        self.project.add_column('email', 'count',
-            'facetCount(value, "value", "email")')
+        self.project.add_column(
+            'email', 'count', 'facetCount(value, "value", "email")')
        self.assertInResponse('column email by filling 10 rows')
        response = self.project.get_rows()
        self.assertEqual(self.project.column_order['email'], 0)  # i.e. 1st
@ -258,8 +260,8 @@ class TutorialTestTransposeColumnsIntoRows(refinetest.RefineTestCase):
        self.project.add_column('pair', 'year', 'value[2,6].toNumber()')
        self.assertInResponse('filling 26185 rows')
        # {5}
-        self.project.text_transform(column='pair',
-            expression='value.substring(7).toNumber()')
+        self.project.text_transform(
+            column='pair', expression='value.substring(7).toNumber()')
        self.assertInResponse('transform on 26185 cells')
        # {6}
        self.project.rename_column('pair', 'amount')
@ -278,11 +280,12 @@ class TutorialTestTransposeColumnsIntoRows(refinetest.RefineTestCase):
        self.assertEqual(row10['amount'], 113777303)


-class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
+class TutorialTestTransposeFixedNumberOfRowsIntoColumns(
        refinetest.RefineTestCase):
    project_file = 'fixed-rows.csv'
    project_file_options = {'split_into_columns': False,
                            'header_lines': 0}
+
    def test_transpose_fixed_number_of_rows_into_columns(self):
        # Section "5. Structural Editing,
        #             Transpose Fixed Number of Rows into Columns"
@ -293,7 +296,8 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
        self.assertInResponse('Transpose every 4 cells in column Column')
        # {9} - renaming column triggers a bug in Refine
        # {10}
-        self.project.add_column('Column 1', 'Transaction',
+        self.project.add_column(
+            'Column 1', 'Transaction',
            'if(value.contains(" sent "), "send", "receive")')
        self.assertInResponse('Column 1 by filling 4 rows')
        # {11}
@ -302,17 +306,20 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
        self.project.engine.add_facet(transaction_facet)
        self.project.compute_facets()
        # {12}, {13}, {14}
-        self.project.add_column('Column 1', 'Sender',
+        self.project.add_column(
+            'Column 1', 'Sender',
            'value.partition(" sent ")[0]')
        # XXX resetting the facet shows data in rows with Transaction=receive
        #     which shouldn't have been possible with the facet.
-        self.project.add_column('Column 1', 'Recipient',
+        self.project.add_column(
+            'Column 1', 'Recipient',
            'value.partition(" to ")[2].partition(" on ")[0]')
-        self.project.add_column('Column 1', 'Amount',
+        self.project.add_column(
+            'Column 1', 'Amount',
            'value.partition(" sent ")[2].partition(" to ")[0]')
        # {15}
        transaction_facet.reset().include('receive')
-        response = self.project.get_rows()
+        self.project.get_rows()
        # XXX there seems to be some kind of bug where the model doesn't
        #     match get_rows() output - cellIndex being returned that are
        #     out of range.
@ -322,13 +329,11 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
        # {16}
        for column, expression in (
            ('Sender',
-             'cells["Column 1"].value.partition(" from ")[2]'
-              '.partition(" on ")[0]'),
+             'cells["Column 1"].value.partition(" from ")[2].partition(" on ")[0]'),
            ('Recipient',
             'cells["Column 1"].value.partition(" received ")[0]'),
            ('Amount',
-             'cells["Column 1"].value.partition(" received ")[2]'
-             '.partition(" from ")[0]')
+             'cells["Column 1"].value.partition(" received ")[2].partition(" from ")[0]')
        ):
            self.project.text_transform(column, expression)
            self.assertInResponse('2 cells')
@ -343,7 +348,7 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
        self.assertInResponse('Reorder columns')


-class TutorialTestTransposeVariableNumbeOfRowsIntoColumns(
+class TutorialTestTransposeVariableNumberOfRowsIntoColumns(
        refinetest.RefineTestCase):
    project_file = 'variable-rows.csv'
    project_file_options = {'split_into_columns': False,
@ -351,13 +356,14 @@ class TutorialTestTransposeVariableNumbeOfRowsIntoColumns(

    def test_transpose_variable_number_of_rows_into_columns(self):
        # {20}, {21}
-        self.project.add_column('Column', 'First Line',
-            'if(value.contains(" on "), value, null)')
+        self.project.add_column(
+            'Column', 'First Line', 'if(value.contains(" on "), value, null)')
        self.assertInResponse('Column by filling 4 rows')
        response = self.project.get_rows()
        first_names = [row['First Line'][0:10] if row['First Line'] else None
                       for row in response.rows]
-        self.assertEqual(first_names, ['Tom Dalton', None, None, None,
+        self.assertEqual(first_names, [
+            'Tom Dalton', None, None, None,
            'Morgan Law', None, None, None, None, 'Eric Batem'])
        # {22}
        self.project.move_column('First Line', 0)
@ -369,12 +375,12 @@ class TutorialTestTransposeVariableNumbeOfRowsIntoColumns(
        self.assertEqual(response.mode, 'record-based')
        self.assertEqual(response.filtered, 4)
        # {24}
-        self.project.add_column('Column', 'Status',
-            'row.record.cells["Column"].value[-1]')
+        self.project.add_column(
+            'Column', 'Status', 'row.record.cells["Column"].value[-1]')
        self.assertInResponse('filling 18 rows')
        # {25}
-        self.project.text_transform('Column',
-            'row.record.cells["Column"].value[1, -1].join("|")')
+        self.project.text_transform(
+            'Column', 'row.record.cells["Column"].value[1, -1].join("|")')
        self.assertInResponse('18 cells')
        # {26}
        self.project.engine.mode = 'row-based'