#!/usr/bin/env python
"""
Google Refine Facets, Engine, and Facet Responses.
"""

# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>

import json
import re


def to_camel(attr):
    """Convert this_attr_name to thisAttrName.

    The first character is always lower-cased, so ``From`` becomes ``from``.
    An empty string is returned unchanged.
    """
    # Slicing (rather than indexing) keeps the empty string from raising.
    return (attr[:1].lower() +
            re.sub(r'_(.)', lambda x: x.group(1).upper(), attr[1:]))


def from_camel(attr):
    """Convert thisAttrName to this_attr_name."""
    # The look-behind prevents an underscore being added before a
    # capitalized first letter.
    return re.sub(r'(?<=.)([A-Z])', lambda x: '_' + x.group(1), attr).lower()


class Facet(object):
    """Base class holding a facet's settings as instance attributes."""

    def __init__(self, column, type, **options):
        """Store the facet type, the column (as both ``name`` and
        ``column_name``) and every extra keyword option as an attribute.
        """
        self.type = type
        self.name = column
        self.column_name = column
        for k, v in options.items():
            setattr(self, k, v)

    def as_dict(self):
        """Return the instance attributes as a dict with camelCase keys.

        Attributes whose value is None are left out.
        """
        return dict((to_camel(k), v)
                    for k, v in self.__dict__.items() if v is not None)


class TextFilterFacet(Facet):
    """Facet of type 'text' carrying a query string for one column."""

    def __init__(self, column, query, **options):
        super(TextFilterFacet, self).__init__(
            column, query=query, case_sensitive=False, type='text',
            mode='text', **options)


class TextFacet(Facet):
    """Facet of type 'list' with a selection of included values."""

    def __init__(self, column, selection=None, expression='value',
                 omit_blank=False, omit_error=False, select_blank=False,
                 select_error=False, invert=False, **options):
        """Create the facet.

        ``selection`` may be None (nothing selected), a single value, or a
        list of values; each value is added through include().
        """
        super(TextFacet, self).__init__(
            column,
            type='list',
            omit_blank=omit_blank,
            omit_error=omit_error,
            select_blank=select_blank,
            select_error=select_error,
            invert=invert,
            **options)
        self.expression = expression
        self.selection = []
        if selection is None:
            selection = []
        elif not isinstance(selection, list):
            selection = [selection]
        for value in selection:
            self.include(value)

    def include(self, value):
        """Add ``value`` to the selection unless it is already present.

        Always returns self so that calls can be chained.
        """
        for s in self.selection:
            if s['v']['v'] == value:
                # Already selected: nothing to add, but keep the fluent
                # interface working by still returning self.
                return self
        self.selection.append({'v': {'v': value, 'l': value}})
        return self

    def exclude(self, value):
        """Remove ``value`` from the selection and return self."""
        self.selection = [s for s in self.selection
                          if s['v']['v'] != value]
        return self

    def reset(self):
        """Clear the selection and return self."""
        self.selection = []
        return self


class BoolFacet(TextFacet):
    """TextFacet whose selection, if given, must be True or False."""

    def __init__(self, column, expression=None, selection=None):
        """Create the facet.

        Raises ValueError if ``selection`` is neither None nor a bool, or
        if ``expression`` is None.
        """
        if selection is not None and not isinstance(selection, bool):
            raise ValueError('selection must be True or False.')
        if expression is None:
            raise ValueError('Missing expression')
        super(BoolFacet, self).__init__(
            column, expression=expression, selection=selection)


class StarredFacet(BoolFacet):
    """BoolFacet on the expression 'row.starred' with an empty column."""

    def __init__(self, selection=None):
        super(StarredFacet, self).__init__(
            '', expression='row.starred', selection=selection)


class FlaggedFacet(BoolFacet):
    """BoolFacet on the expression 'row.flagged' with an empty column."""

    def __init__(self, selection=None):
        super(FlaggedFacet, self).__init__(
            '', expression='row.flagged', selection=selection)


class BlankFacet(BoolFacet):
    """BoolFacet on the expression 'isBlank(value)' for a column."""

    def __init__(self, column, selection=None):
        super(BlankFacet, self).__init__(
            column, expression='isBlank(value)', selection=selection)


class ReconJudgmentFacet(TextFacet):
    """TextFacet on a column's reconciliation judgment expression."""

    def __init__(self, column, **options):
        super(ReconJudgmentFacet, self).__init__(
            column,
            expression=('forNonBlank(cell.recon.judgment, v, v, '
                        'if(isNonBlank(value), "(unreconciled)", "(blank)"))'),
            **options)


# Capitalize 'From' to get around python's reserved word.
class NumericFacet(Facet):
    """Facet of type 'range' bounded by ``From`` and ``to``.

    ``From`` is capitalized because ``from`` is a reserved word in Python.
    """

    def __init__(self, column, From=None, to=None, expression='value',
                 select_blank=True, select_error=True,
                 select_non_numeric=True, select_numeric=True, **options):
        super(NumericFacet, self).__init__(
            column,
            From=From,
            to=to,
            expression=expression,
            type='range',
            select_blank=select_blank,
            select_error=select_error,
            select_non_numeric=select_non_numeric,
            select_numeric=select_numeric,
            **options)

    def reset(self):
        """Clear both range bounds and return self."""
        self.From = None
        self.to = None
        return self


# ``basestring`` only exists on Python 2.  Fall back to ``str`` so the
# string checks below work on Python 3 as well.
try:
    _string_types = basestring
except NameError:
    _string_types = str


class FacetResponse(object):
    """Class for unpacking an individual facet response."""

    def __init__(self, facet):
        """Unpack the ``facet`` dict into attributes.

        Each entry whose key is a string is first copied to an attribute
        named after the snake_case form of the key.  ``choices`` and
        ``blank_choice`` are then replaced by unpacked FacetChoice objects,
        and ``bins`` / ``base_bins`` are set when 'bins' is present.
        """
        for k, v in facet.items():
            # NOTE(review): the type test is applied to the key, not the
            # value; kept as-is because callers may rely on every entry
            # being exposed as an attribute.
            if isinstance(k, (bool, _string_types)):
                setattr(self, from_camel(k), v)
        self.choices = {}

        class FacetChoice(object):
            """Count and selected flag of a single facet choice."""

            def __init__(self, c):
                self.count = c['c']
                self.selected = c['s']

        if 'choices' in facet:
            # Key each choice by its value for direct lookup.
            for choice in facet['choices']:
                self.choices[choice['v']['v']] = FacetChoice(choice)
            if 'blankChoice' in facet:
                self.blank_choice = FacetChoice(facet['blankChoice'])
            else:
                self.blank_choice = None
        if 'bins' in facet:
            self.bins = facet['bins']
            self.base_bins = facet['baseBins']


class FacetsResponse(object):
    """FacetsResponse unpacking the compute-facets response.

    It has two attributes: facets & mode. Mode is either 'row-based' or
    'record-based'. facets is a list of facets produced by compute-facets, in
    the same order as they were specified in the Engine. By coupling the engine
    object with a custom container it's possible to look up the computed facet
    by the original facet's object.
    """

    def __init__(self, engine, facets):
        class FacetResponseContainer(object):
            """Facet responses, indexable by position or by Facet object."""
            facets = None

            def __init__(self, facet_responses):
                self.facets = [FacetResponse(fr) for fr in facet_responses]

            def __iter__(self):
                for facet in self.facets:
                    yield facet

            def __getitem__(self, index):
                # A non-integer index is taken to be the original Facet
                # object; translate it to its position via the engine.
                if not isinstance(index, int):
                    index = engine.facet_index_by_id[id(index)]
                assert self.facets[index].name == engine.facets[index].name
                return self.facets[index]

        self.facets = FacetResponseContainer(facets['facets'])
        self.mode = facets['mode']


class Engine(object):
    """An Engine keeps track of Facets, and responses to facet computation."""

    # Class-level defaults; __init__ rebinds both per instance through
    # set_facets() -> remove_all().
    facets = []
    facet_index_by_id = {}  # dict of facets by Facet object id

    def __init__(self, *facets, **kwargs):
        """Register ``facets``; ``mode`` keyword defaults to 'row-based'."""
        self.set_facets(*facets)
        self.mode = kwargs.get('mode', 'row-based')

    def set_facets(self, *facets):
        """facets may be a Facet or list of Facets."""
        self.remove_all()
        for facet in facets:
            self.add_facet(facet)

    def facets_response(self, response):
        """Unpack a compute-facets response."""
        return FacetsResponse(self, response)

    def __len__(self):
        return len(self.facets)

    def as_json(self):
        """Return a JSON string suitable for use as a POST parameter."""
        return json.dumps({
            'facets': [f.as_dict() for f in self.facets],  # XXX how with json?
            'mode': self.mode,
        })

    def add_facet(self, facet):
        """Append ``facet`` and remember its position by object id."""
        # Record the facet's object id so facet response can be looked up by id
        self.facet_index_by_id[id(facet)] = len(self.facets)
        self.facets.append(facet)

    def remove_all(self):
        """Remove all facets."""
        self.facet_index_by_id = {}
        self.facets = []

    def reset_all(self):
        """Reset all facets."""
        for facet in self.facets:
            facet.reset()


class Sorting(object):
    """Class representing the current sorting order for a project.

    Used in RefineProject.get_rows()"""

    def __init__(self, criteria=None):
        """Build the list of sort criteria.

        ``criteria`` may be None, a single criterion, or a list of them.
        A criterion is either a column name (string) or a dict; missing
        'reverse', 'errorPosition' and 'blankPosition' keys get defaults.
        Dicts passed in are copied, so the caller's objects are not modified.
        """
        self.criteria = []
        if criteria is None:
            criteria = []
        if not isinstance(criteria, list):
            criteria = [criteria]
        for criterion in criteria:
            # A string criterion defaults to a string sort on that column
            if isinstance(criterion, _string_types):
                criterion = {
                    'column': criterion,
                    'valueType': 'string',
                    'caseSensitive': False,
                }
            else:
                # Shallow copy so the defaults below do not leak into the
                # dict owned by the caller.
                criterion = dict(criterion)
            criterion.setdefault('reverse', False)
            criterion.setdefault('errorPosition', 1)
            criterion.setdefault('blankPosition', 2)
            self.criteria.append(criterion)

    def as_json(self):
        """Return the criteria as a JSON string under the 'criteria' key."""
        return json.dumps({'criteria': self.criteria})

    def __len__(self):
        return len(self.criteria)