Whitespace and minor renaming changes to bring the code in line with PEP 8 guidelines
This commit is contained in:
parent
e9ef9a6d56
commit
bc0a8e7c7b
|
@ -28,6 +28,7 @@ def to_camel(attr):
|
||||||
return (attr[0].lower() +
|
return (attr[0].lower() +
|
||||||
re.sub(r'_(.)', lambda x: x.group(1).upper(), attr[1:]))
|
re.sub(r'_(.)', lambda x: x.group(1).upper(), attr[1:]))
|
||||||
|
|
||||||
|
|
||||||
def from_camel(attr):
|
def from_camel(attr):
|
||||||
"""convert thisAttrName to this_attr_name."""
|
"""convert thisAttrName to this_attr_name."""
|
||||||
# Don't add an underscore for capitalized first letter
|
# Don't add an underscore for capitalized first letter
|
||||||
|
@ -35,8 +36,8 @@ def from_camel(attr):
|
||||||
|
|
||||||
|
|
||||||
class Facet(object):
|
class Facet(object):
|
||||||
def __init__(self, column, type, **options):
|
def __init__(self, column, facet_type, **options):
|
||||||
self.type = type
|
self.type = facet_type
|
||||||
self.name = column
|
self.name = column
|
||||||
self.column_name = column
|
self.column_name = column
|
||||||
for k, v in options.items():
|
for k, v in options.items():
|
||||||
|
@ -50,17 +51,17 @@ class Facet(object):
|
||||||
class TextFilterFacet(Facet):
|
class TextFilterFacet(Facet):
|
||||||
def __init__(self, column, query, **options):
|
def __init__(self, column, query, **options):
|
||||||
super(TextFilterFacet, self).__init__(
|
super(TextFilterFacet, self).__init__(
|
||||||
column, query=query, case_sensitive=False, type='text',
|
column, query=query, case_sensitive=False, facet_type='text',
|
||||||
mode='text', **options)
|
mode='text', **options)
|
||||||
|
|
||||||
|
|
||||||
class TextFacet(Facet):
|
class TextFacet(Facet):
|
||||||
def __init__(self, column, selection=None, expression='value',
|
def __init__(self, column, selection=None, expression='value',
|
||||||
omit_blank=False, omit_error=False, select_blank=False,
|
omit_blank=False, omit_error=False, select_blank=False,
|
||||||
select_error=False, invert=False, **options):
|
select_error=False, invert=False, **options):
|
||||||
super(TextFacet, self).__init__(
|
super(TextFacet, self).__init__(
|
||||||
column,
|
column,
|
||||||
type='list',
|
facet_type='list',
|
||||||
omit_blank=omit_blank,
|
omit_blank=omit_blank,
|
||||||
omit_error=omit_error,
|
omit_error=omit_error,
|
||||||
select_blank=select_blank,
|
select_blank=select_blank,
|
||||||
|
@ -99,37 +100,39 @@ class BoolFacet(TextFacet):
|
||||||
raise ValueError('selection must be True or False.')
|
raise ValueError('selection must be True or False.')
|
||||||
if expression is None:
|
if expression is None:
|
||||||
raise ValueError('Missing expression')
|
raise ValueError('Missing expression')
|
||||||
super(BoolFacet, self).__init__(column,
|
super(BoolFacet, self).__init__(
|
||||||
expression=expression, selection=selection)
|
column, expression=expression, selection=selection)
|
||||||
|
|
||||||
|
|
||||||
class StarredFacet(BoolFacet):
|
class StarredFacet(BoolFacet):
|
||||||
def __init__(self, selection=None):
|
def __init__(self, selection=None):
|
||||||
super(StarredFacet, self).__init__('',
|
super(StarredFacet, self).__init__(
|
||||||
expression='row.starred', selection=selection)
|
'', expression='row.starred', selection=selection)
|
||||||
|
|
||||||
|
|
||||||
class FlaggedFacet(BoolFacet):
|
class FlaggedFacet(BoolFacet):
|
||||||
def __init__(self, selection=None):
|
def __init__(self, selection=None):
|
||||||
super(FlaggedFacet, self).__init__('',
|
super(FlaggedFacet, self).__init__(
|
||||||
expression='row.flagged', selection=selection)
|
'', expression='row.flagged', selection=selection)
|
||||||
|
|
||||||
|
|
||||||
class BlankFacet(BoolFacet):
|
class BlankFacet(BoolFacet):
|
||||||
def __init__(self, column, selection=None):
|
def __init__(self, column, selection=None):
|
||||||
super(BlankFacet, self).__init__(column,
|
super(BlankFacet, self).__init__(
|
||||||
expression='isBlank(value)', selection=selection)
|
column, expression='isBlank(value)', selection=selection)
|
||||||
|
|
||||||
|
|
||||||
class ReconJudgmentFacet(TextFacet):
|
class ReconJudgmentFacet(TextFacet):
|
||||||
def __init__(self, column, **options):
|
def __init__(self, column, **options):
|
||||||
super(ReconJudgmentFacet, self).__init__(column,
|
super(ReconJudgmentFacet, self).__init__(
|
||||||
|
column,
|
||||||
expression=('forNonBlank(cell.recon.judgment, v, v, '
|
expression=('forNonBlank(cell.recon.judgment, v, v, '
|
||||||
'if(isNonBlank(value), "(unreconciled)", "(blank)"))'),
|
'if(isNonBlank(value), "(unreconciled)", "(blank)"))'),
|
||||||
**options)
|
**options)
|
||||||
|
|
||||||
|
|
||||||
# Capitalize 'From' to get around python's reserved word.
|
# Capitalize 'From' to get around python's reserved word.
|
||||||
|
#noinspection PyPep8Naming
|
||||||
class NumericFacet(Facet):
|
class NumericFacet(Facet):
|
||||||
def __init__(self, column, From=None, to=None, expression='value',
|
def __init__(self, column, From=None, to=None, expression='value',
|
||||||
select_blank=True, select_error=True, select_non_numeric=True,
|
select_blank=True, select_error=True, select_non_numeric=True,
|
||||||
|
@ -139,7 +142,7 @@ class NumericFacet(Facet):
|
||||||
From=From,
|
From=From,
|
||||||
to=to,
|
to=to,
|
||||||
expression=expression,
|
expression=expression,
|
||||||
type='range',
|
facet_type='range',
|
||||||
select_blank=select_blank,
|
select_blank=select_blank,
|
||||||
select_error=select_error,
|
select_error=select_error,
|
||||||
select_non_numeric=select_non_numeric,
|
select_non_numeric=select_non_numeric,
|
||||||
|
@ -155,10 +158,12 @@ class NumericFacet(Facet):
|
||||||
class FacetResponse(object):
|
class FacetResponse(object):
|
||||||
"""Class for unpacking an individual facet response."""
|
"""Class for unpacking an individual facet response."""
|
||||||
def __init__(self, facet):
|
def __init__(self, facet):
|
||||||
|
self.name = None
|
||||||
for k, v in facet.items():
|
for k, v in facet.items():
|
||||||
if isinstance(k, bool) or isinstance(k, basestring):
|
if isinstance(k, bool) or isinstance(k, basestring):
|
||||||
setattr(self, from_camel(k), v)
|
setattr(self, from_camel(k), v)
|
||||||
self.choices = {}
|
self.choices = {}
|
||||||
|
|
||||||
class FacetChoice(object):
|
class FacetChoice(object):
|
||||||
def __init__(self, c):
|
def __init__(self, c):
|
||||||
self.count = c['c']
|
self.count = c['c']
|
||||||
|
@ -188,11 +193,14 @@ class FacetsResponse(object):
|
||||||
def __init__(self, engine, facets):
|
def __init__(self, engine, facets):
|
||||||
class FacetResponseContainer(object):
|
class FacetResponseContainer(object):
|
||||||
facets = None
|
facets = None
|
||||||
|
|
||||||
def __init__(self, facet_responses):
|
def __init__(self, facet_responses):
|
||||||
self.facets = [FacetResponse(fr) for fr in facet_responses]
|
self.facets = [FacetResponse(fr) for fr in facet_responses]
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for facet in self.facets:
|
for facet in self.facets:
|
||||||
yield facet
|
yield facet
|
||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
if not isinstance(index, int):
|
if not isinstance(index, int):
|
||||||
index = engine.facet_index_by_id[id(index)]
|
index = engine.facet_index_by_id[id(index)]
|
||||||
|
|
|
@ -18,15 +18,13 @@ Google Refine history: parsing responses.
|
||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class HistoryEntry(object):
|
class HistoryEntry(object):
|
||||||
# N.B. e.g. **response['historyEntry'] won't work as keys are unicode :-/
|
# N.B. e.g. **response['historyEntry'] won't work as keys are unicode :-/
|
||||||
def __init__(self, id=None, time=None, description=None, **kwargs):
|
#noinspection PyUnusedLocal
|
||||||
if id is None:
|
def __init__(self, history_entry_id=None, time=None, description=None, **kwargs):
|
||||||
|
if history_entry_id is None:
|
||||||
raise ValueError('History entry id must be set')
|
raise ValueError('History entry id must be set')
|
||||||
self.id = id
|
self.id = history_entry_id
|
||||||
self.description = description
|
self.description = description
|
||||||
self.time = time
|
self.time = time
|
||||||
|
|
|
@ -50,7 +50,7 @@ class RefineServer(object):
|
||||||
|
|
||||||
def __init__(self, server=None):
|
def __init__(self, server=None):
|
||||||
if server is None:
|
if server is None:
|
||||||
server=self.url()
|
server = self.url()
|
||||||
self.server = server[:-1] if server.endswith('/') else server
|
self.server = server[:-1] if server.endswith('/') else server
|
||||||
self.__version = None # see version @property below
|
self.__version = None # see version @property below
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ class RefineServer(object):
|
||||||
url += '?' + urllib.urlencode(params)
|
url += '?' + urllib.urlencode(params)
|
||||||
req = urllib2.Request(url)
|
req = urllib2.Request(url)
|
||||||
if data:
|
if data:
|
||||||
req.add_data(data) # data = urllib.urlencode(data)
|
req.add_data(data) # data = urllib.urlencode(data)
|
||||||
#req.add_header('Accept-Encoding', 'gzip')
|
#req.add_header('Accept-Encoding', 'gzip')
|
||||||
try:
|
try:
|
||||||
response = urllib2.urlopen(req)
|
response = urllib2.urlopen(req)
|
||||||
|
@ -114,6 +114,7 @@ class RefineServer(object):
|
||||||
self.__version = self.get_version()['version']
|
self.__version = self.get_version()['version']
|
||||||
return self.__version
|
return self.__version
|
||||||
|
|
||||||
|
|
||||||
class Refine:
|
class Refine:
|
||||||
"""Class representing a connection to a Refine server."""
|
"""Class representing a connection to a Refine server."""
|
||||||
def __init__(self, server):
|
def __init__(self, server):
|
||||||
|
@ -145,19 +146,19 @@ class Refine:
|
||||||
return RefineProject(self.server, project_id)
|
return RefineProject(self.server, project_id)
|
||||||
|
|
||||||
def new_project(self, project_file=None, project_url=None,
|
def new_project(self, project_file=None, project_url=None,
|
||||||
project_name=None,
|
project_name=None,
|
||||||
split_into_columns=True,
|
split_into_columns=True,
|
||||||
separator='',
|
separator='',
|
||||||
ignore_initial_non_blank_lines=0,
|
ignore_initial_non_blank_lines=0,
|
||||||
header_lines=1, # use 0 if your data has no header
|
header_lines=1, # use 0 if your data has no header
|
||||||
skip_initial_data_rows=0,
|
skip_initial_data_rows=0,
|
||||||
limit=None, # no more than this number of rows
|
limit=None, # no more than this number of rows
|
||||||
guess_value_type=True, # numbers, dates, etc.
|
guess_value_type=True, # numbers, dates, etc.
|
||||||
ignore_quotes=False):
|
ignore_quotes=False):
|
||||||
|
|
||||||
if ((project_file and project_url) or
|
if (project_file and project_url) or (not project_file and not project_url):
|
||||||
(not project_file and not project_url)):
|
|
||||||
raise ValueError('One (only) of project_file and project_url must be set')
|
raise ValueError('One (only) of project_file and project_url must be set')
|
||||||
|
|
||||||
def s(opt):
|
def s(opt):
|
||||||
if isinstance(opt, bool):
|
if isinstance(opt, bool):
|
||||||
return 'on' if opt else ''
|
return 'on' if opt else ''
|
||||||
|
@ -211,6 +212,7 @@ def RowsResponseFactory(column_index):
|
||||||
self.index = row_response['i']
|
self.index = row_response['i']
|
||||||
self.row = [c['v'] if c else None
|
self.row = [c['v'] if c else None
|
||||||
for c in row_response['cells']]
|
for c in row_response['cells']]
|
||||||
|
|
||||||
def __getitem__(self, column):
|
def __getitem__(self, column):
|
||||||
# Trailing nulls seem to be stripped from row data
|
# Trailing nulls seem to be stripped from row data
|
||||||
try:
|
try:
|
||||||
|
@ -220,11 +222,14 @@ def RowsResponseFactory(column_index):
|
||||||
|
|
||||||
def __init__(self, rows_response):
|
def __init__(self, rows_response):
|
||||||
self.rows_response = rows_response
|
self.rows_response = rows_response
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for row_response in self.rows_response:
|
for row_response in self.rows_response:
|
||||||
yield self.RefineRow(row_response)
|
yield self.RefineRow(row_response)
|
||||||
|
|
||||||
def __getitem__(self, index):
|
def __getitem__(self, index):
|
||||||
return self.RefineRow(self.rows_response[index])
|
return self.RefineRow(self.rows_response[index])
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self.rows_response)
|
return len(self.rows_response)
|
||||||
|
|
||||||
|
@ -331,12 +336,12 @@ class RefineProject:
|
||||||
return
|
return
|
||||||
|
|
||||||
def apply_operations(self, file_path, wait=True):
|
def apply_operations(self, file_path, wait=True):
|
||||||
json = open(file_path).read()
|
json_data = open(file_path).read()
|
||||||
response_json = self.do_json('apply-operations', {'operations': json})
|
response_json = self.do_json('apply-operations', {'operations': json_data})
|
||||||
if response_json['code'] == 'pending' and wait:
|
if response_json['code'] == 'pending' and wait:
|
||||||
self.wait_until_idle()
|
self.wait_until_idle()
|
||||||
return 'ok'
|
return 'ok'
|
||||||
return response_json['code'] # can be 'ok' or 'pending'
|
return response_json['code'] # can be 'ok' or 'pending'
|
||||||
|
|
||||||
def export(self, export_format='tsv'):
|
def export(self, export_format='tsv'):
|
||||||
"""Return a fileobject of a project's data."""
|
"""Return a fileobject of a project's data."""
|
||||||
|
@ -426,6 +431,7 @@ class RefineProject:
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def compute_clusters(self, column, clusterer_type='binning',
|
def compute_clusters(self, column, clusterer_type='binning',
|
||||||
function=None, params=None):
|
function=None, params=None):
|
||||||
"""Returns a list of clusters of {'value': ..., 'count': ...}."""
|
"""Returns a list of clusters of {'value': ..., 'count': ...}."""
|
||||||
|
@ -443,7 +449,7 @@ class RefineProject:
|
||||||
def annotate_one_row(self, row, annotation, state=True):
|
def annotate_one_row(self, row, annotation, state=True):
|
||||||
if annotation not in ('starred', 'flagged'):
|
if annotation not in ('starred', 'flagged'):
|
||||||
raise ValueError('annotation must be one of starred or flagged')
|
raise ValueError('annotation must be one of starred or flagged')
|
||||||
state = 'true' if state == True else 'false'
|
state = 'true' if state is True else 'false'
|
||||||
return self.do_json('annotate-one-row', {'row': row.index,
|
return self.do_json('annotate-one-row', {'row': row.index,
|
||||||
annotation: state})
|
annotation: state})
|
||||||
|
|
||||||
|
@ -457,18 +463,19 @@ class RefineProject:
|
||||||
column_insert_index=None, on_error='set-to-blank'):
|
column_insert_index=None, on_error='set-to-blank'):
|
||||||
if column_insert_index is None:
|
if column_insert_index is None:
|
||||||
column_insert_index = self.column_order[column] + 1
|
column_insert_index = self.column_order[column] + 1
|
||||||
response = self.do_json('add-column', {'baseColumnName': column,
|
response = self.do_json('add-column', {
|
||||||
'newColumnName': new_column, 'expression': expression,
|
'baseColumnName': column, 'newColumnName': new_column,
|
||||||
'columnInsertIndex': column_insert_index, 'onError': on_error})
|
'expression': expression, 'columnInsertIndex': column_insert_index,
|
||||||
|
'onError': on_error})
|
||||||
self.get_models()
|
self.get_models()
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def split_column(self, column, separator=',', mode='separator',
|
def split_column(self, column, separator=',', mode='separator',
|
||||||
regex=False, guess_cell_type=True,
|
regex=False, guess_cell_type=True,
|
||||||
remove_original_column=True):
|
remove_original_column=True):
|
||||||
response = self.do_json('split-column', {'columnName': column,
|
response = self.do_json('split-column', {
|
||||||
'separator': separator, 'mode': mode, 'regex': regex,
|
'columnName': column, 'separator': separator, 'mode': mode,
|
||||||
'guessCellType': guess_cell_type,
|
'regex': regex, 'guessCellType': guess_cell_type,
|
||||||
'removeOriginalColumn': remove_original_column})
|
'removeOriginalColumn': remove_original_column})
|
||||||
self.get_models()
|
self.get_models()
|
||||||
return response
|
return response
|
||||||
|
@ -505,9 +512,11 @@ class RefineProject:
|
||||||
self.get_models()
|
self.get_models()
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def transpose_columns_into_rows(self, start_column, column_count,
|
def transpose_columns_into_rows(
|
||||||
combined_column_name, separator=':', prepend_column_name=True,
|
self, start_column, column_count,
|
||||||
ignore_blank_cells=True):
|
combined_column_name, separator=':', prepend_column_name=True,
|
||||||
|
ignore_blank_cells=True):
|
||||||
|
|
||||||
response = self.do_json('transpose-columns-into-rows', {
|
response = self.do_json('transpose-columns-into-rows', {
|
||||||
'startColumnName': start_column, 'columnCount': column_count,
|
'startColumnName': start_column, 'columnCount': column_count,
|
||||||
'combinedColumnName': combined_column_name,
|
'combinedColumnName': combined_column_name,
|
||||||
|
@ -550,7 +559,8 @@ class RefineProject:
|
||||||
return recon_service
|
return recon_service
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def reconcile(self, column, service, type=None, config=None):
|
def reconcile(self, column, service, reconciliation_type=None,
|
||||||
|
reconciliation_config=None):
|
||||||
"""Perform a reconciliation asynchronously.
|
"""Perform a reconciliation asynchronously.
|
||||||
|
|
||||||
config: {
|
config: {
|
||||||
|
@ -570,21 +580,21 @@ class RefineProject:
|
||||||
for reconciliation to complete.
|
for reconciliation to complete.
|
||||||
"""
|
"""
|
||||||
# Create a reconciliation config by looking up recon service info
|
# Create a reconciliation config by looking up recon service info
|
||||||
if config is None:
|
if reconciliation_config is None:
|
||||||
service = self.get_reconciliation_service_by_name_or_url(service)
|
service = self.get_reconciliation_service_by_name_or_url(service)
|
||||||
if type is None:
|
if reconciliation_type is None:
|
||||||
raise ValueError('Must have at least one of config or type')
|
raise ValueError('Must have at least one of config or type')
|
||||||
config = {
|
reconciliation_config = {
|
||||||
'mode': 'standard-service',
|
'mode': 'standard-service',
|
||||||
'service': service['url'],
|
'service': service['url'],
|
||||||
'identifierSpace': service['identifierSpace'],
|
'identifierSpace': service['identifierSpace'],
|
||||||
'schemaSpace': service['schemaSpace'],
|
'schemaSpace': service['schemaSpace'],
|
||||||
'type': {
|
'type': {
|
||||||
'id': type['id'],
|
'id': reconciliation_type['id'],
|
||||||
'name': type['name'],
|
'name': reconciliation_type['name'],
|
||||||
},
|
},
|
||||||
'autoMatch': True,
|
'autoMatch': True,
|
||||||
'columnDetails': [],
|
'columnDetails': [],
|
||||||
}
|
}
|
||||||
return self.do_json('reconcile', {
|
return self.do_json('reconcile', {
|
||||||
'columnName': column, 'config': json.dumps(config)})
|
'columnName': column, 'config': json.dumps(reconciliation_config)})
|
||||||
|
|
11
refine.py
11
refine.py
|
@ -50,16 +50,19 @@ PARSER.add_option('-E', '--export', dest='export', action='store_true',
|
||||||
PARSER.add_option('-f', '--apply', dest='apply',
|
PARSER.add_option('-f', '--apply', dest='apply',
|
||||||
help='Apply a JSON commands file to a project')
|
help='Apply a JSON commands file to a project')
|
||||||
|
|
||||||
|
|
||||||
def list_projects():
|
def list_projects():
|
||||||
"""Query the Refine server and list projects by ID: name."""
|
"""Query the Refine server and list projects by ID: name."""
|
||||||
projects = refine.Refine(refine.RefineServer()).list_projects().items()
|
projects = refine.Refine(refine.RefineServer()).list_projects().items()
|
||||||
|
|
||||||
def date_to_epoch(json_dt):
|
def date_to_epoch(json_dt):
|
||||||
"Convert a JSON date time into seconds-since-epoch."
|
"""Convert a JSON date time into seconds-since-epoch."""
|
||||||
return time.mktime(time.strptime(json_dt, '%Y-%m-%dT%H:%M:%SZ'))
|
return time.mktime(time.strptime(json_dt, '%Y-%m-%dT%H:%M:%SZ'))
|
||||||
projects.sort(key=lambda v: date_to_epoch(v[1]['modified']), reverse=True)
|
projects.sort(key=lambda v: date_to_epoch(v[1]['modified']), reverse=True)
|
||||||
for project_id, project_info in projects:
|
for project_id, project_info in projects:
|
||||||
print('{0:>14}: {1}'.format(project_id, project_info['name']))
|
print('{0:>14}: {1}'.format(project_id, project_info['name']))
|
||||||
|
|
||||||
|
|
||||||
def export_project(project, options):
|
def export_project(project, options):
|
||||||
"""Dump a project to stdout or options.output file."""
|
"""Dump a project to stdout or options.output file."""
|
||||||
export_format = 'tsv'
|
export_format = 'tsv'
|
||||||
|
@ -73,8 +76,10 @@ def export_project(project, options):
|
||||||
output.writelines(project.export(export_format=export_format))
|
output.writelines(project.export(export_format=export_format))
|
||||||
output.close()
|
output.close()
|
||||||
|
|
||||||
|
|
||||||
|
#noinspection PyPep8Naming
|
||||||
def main():
|
def main():
|
||||||
"Main."
|
"""Main."""
|
||||||
options, args = PARSER.parse_args()
|
options, args = PARSER.parse_args()
|
||||||
|
|
||||||
if options.host:
|
if options.host:
|
||||||
|
@ -100,4 +105,4 @@ def main():
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# return project so that it's available interactively, python -i refine.py
|
# return project so that it's available interactively, python -i refine.py
|
||||||
project = main()
|
refine_project = main()
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
urllib2-file>=0.2.1
|
urllib2_file>=0.2.1
|
19
setup.py
19
setup.py
|
@ -20,8 +20,9 @@ import os
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
from setuptools import find_packages
|
from setuptools import find_packages
|
||||||
|
|
||||||
def read(fname):
|
|
||||||
return open(os.path.join(os.path.dirname(__file__), fname)).read()
|
def read(filename):
|
||||||
|
return open(os.path.join(os.path.dirname(__file__), filename)).read()
|
||||||
|
|
||||||
setup(name='refine-client',
|
setup(name='refine-client',
|
||||||
version='0.2.1',
|
version='0.2.1',
|
||||||
|
@ -35,13 +36,13 @@ setup(name='refine-client',
|
||||||
install_requires=['urllib2_file'],
|
install_requires=['urllib2_file'],
|
||||||
platforms=['Any'],
|
platforms=['Any'],
|
||||||
classifiers = [
|
classifiers = [
|
||||||
'Development Status :: 3 - Alpha',
|
'Development Status :: 3 - Alpha',
|
||||||
'Intended Audience :: Developers',
|
'Intended Audience :: Developers',
|
||||||
'License :: OSI Approved :: GNU General Public License (GPL)',
|
'License :: OSI Approved :: GNU General Public License (GPL)',
|
||||||
'Operating System :: OS Independent',
|
'Operating System :: OS Independent',
|
||||||
'Programming Language :: Python',
|
'Programming Language :: Python',
|
||||||
'Topic :: Software Development :: Libraries :: Python Modules',
|
'Topic :: Software Development :: Libraries :: Python Modules',
|
||||||
'Topic :: Text Processing',
|
'Topic :: Text Processing',
|
||||||
],
|
],
|
||||||
test_suite='tests',
|
test_suite='tests',
|
||||||
)
|
)
|
||||||
|
|
|
@ -20,6 +20,7 @@ from google.refine import refine
|
||||||
PATH_TO_TEST_DATA = os.path.join('tests', 'data')
|
PATH_TO_TEST_DATA = os.path.join('tests', 'data')
|
||||||
|
|
||||||
|
|
||||||
|
#noinspection PyPep8Naming
|
||||||
class RefineTestCase(unittest.TestCase):
|
class RefineTestCase(unittest.TestCase):
|
||||||
project_file = None
|
project_file = None
|
||||||
project_file_options = {}
|
project_file_options = {}
|
||||||
|
@ -42,6 +43,7 @@ class RefineTestCase(unittest.TestCase):
|
||||||
self.project = None
|
self.project = None
|
||||||
|
|
||||||
def assertInResponse(self, expect):
|
def assertInResponse(self, expect):
|
||||||
|
desc = None
|
||||||
try:
|
try:
|
||||||
desc = self.project.history_entry.description
|
desc = self.project.history_entry.description
|
||||||
self.assertTrue(expect in desc)
|
self.assertTrue(expect in desc)
|
||||||
|
|
|
@ -13,11 +13,11 @@ from google.refine.history import *
|
||||||
class HistoryTest(unittest.TestCase):
|
class HistoryTest(unittest.TestCase):
|
||||||
def test_init(self):
|
def test_init(self):
|
||||||
response = {
|
response = {
|
||||||
u"code":"ok",
|
u"code": "ok",
|
||||||
u"historyEntry": {
|
u"historyEntry": {
|
||||||
u"id":1303851435223,
|
u"id": 1303851435223,
|
||||||
u"description":"Split 4 cells",
|
u"description": "Split 4 cells",
|
||||||
u"time":"2011-04-26T16:45:08Z"
|
u"time": "2011-04-26T16:45:08Z"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
he = response['historyEntry']
|
he = response['historyEntry']
|
||||||
|
|
|
@ -46,7 +46,7 @@ class RefineProjectTest(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
# Mock out get_models so it doesn't attempt to connect to a server
|
# Mock out get_models so it doesn't attempt to connect to a server
|
||||||
self._get_models = refine.RefineProject.get_models
|
self._get_models = refine.RefineProject.get_models
|
||||||
refine.RefineProject.get_models = lambda self: self
|
refine.RefineProject.get_models = lambda me: me
|
||||||
# Save REFINE_{HOST,PORT} as tests overwrite it
|
# Save REFINE_{HOST,PORT} as tests overwrite it
|
||||||
self._refine_host_port = refine.REFINE_HOST, refine.REFINE_PORT
|
self._refine_host_port = refine.REFINE_HOST, refine.REFINE_PORT
|
||||||
refine.REFINE_HOST, refine.REFINE_PORT = '127.0.0.1', '3333'
|
refine.REFINE_HOST, refine.REFINE_PORT = '127.0.0.1', '3333'
|
||||||
|
@ -65,8 +65,8 @@ class RefineProjectTest(unittest.TestCase):
|
||||||
p = RP('1658955153749')
|
p = RP('1658955153749')
|
||||||
self.assertEqual(p.server.server, 'http://127.0.0.1:3333')
|
self.assertEqual(p.server.server, 'http://127.0.0.1:3333')
|
||||||
self.assertEqual(p.project_id, '1658955153749')
|
self.assertEqual(p.project_id, '1658955153749')
|
||||||
refine.REFINE_HOST='10.0.0.1'
|
refine.REFINE_HOST = '10.0.0.1'
|
||||||
refine.REFINE_PORT='80'
|
refine.REFINE_PORT = '80'
|
||||||
p = RP('1658955153749')
|
p = RP('1658955153749')
|
||||||
self.assertEqual(p.server.server, 'http://10.0.0.1')
|
self.assertEqual(p.server.server, 'http://10.0.0.1')
|
||||||
|
|
||||||
|
|
|
@ -107,7 +107,8 @@ class TutorialTestFacets(refinetest.RefineTestCase):
|
||||||
self.assertEqual(p.expression, 'value[0, 3]')
|
self.assertEqual(p.expression, 'value[0, 3]')
|
||||||
self.assertEqual(p.choices['318'].count, 2331)
|
self.assertEqual(p.choices['318'].count, 2331)
|
||||||
# {16}
|
# {16}
|
||||||
commissioned_date_facet = facet.NumericFacet('Commissioned Date',
|
commissioned_date_facet = facet.NumericFacet(
|
||||||
|
'Commissioned Date',
|
||||||
expression='value.toDate().datePart("year")')
|
expression='value.toDate().datePart("year")')
|
||||||
self.project.engine.add_facet(commissioned_date_facet)
|
self.project.engine.add_facet(commissioned_date_facet)
|
||||||
response = self.project.compute_facets()
|
response = self.project.compute_facets()
|
||||||
|
@ -115,7 +116,8 @@ class TutorialTestFacets(refinetest.RefineTestCase):
|
||||||
self.assertEqual(cd.error_count, 959)
|
self.assertEqual(cd.error_count, 959)
|
||||||
self.assertEqual(cd.numeric_count, 5999)
|
self.assertEqual(cd.numeric_count, 5999)
|
||||||
# {17}
|
# {17}
|
||||||
office_description_facet = facet.NumericFacet('Office Description',
|
office_description_facet = facet.NumericFacet(
|
||||||
|
'Office Description',
|
||||||
expression=r'value.match(/\D*(\d+)\w\w Rep.*/)[0].toNumber()')
|
expression=r'value.match(/\D*(\d+)\w\w Rep.*/)[0].toNumber()')
|
||||||
self.project.engine.add_facet(office_description_facet)
|
self.project.engine.add_facet(office_description_facet)
|
||||||
response = self.project.compute_facets()
|
response = self.project.compute_facets()
|
||||||
|
@ -212,8 +214,8 @@ class TutorialTestDuplicateDetection(refinetest.RefineTestCase):
|
||||||
indexes = [row.index for row in response.rows]
|
indexes = [row.index for row in response.rows]
|
||||||
self.assertEqual(indexes, range(10))
|
self.assertEqual(indexes, range(10))
|
||||||
# {10}
|
# {10}
|
||||||
self.project.add_column('email', 'count',
|
self.project.add_column(
|
||||||
'facetCount(value, "value", "email")')
|
'email', 'count', 'facetCount(value, "value", "email")')
|
||||||
self.assertInResponse('column email by filling 10 rows')
|
self.assertInResponse('column email by filling 10 rows')
|
||||||
response = self.project.get_rows()
|
response = self.project.get_rows()
|
||||||
self.assertEqual(self.project.column_order['email'], 0) # i.e. 1st
|
self.assertEqual(self.project.column_order['email'], 0) # i.e. 1st
|
||||||
|
@ -258,8 +260,8 @@ class TutorialTestTransposeColumnsIntoRows(refinetest.RefineTestCase):
|
||||||
self.project.add_column('pair', 'year', 'value[2,6].toNumber()')
|
self.project.add_column('pair', 'year', 'value[2,6].toNumber()')
|
||||||
self.assertInResponse('filling 26185 rows')
|
self.assertInResponse('filling 26185 rows')
|
||||||
# {5}
|
# {5}
|
||||||
self.project.text_transform(column='pair',
|
self.project.text_transform(
|
||||||
expression='value.substring(7).toNumber()')
|
column='pair', expression='value.substring(7).toNumber()')
|
||||||
self.assertInResponse('transform on 26185 cells')
|
self.assertInResponse('transform on 26185 cells')
|
||||||
# {6}
|
# {6}
|
||||||
self.project.rename_column('pair', 'amount')
|
self.project.rename_column('pair', 'amount')
|
||||||
|
@ -274,15 +276,16 @@ class TutorialTestTransposeColumnsIntoRows(refinetest.RefineTestCase):
|
||||||
row10 = response.rows[9]
|
row10 = response.rows[9]
|
||||||
self.assertEqual(row10['country_name'], 'Afghanistan')
|
self.assertEqual(row10['country_name'], 'Afghanistan')
|
||||||
self.assertEqual(row10['program_name'],
|
self.assertEqual(row10['program_name'],
|
||||||
'Department of Defense Security Assistance')
|
'Department of Defense Security Assistance')
|
||||||
self.assertEqual(row10['amount'], 113777303)
|
self.assertEqual(row10['amount'], 113777303)
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
|
class TutorialTestTransposeFixedNumberOfRowsIntoColumns(
|
||||||
refinetest.RefineTestCase):
|
refinetest.RefineTestCase):
|
||||||
project_file = 'fixed-rows.csv'
|
project_file = 'fixed-rows.csv'
|
||||||
project_file_options = {'split_into_columns': False,
|
project_file_options = {'split_into_columns': False,
|
||||||
'header_lines': 0}
|
'header_lines': 0}
|
||||||
|
|
||||||
def test_transpose_fixed_number_of_rows_into_columns(self):
|
def test_transpose_fixed_number_of_rows_into_columns(self):
|
||||||
# Section "5. Structural Editing,
|
# Section "5. Structural Editing,
|
||||||
# Transpose Fixed Number of Rows into Columns"
|
# Transpose Fixed Number of Rows into Columns"
|
||||||
|
@ -293,7 +296,8 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
|
||||||
self.assertInResponse('Transpose every 4 cells in column Column')
|
self.assertInResponse('Transpose every 4 cells in column Column')
|
||||||
# {9} - renaming column triggers a bug in Refine
|
# {9} - renaming column triggers a bug in Refine
|
||||||
# {10}
|
# {10}
|
||||||
self.project.add_column('Column 1', 'Transaction',
|
self.project.add_column(
|
||||||
|
'Column 1', 'Transaction',
|
||||||
'if(value.contains(" sent "), "send", "receive")')
|
'if(value.contains(" sent "), "send", "receive")')
|
||||||
self.assertInResponse('Column 1 by filling 4 rows')
|
self.assertInResponse('Column 1 by filling 4 rows')
|
||||||
# {11}
|
# {11}
|
||||||
|
@ -302,17 +306,20 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
|
||||||
self.project.engine.add_facet(transaction_facet)
|
self.project.engine.add_facet(transaction_facet)
|
||||||
self.project.compute_facets()
|
self.project.compute_facets()
|
||||||
# {12}, {13}, {14}
|
# {12}, {13}, {14}
|
||||||
self.project.add_column('Column 1', 'Sender',
|
self.project.add_column(
|
||||||
|
'Column 1', 'Sender',
|
||||||
'value.partition(" sent ")[0]')
|
'value.partition(" sent ")[0]')
|
||||||
# XXX resetting the facet shows data in rows with Transaction=receive
|
# XXX resetting the facet shows data in rows with Transaction=receive
|
||||||
# which shouldn't have been possible with the facet.
|
# which shouldn't have been possible with the facet.
|
||||||
self.project.add_column('Column 1', 'Recipient',
|
self.project.add_column(
|
||||||
'value.partition(" to ")[2].partition(" on ")[0]')
|
'Column 1', 'Recipient',
|
||||||
self.project.add_column('Column 1', 'Amount',
|
'value.partition(" to ")[2].partition(" on ")[0]')
|
||||||
'value.partition(" sent ")[2].partition(" to ")[0]')
|
self.project.add_column(
|
||||||
|
'Column 1', 'Amount',
|
||||||
|
'value.partition(" sent ")[2].partition(" to ")[0]')
|
||||||
# {15}
|
# {15}
|
||||||
transaction_facet.reset().include('receive')
|
transaction_facet.reset().include('receive')
|
||||||
response = self.project.get_rows()
|
self.project.get_rows()
|
||||||
# XXX there seems to be some kind of bug where the model doesn't
|
# XXX there seems to be some kind of bug where the model doesn't
|
||||||
# match get_rows() output - cellIndex being returned that are
|
# match get_rows() output - cellIndex being returned that are
|
||||||
# out of range.
|
# out of range.
|
||||||
|
@ -322,13 +329,11 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
|
||||||
# {16}
|
# {16}
|
||||||
for column, expression in (
|
for column, expression in (
|
||||||
('Sender',
|
('Sender',
|
||||||
'cells["Column 1"].value.partition(" from ")[2]'
|
'cells["Column 1"].value.partition(" from ")[2].partition(" on ")[0]'),
|
||||||
'.partition(" on ")[0]'),
|
|
||||||
('Recipient',
|
('Recipient',
|
||||||
'cells["Column 1"].value.partition(" received ")[0]'),
|
'cells["Column 1"].value.partition(" received ")[0]'),
|
||||||
('Amount',
|
('Amount',
|
||||||
'cells["Column 1"].value.partition(" received ")[2]'
|
'cells["Column 1"].value.partition(" received ")[2].partition(" from ")[0]')
|
||||||
'.partition(" from ")[0]')
|
|
||||||
):
|
):
|
||||||
self.project.text_transform(column, expression)
|
self.project.text_transform(column, expression)
|
||||||
self.assertInResponse('2 cells')
|
self.assertInResponse('2 cells')
|
||||||
|
@ -343,21 +348,22 @@ class TutorialTestTransposeFixedNumbeOfRowsIntoColumns(
|
||||||
self.assertInResponse('Reorder columns')
|
self.assertInResponse('Reorder columns')
|
||||||
|
|
||||||
|
|
||||||
class TutorialTestTransposeVariableNumbeOfRowsIntoColumns(
|
class TutorialTestTransposeVariableNumberOfRowsIntoColumns(
|
||||||
refinetest.RefineTestCase):
|
refinetest.RefineTestCase):
|
||||||
project_file = 'variable-rows.csv'
|
project_file = 'variable-rows.csv'
|
||||||
project_file_options = {'split_into_columns': False,
|
project_file_options = {'split_into_columns': False,
|
||||||
'header_lines': 0}
|
'header_lines': 0}
|
||||||
|
|
||||||
def test_transpose_variable_number_of_rows_into_columns(self):
|
def test_transpose_variable_number_of_rows_into_columns(self):
|
||||||
# {20}, {21}
|
# {20}, {21}
|
||||||
self.project.add_column('Column', 'First Line',
|
self.project.add_column(
|
||||||
'if(value.contains(" on "), value, null)')
|
'Column', 'First Line', 'if(value.contains(" on "), value, null)')
|
||||||
self.assertInResponse('Column by filling 4 rows')
|
self.assertInResponse('Column by filling 4 rows')
|
||||||
response = self.project.get_rows()
|
response = self.project.get_rows()
|
||||||
first_names = [row['First Line'][0:10] if row['First Line'] else None
|
first_names = [row['First Line'][0:10] if row['First Line'] else None
|
||||||
for row in response.rows]
|
for row in response.rows]
|
||||||
self.assertEqual(first_names, ['Tom Dalton', None, None, None,
|
self.assertEqual(first_names, [
|
||||||
|
'Tom Dalton', None, None, None,
|
||||||
'Morgan Law', None, None, None, None, 'Eric Batem'])
|
'Morgan Law', None, None, None, None, 'Eric Batem'])
|
||||||
# {22}
|
# {22}
|
||||||
self.project.move_column('First Line', 0)
|
self.project.move_column('First Line', 0)
|
||||||
|
@ -369,12 +375,12 @@ class TutorialTestTransposeVariableNumbeOfRowsIntoColumns(
|
||||||
self.assertEqual(response.mode, 'record-based')
|
self.assertEqual(response.mode, 'record-based')
|
||||||
self.assertEqual(response.filtered, 4)
|
self.assertEqual(response.filtered, 4)
|
||||||
# {24}
|
# {24}
|
||||||
self.project.add_column('Column', 'Status',
|
self.project.add_column(
|
||||||
'row.record.cells["Column"].value[-1]')
|
'Column', 'Status', 'row.record.cells["Column"].value[-1]')
|
||||||
self.assertInResponse('filling 18 rows')
|
self.assertInResponse('filling 18 rows')
|
||||||
# {25}
|
# {25}
|
||||||
self.project.text_transform('Column',
|
self.project.text_transform(
|
||||||
'row.record.cells["Column"].value[1, -1].join("|")')
|
'Column', 'row.record.cells["Column"].value[1, -1].join("|")')
|
||||||
self.assertInResponse('18 cells')
|
self.assertInResponse('18 cells')
|
||||||
# {26}
|
# {26}
|
||||||
self.project.engine.mode = 'row-based'
|
self.project.engine.mode = 'row-based'
|
||||||
|
|
Loading…
Reference in New Issue