add encoding option (defaults to UTF-8 for csv/tsv/txt) and fix templating feature suffixById
This commit is contained in:
parent
4ed6925b25
commit
3c16169767
|
@ -162,20 +162,25 @@ def download(url, output_file=None):
|
||||||
print('Download to file %s complete' % output_file)
|
print('Download to file %s complete' % output_file)
|
||||||
|
|
||||||
|
|
||||||
def export(project_id, output_file=None, export_format=None):
|
def export(project_id, encoding=None, output_file=None, export_format=None):
|
||||||
"""Dump a project to stdout or file."""
|
"""Dump a project to stdout or file."""
|
||||||
project = refine.RefineProject(project_id)
|
project = refine.RefineProject(project_id)
|
||||||
if not export_format:
|
if not export_format:
|
||||||
export_format = 'tsv'
|
export_format = 'tsv'
|
||||||
if not output_file:
|
if not output_file:
|
||||||
|
if export_format in ['csv', 'tsv', 'txt']:
|
||||||
|
encoding = 'UTF-8'
|
||||||
sys.stdout.write(project.export(
|
sys.stdout.write(project.export(
|
||||||
export_format=export_format).read().decode('UTF-8'))
|
export_format=export_format, encoding=encoding).read())
|
||||||
else:
|
else:
|
||||||
ext = os.path.splitext(output_file)[1][1:]
|
ext = os.path.splitext(output_file)[1][1:]
|
||||||
if ext:
|
if ext:
|
||||||
export_format = ext.lower()
|
export_format = ext.lower()
|
||||||
|
if export_format in ['csv', 'tsv', 'txt']:
|
||||||
|
encoding = 'UTF-8'
|
||||||
with open(output_file, 'wb') as f:
|
with open(output_file, 'wb') as f:
|
||||||
f.write(project.export(export_format).read())
|
f.write(project.export(
|
||||||
|
export_format=export_format, encoding=encoding).read())
|
||||||
print('Export to file %s complete' % output_file)
|
print('Export to file %s complete' % output_file)
|
||||||
|
|
||||||
|
|
||||||
|
@ -222,6 +227,7 @@ def ls():
|
||||||
|
|
||||||
def templating(project_id,
|
def templating(project_id,
|
||||||
template,
|
template,
|
||||||
|
encoding='UTF-8',
|
||||||
output_file=None,
|
output_file=None,
|
||||||
mode=None,
|
mode=None,
|
||||||
prefix='',
|
prefix='',
|
||||||
|
@ -240,7 +246,8 @@ def templating(project_id,
|
||||||
templateconfig = {'prefix': prefix,
|
templateconfig = {'prefix': prefix,
|
||||||
'suffix': suffix,
|
'suffix': suffix,
|
||||||
'template': template,
|
'template': template,
|
||||||
'rowSeparator': rowSeparator}
|
'rowSeparator': rowSeparator,
|
||||||
|
'encoding': encoding}
|
||||||
|
|
||||||
# construct the engine config
|
# construct the engine config
|
||||||
if mode == 'record-based':
|
if mode == 'record-based':
|
||||||
|
@ -261,21 +268,20 @@ def templating(project_id,
|
||||||
engine['facets'].append(textFilter)
|
engine['facets'].append(textFilter)
|
||||||
templateconfig.update({'engine': json.dumps(engine)})
|
templateconfig.update({'engine': json.dumps(engine)})
|
||||||
|
|
||||||
# normal output or some refinable magic for splitToFiles functionality
|
|
||||||
if not splitToFiles:
|
if not splitToFiles:
|
||||||
|
# normal output
|
||||||
if not output_file:
|
if not output_file:
|
||||||
sys.stdout.write(project.export_templating(
|
sys.stdout.write(project.export_templating(
|
||||||
**templateconfig).read().decode('UTF-8'))
|
**templateconfig).read())
|
||||||
else:
|
else:
|
||||||
with open(output_file, 'wb') as f:
|
with open(output_file, 'wb') as f:
|
||||||
f.write(project.export_templating(**templateconfig).read())
|
f.write(project.export_templating(**templateconfig).read())
|
||||||
print('Export to file %s complete' % output_file)
|
print('Export to file %s complete' % output_file)
|
||||||
else:
|
else:
|
||||||
# common config for row-based and record-based
|
# splitToFiles functionality
|
||||||
prefix = templateconfig['prefix']
|
prefix = templateconfig['prefix']
|
||||||
suffix = templateconfig['suffix']
|
suffix = templateconfig['suffix']
|
||||||
split = '===|||THISISTHEBEGINNINGOFANEWRECORD|||==='
|
split = '===|||THISISTHEBEGINNINGOFANEWRECORD|||==='
|
||||||
keyColumn = project.get_models()['columnModel']['keyColumnName']
|
|
||||||
if not output_file:
|
if not output_file:
|
||||||
output_file = time.strftime('%Y%m%d')
|
output_file = time.strftime('%Y%m%d')
|
||||||
else:
|
else:
|
||||||
|
@ -283,23 +289,24 @@ def templating(project_id,
|
||||||
ext = os.path.splitext(output_file)[1][1:]
|
ext = os.path.splitext(output_file)[1][1:]
|
||||||
if not ext:
|
if not ext:
|
||||||
ext = 'txt'
|
ext = 'txt'
|
||||||
|
# generate config for subfeature suffixById
|
||||||
if suffixById:
|
if suffixById:
|
||||||
ids_template = ('{{forNonBlank(cells["' +
|
ids_template = ('{{forNonBlank(' +
|
||||||
keyColumn +
|
'with(row.columnNames[0],cn,cells[cn].value),' +
|
||||||
'"].value, v, v, "")}}')
|
'v,v,"")}}')
|
||||||
ids_templateconfig = {'engine': json.dumps(engine),
|
ids_templateconfig = {'engine': json.dumps(engine),
|
||||||
'template': ids_template,
|
'template': ids_template,
|
||||||
'rowSeparator': '\n'}
|
'rowSeparator': '\n',
|
||||||
|
'encoding': encoding}
|
||||||
ids = [line.rstrip('\n') for line in project.export_templating(
|
ids = [line.rstrip('\n') for line in project.export_templating(
|
||||||
**ids_templateconfig) if line.rstrip('\n')]
|
**ids_templateconfig) if line.rstrip('\n')]
|
||||||
|
# generate common config
|
||||||
if mode == 'record-based':
|
if mode == 'record-based':
|
||||||
# record-based: split-character into template
|
# record-based: split-character into template
|
||||||
# if key column is not blank (=record)
|
# if key column is not blank (=record)
|
||||||
template = ('{{forNonBlank(cells["' +
|
template = ('{{forNonBlank(' +
|
||||||
keyColumn +
|
'with(row.columnNames[0],cn,cells[cn].value),' +
|
||||||
'"].value, v, "' +
|
'v,"' + split + '")}}' +
|
||||||
split +
|
|
||||||
'", "")}}' +
|
|
||||||
templateconfig['template'])
|
templateconfig['template'])
|
||||||
templateconfig.update({'prefix': '',
|
templateconfig.update({'prefix': '',
|
||||||
'suffix': '',
|
'suffix': '',
|
||||||
|
@ -312,6 +319,7 @@ def templating(project_id,
|
||||||
'suffix': '',
|
'suffix': '',
|
||||||
'template': template,
|
'template': template,
|
||||||
'rowSeparator': ''})
|
'rowSeparator': ''})
|
||||||
|
# execute
|
||||||
records = project.export_templating(
|
records = project.export_templating(
|
||||||
**templateconfig).read().split(split)
|
**templateconfig).read().split(split)
|
||||||
del records[0] # skip first blank entry
|
del records[0] # skip first blank entry
|
||||||
|
|
|
@ -427,25 +427,31 @@ class RefineProject:
|
||||||
return 'ok'
|
return 'ok'
|
||||||
return response_json['code'] # can be 'ok' or 'pending'
|
return response_json['code'] # can be 'ok' or 'pending'
|
||||||
|
|
||||||
def export(self, export_format='tsv'):
|
def export(self, encoding=None, export_format='tsv'):
|
||||||
"""Return a fileobject of a project's data."""
|
"""Return a fileobject of a project's data."""
|
||||||
url = ('export-rows/' +
|
url = ('export-rows/' +
|
||||||
urllib.quote(self.project_name().encode('utf8')) +
|
urllib.quote(self.project_name().encode('utf8')) +
|
||||||
'.' + export_format)
|
'.' + export_format)
|
||||||
return self.do_raw(url, data={'format': export_format})
|
data = {'format': export_format}
|
||||||
|
if encoding:
|
||||||
|
data['encoding'] = encoding
|
||||||
|
return self.do_raw(url, data)
|
||||||
|
|
||||||
def export_templating(self, engine='', prefix='',
|
def export_templating(self, encoding=None, engine='', prefix='',
|
||||||
template='', rowSeparator='\n', suffix=''):
|
template='', rowSeparator='\n', suffix=''):
|
||||||
"""Return a fileobject of a project's data in templating mode."""
|
"""Return a fileobject of a project's data in templating mode."""
|
||||||
url = ('export-rows/' +
|
url = ('export-rows/' +
|
||||||
urllib.quote(self.project_name().encode('utf8')) +
|
urllib.quote(self.project_name().encode('utf8')) +
|
||||||
'.' + 'txt')
|
'.' + 'txt')
|
||||||
return self.do_raw(url, data={'format': 'template',
|
data = {'format': 'template',
|
||||||
'template': template,
|
'template': template,
|
||||||
'engine': engine,
|
'engine': engine,
|
||||||
'prefix': prefix,
|
'prefix': prefix,
|
||||||
'suffix': suffix,
|
'suffix': suffix,
|
||||||
'separator': rowSeparator})
|
'separator': rowSeparator}
|
||||||
|
if encoding:
|
||||||
|
data['encoding'] = encoding
|
||||||
|
return self.do_raw(url, data)
|
||||||
|
|
||||||
def export_rows(self, **kwargs):
|
def export_rows(self, **kwargs):
|
||||||
"""Return an iterable of parsed rows of a project's data."""
|
"""Return an iterable of parsed rows of a project's data."""
|
||||||
|
|
Loading…
Reference in New Issue