Add encoding option (defaults to UTF-8 for csv/tsv/txt) and fix the suffixById templating feature

This commit is contained in:
Felix Lohmeier 2019-08-21 19:24:13 +02:00
parent 4ed6925b25
commit 3c16169767
2 changed files with 40 additions and 26 deletions

View File

@@ -162,20 +162,25 @@ def download(url, output_file=None):
print('Download to file %s complete' % output_file) print('Download to file %s complete' % output_file)
def export(project_id, output_file=None, export_format=None): def export(project_id, encoding=None, output_file=None, export_format=None):
"""Dump a project to stdout or file.""" """Dump a project to stdout or file."""
project = refine.RefineProject(project_id) project = refine.RefineProject(project_id)
if not export_format: if not export_format:
export_format = 'tsv' export_format = 'tsv'
if not output_file: if not output_file:
if export_format in ['csv', 'tsv', 'txt']:
encoding = 'UTF-8'
sys.stdout.write(project.export( sys.stdout.write(project.export(
export_format=export_format).read().decode('UTF-8')) export_format=export_format, encoding=encoding).read())
else: else:
ext = os.path.splitext(output_file)[1][1:] ext = os.path.splitext(output_file)[1][1:]
if ext: if ext:
export_format = ext.lower() export_format = ext.lower()
if export_format in ['csv', 'tsv', 'txt']:
encoding = 'UTF-8'
with open(output_file, 'wb') as f: with open(output_file, 'wb') as f:
f.write(project.export(export_format).read()) f.write(project.export(
export_format=export_format, encoding=encoding).read())
print('Export to file %s complete' % output_file) print('Export to file %s complete' % output_file)
@@ -222,6 +227,7 @@ def ls():
def templating(project_id, def templating(project_id,
template, template,
encoding='UTF-8',
output_file=None, output_file=None,
mode=None, mode=None,
prefix='', prefix='',
@@ -240,7 +246,8 @@ def templating(project_id,
templateconfig = {'prefix': prefix, templateconfig = {'prefix': prefix,
'suffix': suffix, 'suffix': suffix,
'template': template, 'template': template,
'rowSeparator': rowSeparator} 'rowSeparator': rowSeparator,
'encoding': encoding}
# construct the engine config # construct the engine config
if mode == 'record-based': if mode == 'record-based':
@@ -261,21 +268,20 @@ def templating(project_id,
engine['facets'].append(textFilter) engine['facets'].append(textFilter)
templateconfig.update({'engine': json.dumps(engine)}) templateconfig.update({'engine': json.dumps(engine)})
# normal output or some refinable magic for splitToFiles functionality
if not splitToFiles: if not splitToFiles:
# normal output
if not output_file: if not output_file:
sys.stdout.write(project.export_templating( sys.stdout.write(project.export_templating(
**templateconfig).read().decode('UTF-8')) **templateconfig).read())
else: else:
with open(output_file, 'wb') as f: with open(output_file, 'wb') as f:
f.write(project.export_templating(**templateconfig).read()) f.write(project.export_templating(**templateconfig).read())
print('Export to file %s complete' % output_file) print('Export to file %s complete' % output_file)
else: else:
# common config for row-based and record-based # splitToFiles functionality
prefix = templateconfig['prefix'] prefix = templateconfig['prefix']
suffix = templateconfig['suffix'] suffix = templateconfig['suffix']
split = '===|||THISISTHEBEGINNINGOFANEWRECORD|||===' split = '===|||THISISTHEBEGINNINGOFANEWRECORD|||==='
keyColumn = project.get_models()['columnModel']['keyColumnName']
if not output_file: if not output_file:
output_file = time.strftime('%Y%m%d') output_file = time.strftime('%Y%m%d')
else: else:
@@ -283,23 +289,24 @@ def templating(project_id,
ext = os.path.splitext(output_file)[1][1:] ext = os.path.splitext(output_file)[1][1:]
if not ext: if not ext:
ext = 'txt' ext = 'txt'
# generate config for subfeature suffixById
if suffixById: if suffixById:
ids_template = ('{{forNonBlank(cells["' + ids_template = ('{{forNonBlank(' +
keyColumn + 'with(row.columnNames[0],cn,cells[cn].value),' +
'"].value, v, v, "")}}') 'v,v,"")}}')
ids_templateconfig = {'engine': json.dumps(engine), ids_templateconfig = {'engine': json.dumps(engine),
'template': ids_template, 'template': ids_template,
'rowSeparator': '\n'} 'rowSeparator': '\n',
'encoding': encoding}
ids = [line.rstrip('\n') for line in project.export_templating( ids = [line.rstrip('\n') for line in project.export_templating(
**ids_templateconfig) if line.rstrip('\n')] **ids_templateconfig) if line.rstrip('\n')]
# generate common config
if mode == 'record-based': if mode == 'record-based':
# record-based: split-character into template # record-based: split-character into template
# if key column is not blank (=record) # if key column is not blank (=record)
template = ('{{forNonBlank(cells["' + template = ('{{forNonBlank(' +
keyColumn + 'with(row.columnNames[0],cn,cells[cn].value),' +
'"].value, v, "' + 'v,"' + split + '")}}' +
split +
'", "")}}' +
templateconfig['template']) templateconfig['template'])
templateconfig.update({'prefix': '', templateconfig.update({'prefix': '',
'suffix': '', 'suffix': '',
@@ -312,6 +319,7 @@ def templating(project_id,
'suffix': '', 'suffix': '',
'template': template, 'template': template,
'rowSeparator': ''}) 'rowSeparator': ''})
# execute
records = project.export_templating( records = project.export_templating(
**templateconfig).read().split(split) **templateconfig).read().split(split)
del records[0] # skip first blank entry del records[0] # skip first blank entry

View File

@@ -427,25 +427,31 @@ class RefineProject:
return 'ok' return 'ok'
return response_json['code'] # can be 'ok' or 'pending' return response_json['code'] # can be 'ok' or 'pending'
def export(self, export_format='tsv'): def export(self, encoding=None, export_format='tsv'):
"""Return a fileobject of a project's data.""" """Return a fileobject of a project's data."""
url = ('export-rows/' + url = ('export-rows/' +
urllib.quote(self.project_name().encode('utf8')) + urllib.quote(self.project_name().encode('utf8')) +
'.' + export_format) '.' + export_format)
return self.do_raw(url, data={'format': export_format}) data = {'format': export_format}
if encoding:
data['encoding'] = encoding
return self.do_raw(url, data)
def export_templating(self, engine='', prefix='', def export_templating(self, encoding=None, engine='', prefix='',
template='', rowSeparator='\n', suffix=''): template='', rowSeparator='\n', suffix=''):
"""Return a fileobject of a project's data in templating mode.""" """Return a fileobject of a project's data in templating mode."""
url = ('export-rows/' + url = ('export-rows/' +
urllib.quote(self.project_name().encode('utf8')) + urllib.quote(self.project_name().encode('utf8')) +
'.' + 'txt') '.' + 'txt')
return self.do_raw(url, data={'format': 'template', data = {'format': 'template',
'template': template, 'template': template,
'engine': engine, 'engine': engine,
'prefix': prefix, 'prefix': prefix,
'suffix': suffix, 'suffix': suffix,
'separator': rowSeparator}) 'separator': rowSeparator}
if encoding:
data['encoding'] = encoding
return self.do_raw(url, data)
def export_rows(self, **kwargs): def export_rows(self, **kwargs):
"""Return an iterable of parsed rows of a project's data.""" """Return an iterable of parsed rows of a project's data."""