improved templating option splitToFiles

This commit is contained in:
Felix Lohmeier 2017-12-11 17:32:10 +01:00
parent f7b33684b3
commit c896248c8c
1 changed file with 15 additions and 23 deletions

View File

@@ -152,7 +152,7 @@ help='facets config in json format (may be extracted with browser dev tools in b
group6.add_option('--splitToFiles', dest='splitToFiles', metavar='true/false', choices=('true', 'false'), group6.add_option('--splitToFiles', dest='splitToFiles', metavar='true/false', choices=('true', 'false'),
help='will split each row/record into a single file; it specifies a presumably unique character series for splitting; --prefix and --suffix will be applied to all files; filename-prefix can be specified with --output (default: %Y%m%d)') help='will split each row/record into a single file; it specifies a presumably unique character series for splitting; --prefix and --suffix will be applied to all files; filename-prefix can be specified with --output (default: %Y%m%d)')
group6.add_option('--suffixById', dest='suffixById', metavar='true/false', choices=('true', 'false'), group6.add_option('--suffixById', dest='suffixById', metavar='true/false', choices=('true', 'false'),
help='enhancement option for --splitToFiles; will generate filename-suffix from values in key column (default: row number)') help='enhancement option for --splitToFiles; will generate filename-suffix from values in key column')
PARSER.add_option_group(group6) PARSER.add_option_group(group6)
@@ -217,7 +217,7 @@ def create_project(options):
def export_project(project, options): def export_project(project, options):
"""Dump a project to stdout or options.output file.""" """Dump a project to stdout or options.output file."""
export_format = 'tsv' export_format = 'tsv'
if options.output: if options.output and not options.splitToFiles == 'true':
ext = os.path.splitext(options.output)[1][1:] ext = os.path.splitext(options.output)[1][1:]
if ext: if ext:
export_format = ext.lower() export_format = ext.lower()
@@ -249,40 +249,32 @@ def export_project(project, options):
if not options.output: if not options.output:
filename = time.strftime('%Y%m%d') filename = time.strftime('%Y%m%d')
else: else:
filename = options.output filename = os.path.splitext(options.output)[0]
ext = os.path.splitext(options.output)[1][1:]
if not ext:
ext = 'txt'
if options.suffixById: if options.suffixById:
ids_template = '{{forNonBlank(cells["' + keyColumn + '"].value, v, v, "")}}' ids_template = '{{forNonBlank(cells["' + keyColumn + '"].value, v, v, "")}}'
ids_templateconfig = { 'engine': json.dumps(engine), 'template': ids_template, 'rowSeparator':'\n' } ids_templateconfig = { 'engine': json.dumps(engine), 'template': ids_template, 'rowSeparator':'\n' }
ids = [line.rstrip('\n') for line in project.export_templating(**ids_templateconfig) if line.rstrip('\n')] ids = [line.rstrip('\n') for line in project.export_templating(**ids_templateconfig) if line.rstrip('\n')]
if options.splitToFiles == 'true' and not options.mode == 'record-based': if options.mode == 'record-based':
# row-based: split-character into template
template = split + templateconfig['template']
templateconfig.update({ 'prefix': '', 'suffix': '', 'template': template, 'rowSeparator':'' })
records = project.export_templating(**templateconfig).read().split(split)
del records[0] # skip first blank line
if options.suffixById:
for index, record in enumerate(records):
output = open(filename + '_' + ids[index], 'wb')
output.writelines([prefix, record, suffix])
else:
zeros = len(str(len(records)))
for index, record in enumerate(records):
output = open(filename + '_' + str(index+1).zfill(zeros), 'wb')
output.writelines([prefix, record, suffix])
if options.splitToFiles == 'true' and options.mode == 'record-based':
# record-based: split-character into template if key column is not blank (=record) # record-based: split-character into template if key column is not blank (=record)
template = '{{forNonBlank(cells["' + keyColumn + '"].value, v, "' + split + '", "")}}' + templateconfig['template'] template = '{{forNonBlank(cells["' + keyColumn + '"].value, v, "' + split + '", "")}}' + templateconfig['template']
templateconfig.update({ 'prefix': '', 'suffix': '', 'template': template, 'rowSeparator':'' }) templateconfig.update({ 'prefix': '', 'suffix': '', 'template': template, 'rowSeparator':'' })
else:
# row-based: split-character into template
template = split + templateconfig['template']
templateconfig.update({ 'prefix': '', 'suffix': '', 'template': template, 'rowSeparator':'' })
records = project.export_templating(**templateconfig).read().split(split) records = project.export_templating(**templateconfig).read().split(split)
del records[0] # skip first blank entry del records[0] # skip first blank entry
if options.suffixById: if options.suffixById:
for index, record in enumerate(records): for index, record in enumerate(records):
output = open(filename + '_' + ids[index], 'wb') output = open(filename + '_' + ids[index] + '.' + ext, 'wb')
output.writelines([prefix, record, suffix]) output.writelines([prefix, record, suffix])
else: else:
zeros = len(str(len(records))) zeros = len(str(len(records)))
for index, record in enumerate(records): for index, record in enumerate(records):
output = open(filename + '_' + str(index+1).zfill(zeros), 'wb') output = open(filename + '_' + str(index+1).zfill(zeros) + '.' + ext, 'wb')
output.writelines([prefix, record, suffix]) output.writelines([prefix, record, suffix])
else: else:
output.writelines(project.export_templating(**templateconfig)) output.writelines(project.export_templating(**templateconfig))