From 375ac42be04ea807ef4fd812c861b86fc36f75d0 Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Tue, 20 Aug 2019 04:30:50 +0200 Subject: [PATCH] realigned create/new_project to upstream new feature: xml root element will be discovered if recordPath is not set bugfix: newly introduced option projectTags was not working in 0.3.7 bugfix: txt defaulted to fixed-width (should be line-based) bugfix: default recordPath for json was not working in 0.3.7 bugfix: default sheets option was broken (but xls, xlsx, ods is broken in OpenRefine >=2.8 anyway, see #4) tests: added sample files and an ipython notebook for comprehensive tests of create option --- google/refine/__main__.py | 2 +- google/refine/cli.py | 32 +- google/refine/refine.py | 142 +- tests/cli_create.ipynb | 2383 +++++++++++++++++ tests/data/{ => cli}/duplicates-deletion.json | 0 tests/data/cli/duplicates.csv | 11 + tests/data/cli/duplicates.json | 92 + tests/data/cli/duplicates.ods | Bin 0 -> 13949 bytes tests/data/cli/duplicates.tsv | 11 + tests/data/cli/duplicates.txt | 11 + tests/data/cli/duplicates.xls | Bin 0 -> 7680 bytes tests/data/cli/duplicates.xlsx | Bin 0 -> 5672 bytes tests/data/cli/duplicates.xml | 93 + tests/data/cli/duplicates.zip | Bin 0 -> 1082 bytes tests/data/cli/duplicates1.xml | 10 + tests/data/cli/duplicates2.ods | Bin 0 -> 12139 bytes tests/data/cli/duplicates2.xls | Bin 0 -> 10752 bytes tests/data/cli/duplicates2.xlsx | Bin 0 -> 7802 bytes 18 files changed, 2749 insertions(+), 38 deletions(-) create mode 100644 tests/cli_create.ipynb rename tests/data/{ => cli}/duplicates-deletion.json (100%) create mode 100644 tests/data/cli/duplicates.csv create mode 100644 tests/data/cli/duplicates.json create mode 100644 tests/data/cli/duplicates.ods create mode 100644 tests/data/cli/duplicates.tsv create mode 100644 tests/data/cli/duplicates.txt create mode 100644 tests/data/cli/duplicates.xls create mode 100644 tests/data/cli/duplicates.xlsx create mode 100644 tests/data/cli/duplicates.xml create mode 100644 tests/data/cli/duplicates.zip create mode 100644 tests/data/cli/duplicates1.xml create mode 100644 tests/data/cli/duplicates2.ods create mode 100644 tests/data/cli/duplicates2.xls create mode 100644 tests/data/cli/duplicates2.xlsx diff --git a/google/refine/__main__.py b/google/refine/__main__.py index 1c64362..f1b21a2 100644 --- a/google/refine/__main__.py +++ b/google/refine/__main__.py @@ -145,7 +145,7 @@ group5.add_option('--projectTags', dest='projectTags', help='(all formats), please provide tags in multiple arguments, e.g. --projectTags=beta --projectTags=client1') group5.add_option('--recordPath', dest='recordPath', action='append', - help='(xml,json), please provide path in multiple arguments without slashes, e.g. /collection/record/ should be entered like this: --recordPath=collection --recordPath=record, default xml: record, default json: _ _') + help='(xml,json), please provide path in multiple arguments, e.g. /collection/record/ should be entered: --recordPath=collection --recordPath=record, default xml: root element, default json: _ _') group5.add_option('--separator', dest='separator', help='(csv,tsv), default csv: , default tsv: \\t') group5.add_option('--sheets', dest='sheets', diff --git a/google/refine/cli.py b/google/refine/cli.py index 106a3d2..4a48ea7 100644 --- a/google/refine/cli.py +++ b/google/refine/cli.py @@ -25,6 +25,7 @@ import ssl import sys import time import urllib +from xml.etree import ElementTree from google.refine import refine @@ -43,7 +44,6 @@ def apply(project_id, history_file): def create(project_file, project_format=None, - project_name=None, columnWidths=None, encoding=None, guessCellValueTypes=False, @@ -54,6 +54,7 @@ def create(project_file, linesPerRow=None, processQuotes=True, projectName=None, + projectTags=None, recordPath=None, separator=None, sheets=None, @@ -69,15 +70,15 @@ def create(project_file, project_format = os.path.splitext(project_file)[1][1:].lower() if project_format == 'txt': try: - columnWidths + columnWidths[0] project_format = 'fixed-width' - except NameError: + except TypeError: project_format = 'line-based' # defaults for each file type if project_format == 'xml': project_format = 'text/xml' if not recordPath: - recordPath = 'record' + recordPath = [ElementTree.parse(project_file).getroot().tag] elif project_format == 'csv': project_format = 'text/line-based/*sv' elif project_format == 'tsv': @@ -95,22 +96,35 @@ def create(project_file, elif project_format == 'json': project_format = 'text/json' if not recordPath: - recordPath = ('_', '_') + recordPath = ['_', '_'] elif project_format == 'xls': project_format = 'binary/text/xml/xls/xlsx' if not sheets: - sheets = 0 + sheets = [0] + # TODO: new format for sheets option introduced in OpenRefine 2.8 elif project_format == 'xlsx': project_format = 'binary/text/xml/xls/xlsx' if not sheets: - sheets = 0 + sheets = [0] + # TODO: new format for sheets option introduced in OpenRefine 2.8 elif project_format == 'ods': project_format = 'text/xml/ods' if not sheets: - sheets = 0 + sheets = [0] + # TODO: new format for sheets option introduced in OpenRefine 2.8 # execute kwargs = {k: v for k, v in vars().items() if v is not None} - project = refine.Refine(refine.RefineServer()).new_project(**kwargs) + project = refine.Refine(refine.RefineServer()).new_project( + guess_cell_value_types=guessCellValueTypes, + ignore_lines=ignoreLines, + header_lines=headerLines, + skip_data_lines=skipDataLines, + store_blank_rows=storeBlankRows, + process_quotes=processQuotes, + project_name=projectName, + store_blank_cells_as_nulls=storeBlankCellsAsNulls, + include_file_sources=includeFileSources, + **kwargs) rows = project.do_json('get-rows')['total'] if rows > 0: print('{0}: {1}'.format('id', project.project_id)) diff --git a/google/refine/refine.py b/google/refine/refine.py index 33dcb11..b85e533 100644 --- a/google/refine/refine.py +++ b/google/refine/refine.py @@ -147,41 +147,127 @@ class Refine: """Open a Refine project.""" return RefineProject(self.server, project_id) - def new_project(self, project_file=None, project_name=None, - project_format='text/line-based/*sv', **kwargs): - """Create a Refine project.""" + # These aren't used yet but are included for reference + new_project_defaults = { + 'text/line-based/*sv': { + 'encoding': '', + 'separator': ',', + 'ignore_lines': -1, + 'header_lines': 1, + 'skip_data_lines': 0, + 'limit': -1, + 'store_blank_rows': True, + 'guess_cell_value_types': True, + 'process_quotes': True, + 'store_blank_cells_as_nulls': True, + 'include_file_sources': False}, + 'text/line-based': { + 'encoding': '', + 'lines_per_row': 1, + 'ignore_lines': -1, + 'limit': -1, + 'skip_data_lines': -1, + 'store_blank_rows': True, + 'store_blank_cells_as_nulls': True, + 'include_file_sources': False}, + 'text/line-based/fixed-width': { + 'encoding': '', + 'column_widths': [20], + 'ignore_lines': -1, + 'header_lines': 0, + 'skip_data_lines': 0, + 'limit': -1, + 'guess_cell_value_types': False, + 'store_blank_rows': True, + 'store_blank_cells_as_nulls': True, + 'include_file_sources': False}, + 'text/line-based/pc-axis': { + 'encoding': '', + 'limit': -1, + 'skip_data_lines': -1, + 'include_file_sources': False}, + 'text/rdf+n3': {'encoding': ''}, + 'text/xml/ods': { + 'sheets': [], + 'ignore_lines': -1, + 'header_lines': 1, + 'skip_data_lines': 0, + 'limit': -1, + 'store_blank_rows': True, + 'store_blank_cells_as_nulls': True, + 'include_file_sources': False}, + 'binary/xls': { + 'xml_based': False, + 'sheets': [], + 'ignore_lines': -1, + 'header_lines': 1, + 'skip_data_lines': 0, + 'limit': -1, + 'store_blank_rows': True, + 'store_blank_cells_as_nulls': True, + 'include_file_sources': False} + } - defaults = {'guessCellValueTypes': False, - 'headerLines': 1, - 'ignoreLines': -1, - 'includeFileSources': False, - 'limit': -1, - 'linesPerRow': 1, - 'processQuotes': True, - 'separator': ',', - 'skipDataLines': 0, - 'storeBlankCellsAsNulls': True, - 'storeBlankRows': True, - 'storeEmptyStrings': True, - 'trimStrings': False} + def new_project(self, project_file=None, project_url=None, project_name=None, project_format='text/line-based/*sv', + encoding='', + separator=',', + ignore_lines=-1, + header_lines=1, + skip_data_lines=0, + limit=-1, + store_blank_rows=True, + guess_cell_value_types=False, + process_quotes=True, + store_blank_cells_as_nulls=True, + include_file_sources=False, + **opts): - # options - options = {'format': project_format} - if project_file is not None: - options['project-file'] = {'fd': open(project_file), - 'filename': project_file} + if (project_file and project_url) or (not project_file and not project_url): + raise ValueError('One (only) of project_file and project_url must be set') + + def s(opt): + if isinstance(opt, bool): + return 'true' if opt else 'false' + if opt is None: + return '' + return str(opt) + + # the new APIs requires a json in the 'option' POST or GET argument + # POST is broken at the moment, so we send it in the URL + new_style_options = dict(opts, **{ + 'encoding': s(encoding), + }) + params = { + 'options': json.dumps(new_style_options), + } + + # old style options + options = { + 'format': project_format, + 'separator': s(separator), + 'ignore-lines': s(ignore_lines), + 'header-lines': s(header_lines), + 'skip-data-lines': s(skip_data_lines), + 'limit': s(limit), + 'guess-value-type': s(guess_cell_value_types), + 'process-quotes': s(process_quotes), + 'store-blank-rows': s(store_blank_rows), + 'store-blank-cells-as-nulls': s(store_blank_cells_as_nulls), + 'include-file-sources': s(include_file_sources), + } + + if project_url is not None: + options['url'] = project_url + elif project_file is not None: + options['project-file'] = { + 'fd': open(project_file), + 'filename': project_file, + } if project_name is None: # make a name for itself by stripping extension and directories project_name = (project_file or 'New project').rsplit('.', 1)[0] project_name = os.path.basename(project_name) options['project-name'] = project_name - - # params - params_dict = dict(defaults) - params_dict.update(kwargs) - params = {'options': json.dumps(params_dict)} - - # submit response = self.server.urlopen( 'create-project-from-upload', options, params ) diff --git a/tests/cli_create.ipynb b/tests/cli_create.ipynb new file mode 100644 index 0000000..9f7116b --- /dev/null +++ b/tests/cli_create.ipynb @@ -0,0 +1,2383 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test function create in module cli" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Install\n", + "\n", + "This notebook requires a Python 2.7 environment and an OpenRefine server running at http://127.0.0.1:3333." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33mDEPRECATION: Python 2.7 will reach the end of its life on January 1st, 2020. Please upgrade your Python as Python 2.7 won't be maintained after that date. A future version of pip will drop support for Python 2.7. More details about Python 2 support in pip, can be found at https://pip.pypa.io/en/latest/development/release-process/#python-2-support\u001b[0m\n", + "Processing /home/felix/git/openrefine-client\n", + "Requirement already satisfied, skipping upgrade: urllib2_file in /home/felix/.local/lib/python2.7/site-packages (from openrefine-client==0.3.7) (0.2.1)\n", + "Installing collected packages: openrefine-client\n", + " Found existing installation: openrefine-client 0.3.7\n", + " Uninstalling openrefine-client-0.3.7:\n", + " Successfully uninstalled openrefine-client-0.3.7\n", + " Running setup.py install for openrefine-client ... \u001b[?25ldone\n", + "\u001b[?25hSuccessfully installed openrefine-client-0.3.7\n" + ] + } + ], + "source": [ + "import sys\n", + "!{sys.executable} -m pip install .. --user --upgrade" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from google.refine import cli" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CSV" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1618143866116\n", + "rows: 10\n", + " id: 1618143866116\n", + " url: http://127.0.0.1:3333/project?project=1618143866116\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:53Z\n", + " created: 2019-08-20T02:12:53Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "Project 1618143866116 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### encoding\n", + "\n", + "check TV symbol in line 1" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1676755759011\n", + "rows: 10\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: รฐยŸย“ยบ)\t1\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "Project 1676755759011 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', encoding='ISO-8859-1')\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1970849280401\n", + "rows: 10\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "Project 1970849280401 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', encoding='UTF-8')\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### guessCellValueTypes\n", + "\n", + "check OpenRefine GUI at url below: numbers should be green" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2231557582225\n", + "rows: 10\n", + " id: 2231557582225\n", + " url: http://127.0.0.1:3333/project?project=2231557582225\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:53Z\n", + " created: 2019-08-20T02:12:53Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': True, u'includeFileSources': False}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', guessCellValueTypes=True)\n", + "cli.info(p.project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Project 2231557582225 has been successfully deleted\n" + ] + } + ], + "source": [ + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### headerLines\n", + "\n", + "check column names, should be Column 1..." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2294888751269\n", + "rows: 11\n", + "Column 1\tColumn 2\tColumn 3\tColumn 4\tColumn 5\tColumn 6\tColumn 7\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "Project 2294888751269 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', headerLines=0)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ignoreLines\n", + "\n", + "check column names, should start with arthur.duff as header" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1990694976789\n", + "rows: 5\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "Project 1990694976789 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', ignoreLines=5)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### limit\n", + "\n", + "should contain 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1834697810094\n", + "rows: 5\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "Project 1834697810094 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', limit=5)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### separator and processQuotes\n", + "\n", + "should contain 10 rows and 2 columns (Column 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1745680810911\n", + "rows: 10\n", + "email,name,state,gender,purchase,count,date\tColumn 2\n", + "\"danny.baron@example1.com,Danny Baron,CA,M,TV (UTF-8: ๐Ÿ“บ),1,\"\"Wed, 4 Jul 2001\"\t\n", + "melanie.white@example2.edu,Melanie White,NC,F,,1,2001-07-04T12:08:56\t\n", + "danny.baron@example1.com, D.\t\"(\"\"Tab\"\") Baron,CA,M,Winter jacket,1,2001-07-04\"\n", + "ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight,1,2001/07/04\t\n", + "arthur.duff@example4.com,Arthur Duff,OR,M,Dining table,1,2001-07\t\n", + "danny.baron@example1.com,Daniel Baron,,,Bike,1,2001\t\n", + "jean.griffith@example5.org,Jean Griffith,WA,F,Power drill,1,2000\t\n", + "melanie.white@example2.edu,Melanie White,NC,F,'iPad',1,1999\t\n", + "ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier,1,1998\t\n", + "arthur.duff@example4.com,Arthur Duff,OR,M,Night table,1,1997\t\n", + "Project 1745680810911 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', separator=' ', processQuotes=False)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### projectName" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2022088294800\n", + "rows: 10\n", + " id: 2022088294800\n", + " url: http://127.0.0.1:3333/project?project=2022088294800\n", + " name: foo\n", + " modified: 2019-08-20T02:12:53Z\n", + " created: 2019-08-20T02:12:53Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'foo', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "Project 2022088294800 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', projectName='foo')\n", + "cli.info(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### projectTags (introduced in OpenRefine 2.8)\n", + "\n", + "check manually at http://127.0.0.1:3333 > Open Project if tags where stored" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2228120867351\n", + "rows: 10\n", + " id: 2228120867351\n", + " url: http://127.0.0.1:3333/project?project=2228120867351\n", + " name: duplicates\n", + " tags: [u'client1', u'beta']\n", + " modified: 2019-08-20T02:12:53Z\n", + " created: 2019-08-20T02:12:53Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'projectTags': [u'client1', u'beta'], u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', projectTags=['client1', 'beta'])\n", + "cli.info(p.project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Project 2228120867351 has been successfully deleted\n" + ] + } + ], + "source": [ + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### skipDataLines\n", + "\n", + "should contain 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1725478809832\n", + "rows: 5\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "Project 1725478809832 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', skipDataLines=5)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### storeBlankCellsAsNulls\n", + "\n", + "check OpenRefine GUI at url below:\n", + "* All > View > Show/Hide 'null' values in cells\n", + "* row 6 should contain null values in columns state and gender" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2533896794214\n", + "rows: 10\n", + " id: 2533896794214\n", + " url: http://127.0.0.1:3333/project?project=2533896794214\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': True, u'includeFileSources': False}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', guessCellValueTypes=True)\n", + "cli.info(p.project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Project 2533896794214 has been successfully deleted\n" + ] + } + ], + "source": [ + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TSV" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2281824651803\n", + "rows: 10\n", + " id: 2281824651803\n", + " url: http://127.0.0.1:3333/project?project=2281824651803\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.tsv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'limit': -1, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\"D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "Project 2281824651803 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.tsv')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## JSON" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2534262116323\n", + "rows: 10\n", + " id: 2534262116323\n", + " url: http://127.0.0.1:3333/project?project=2534262116323\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: _ - name\n", + " column 002: _ - date\n", + " column 003: _ - email\n", + " column 004: _ - state\n", + " column 005: _ - count\n", + " column 006: _ - gender\n", + " column 007: _ - purchase\n", + "_ - name\t_ - date\t_ - email\t_ - state\t_ - count\t_ - gender\t_ - purchase\n", + "Danny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\tCA\t1\tM\tTV (UTF-8: ๐Ÿ“บ)\n", + "Melanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\tNC\t1\tF\t\n", + "\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\tCA\t1\tM\tWinter jacket\n", + "Ben Tyler\t2001/07/04\tben.tyler@example3.org\tNV\t1\tM\tFlashlight\n", + "Arthur Duff\t2001-07\tarthur.duff@example4.com\tOR\t1\tM\tDining table\n", + "Daniel Baron\t2001\tdanny.baron@example1.com\t\t1\t\tBike\n", + "Jean Griffith\t2000\tjean.griffith@example5.org\tWA\t1\tF\tPower drill\n", + "Melanie White\t1999\tmelanie.white@example2.edu\tNC\t1\tF\t'iPad'\n", + "Ben Morisson\t1998\tben.morisson@example6.org\tFL\t1\tM\tAmplifier\n", + "Arthur Duff\t1997\tarthur.duff@example4.com\tOR\t1\tM\tNight table\n", + "Project 2534262116323 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.json')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### trimStrings (broken, does not work in the GUI either)\n", + "\n", + "check row 3 if spaces before `D.` are deleted" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2495073177504\n", + "rows: 10\n", + " id: 2495073177504\n", + " url: http://127.0.0.1:3333/project?project=2495073177504\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': True, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: _ - name\n", + " column 002: _ - date\n", + " column 003: _ - email\n", + " column 004: _ - state\n", + " column 005: _ - count\n", + " column 006: _ - gender\n", + " column 007: _ - purchase\n", + "_ - name\t_ - date\t_ - email\t_ - state\t_ - count\t_ - gender\t_ - purchase\n", + "Danny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\tCA\t1\tM\tTV (UTF-8: ๐Ÿ“บ)\n", + "Melanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\tNC\t1\tF\t\n", + "\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\tCA\t1\tM\tWinter jacket\n", + "Ben Tyler\t2001/07/04\tben.tyler@example3.org\tNV\t1\tM\tFlashlight\n", + "Arthur Duff\t2001-07\tarthur.duff@example4.com\tOR\t1\tM\tDining table\n", + "Daniel Baron\t2001\tdanny.baron@example1.com\t\t1\t\tBike\n", + "Jean Griffith\t2000\tjean.griffith@example5.org\tWA\t1\tF\tPower drill\n", + "Melanie White\t1999\tmelanie.white@example2.edu\tNC\t1\tF\t'iPad'\n", + "Ben Morisson\t1998\tben.morisson@example6.org\tFL\t1\tM\tAmplifier\n", + "Arthur Duff\t1997\tarthur.duff@example4.com\tOR\t1\tM\tNight table\n", + "Project 2495073177504 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.json', trimStrings=True)\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### recordPath" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1671966444040\n", + "rows: 10\n", + " id: 1671966444040\n", + " url: http://127.0.0.1:3333/project?project=1671966444040\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_', u'purchase'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: purchase\n", + "purchase\n", + "TV (UTF-8: ๐Ÿ“บ)\n", + "\n", + "Winter jacket\n", + "Flashlight\n", + "Dining table\n", + "Bike\n", + "Power drill\n", + "'iPad'\n", + "Amplifier\n", + "Night table\n", + "Project 1671966444040 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.json', recordPath=['_', '_', 'purchase'])\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### storeEmptyStrings\n", + "\n", + "default: True; set to False for null values\n", + "\n", + "check OpenRefine GUI at url below:\n", + "* All > View > Show/Hide 'null' values in cells\n", + "* row 6 should contain null values in columns state and gender" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2078676878032\n", + "rows: 10\n", + " id: 2078676878032\n", + " url: http://127.0.0.1:3333/project?project=2078676878032\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': False, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: _ - name\n", + " column 002: _ - date\n", + " column 003: _ - email\n", + " column 004: _ - count\n", + " column 005: _ - purchase\n", + " column 006: _ - state\n", + " column 007: _ - gender\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.json', storeEmptyStrings=False)\n", + "cli.info(p.project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Project 2078676878032 has been successfully deleted\n" + ] + } + ], + "source": [ + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## XML" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2264312539076\n", + "rows: 80\n", + " id: 2264312539076\n", + " url: http://127.0.0.1:3333/project?project=2264312539076\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 80\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'root'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: root\n", + " column 002: root - record\n", + " column 003: root - record - name\n", + " column 004: root - record - date\n", + " column 005: root - record - email\n", + " column 006: root - record - count\n", + " column 007: root - record - purchase\n", + " column 008: root - record - state\n", + " column 009: root - record - gender\n", + "root\troot - record\troot - record - name\troot - record - date\troot - record - email\troot - record - count\troot - record - purchase\troot - record - state\troot - record - gender\n", + "\"\n", + " \"\t\"\n", + " \"\tDanny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\t1\tTV (UTF-8: ๐Ÿ“บ)\tCA\tM\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\tMelanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\t1\t\tNC\tF\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + "\"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\t1\tWinter jacket\tCA\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tBen Tyler\t2001/07/04\tben.tyler@example3.org\t1\tFlashlight\tNV\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tArthur Duff\t2001-07\tarthur.duff@example4.com\t1\tDining table\tOR\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tDaniel Baron\t2001\tdanny.baron@example1.com\t1\tBike\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tJean Griffith\t2000\tjean.griffith@example5.org\t1\tPower drill\tWA\tF\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tMelanie White\t1999\tmelanie.white@example2.edu\t1\t'iPad'\tNC\tF\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tBen Morisson\t1998\tben.morisson@example6.org\t1\tAmplifier\tFL\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tArthur Duff\t1997\tarthur.duff@example4.com\t1\tNight table\tOR\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "Project 2264312539076 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.xml')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### trimStrings (broken, does not work in the GUI either)\n", + "\n", + "check if spaces before `D.` are deleted" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1917953863988\n", + "rows: 80\n", + " id: 1917953863988\n", + " url: http://127.0.0.1:3333/project?project=1917953863988\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 80\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'root'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': True, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: root\n", + " column 002: root - record\n", + " column 003: root - record - name\n", + " column 004: root - record - date\n", + " column 005: root - record - email\n", + " column 006: root - record - count\n", + " column 007: root - record - purchase\n", + " column 008: root - record - state\n", + " column 009: root - record - gender\n", + "root\troot - record\troot - record - name\troot - record - date\troot - record - email\troot - record - count\troot - record - purchase\troot - record - state\troot - record - gender\n", + "\"\n", + " \"\t\"\n", + " \"\tDanny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\t1\tTV (UTF-8: ๐Ÿ“บ)\tCA\tM\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + " \"\t\"\n", + " \"\tMelanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\t1\t\tNC\tF\n", + "\"\n", + " \"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\"\n", + "\"\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\t1\tWinter jacket\tCA\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tBen Tyler\t2001/07/04\tben.tyler@example3.org\t1\tFlashlight\tNV\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tArthur Duff\t2001-07\tarthur.duff@example4.com\t1\tDining table\tOR\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tDaniel Baron\t2001\tdanny.baron@example1.com\t1\tBike\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tJean Griffith\t2000\tjean.griffith@example5.org\t1\tPower drill\tWA\tF\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tMelanie White\t1999\tmelanie.white@example2.edu\t1\t'iPad'\tNC\tF\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tBen Morisson\t1998\tben.morisson@example6.org\t1\tAmplifier\tFL\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\tArthur Duff\t1997\tarthur.duff@example4.com\t1\tNight table\tOR\tM\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "\t\"\n", + " \"\t\t\t\t\t\t\t\n", + "Project 1917953863988 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.xml', trimStrings=True)\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### recordPath" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2293178566671\n", + "rows: 10\n", + " id: 2293178566671\n", + " url: http://127.0.0.1:3333/project?project=2293178566671\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'root', u'record', u'purchase'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: purchase\n", + "purchase\n", + "TV (UTF-8: ๐Ÿ“บ)\n", + "\n", + "Winter jacket\n", + "Flashlight\n", + "Dining table\n", + "Bike\n", + "Power drill\n", + "'iPad'\n", + "Amplifier\n", + "Night table\n", + "Project 2293178566671 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.xml', recordPath=['root', 'record', 'purchase'])\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### storeEmptyStrings\n", + "\n", + "default: True; set to False for null values\n", + "\n", + "check OpenRefine GUI at url below:\n", + "* All > View > Show/Hide 'null' values in cells\n", + "* row 6 should contain null values in columns state and gender" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2438123269695\n", + "rows: 10\n", + " id: 2438123269695\n", + " url: http://127.0.0.1:3333/project?project=2438123269695\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:54Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': False, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.csv', storeEmptyStrings=False)\n", + "cli.info(p.project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Project 2438123269695 has been successfully deleted\n" + ] + } + ], + "source": [ + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TXT" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default (line-based)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1913292396645\n", + "rows: 11\n", + " id: 1913292396645\n", + " url: http://127.0.0.1:3333/project?project=1913292396645\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:55Z\n", + " created: 2019-08-20T02:12:54Z\n", + " rowCount: 11\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'ignoreLines': -1, u'projectName': u'duplicates', u'processQuotes': True, u'skipDataLines': -1, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n", + " column 001: Column 1\n", + "Column 1\n", + "email name state gender purchase count date \n", + "danny.baron@example1.com Danny Baron CA M TV (UTF-8: ๐Ÿ“บ) 1 Wed, 4 Jul 2001 \n", + "melanie.white@example2.edu Melanie White NC F 1 2001-07-04T12:08:5\n", + "\"danny.baron@example1.com D.\t(\"\"Tab\"\") Baron CA M Winter jacket 1 2001-07-04 \"\n", + "ben.tyler@example3.org Ben Tyler NV M Flashlight 1 2001/07/04 \n", + "arthur.duff@example4.com Arthur Duff OR M Dining table 1 2001-07 \n", + "danny.baron@example1.com Daniel Baron Bike 1 2001 \n", + "jean.griffith@example5.org Jean Griffith WA F Power drill 1 2000 \n", + "melanie.white@example2.edu Melanie White NC F 'iPad' 1 1999 \n", + "ben.morisson@example6.org Ben Morisson FL M Amplifier 1 1998 \n", + "arthur.duff@example4.com Arthur Duff OR M Night table 1 1997 \n", + "Project 1913292396645 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.txt')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### linesPerRow\n", + "\n", + "should return 6 rows in 2 columns" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1958513543951\n", + "rows: 6\n", + " id: 1958513543951\n", + " url: http://127.0.0.1:3333/project?project=1958513543951\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:55Z\n", + " created: 2019-08-20T02:12:55Z\n", + " rowCount: 6\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'ignoreLines': -1, u'projectName': u'duplicates', u'processQuotes': True, u'limit': -1, u'skipDataLines': -1, u'separator': u',', u'trimStrings': False, u'linesPerRow': 2, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n", + " column 001: Column 1\n", + " column 002: Column 2\n", + "Column 1\tColumn 2\n", + "email name state gender purchase count date \tdanny.baron@example1.com Danny Baron CA M TV (UTF-8: ๐Ÿ“บ) 1 Wed, 4 Jul 2001 \n", + "melanie.white@example2.edu Melanie White NC F 1 2001-07-04T12:08:5\t\"danny.baron@example1.com D.\t(\"\"Tab\"\") Baron CA M Winter jacket 1 2001-07-04 \"\n", + "ben.tyler@example3.org Ben Tyler NV M Flashlight 1 2001/07/04 \tarthur.duff@example4.com Arthur Duff OR M Dining table 1 2001-07 \n", + "danny.baron@example1.com Daniel Baron Bike 1 2001 \tjean.griffith@example5.org Jean Griffith WA F Power drill 1 2000 \n", + "melanie.white@example2.edu Melanie White NC F 'iPad' 1 1999 \tben.morisson@example6.org Ben Morisson FL M Amplifier 1 1998 \n", + "arthur.duff@example4.com Arthur Duff OR M Night table 1 1997 \t\n", + "Project 1958513543951 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.txt', linesPerRow=2)\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### fixed-width: columnWidths and headerLines" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1703842312470\n", + "rows: 10\n", + " id: 1703842312470\n", + " url: http://127.0.0.1:3333/project?project=1703842312470\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:55Z\n", + " created: 2019-08-20T02:12:55Z\n", + " rowCount: 10\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'limit': -1, u'separator': u',', u'trimStrings': False, u'columnWidths': [27, 21, 6, 7, 15, 6, 1000], u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 1}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com \tDanny Baron \tCA \tM \tTV (UTF-8: ๐Ÿ“บ) \t1 \tWed, 4 Jul 2001 \n", + "melanie.white@example2.edu \tMelanie White \tNC \tF \t \t1 \t2001-07-04T12:08:5\n", + "danny.baron@example1.com \t\" D.\t(\"\"Tab\"\") Baron \"\tCA \tM \tWinter jacket \t1 \t2001-07-04 \n", + "ben.tyler@example3.org \tBen Tyler \tNV \tM \tFlashlight \t1 \t2001/07/04 \n", + "arthur.duff@example4.com \tArthur Duff \tOR \tM \tDining table \t1 \t2001-07 \n", + "danny.baron@example1.com \tDaniel Baron \t \t \tBike \t1 \t2001 \n", + "jean.griffith@example5.org \tJean Griffith \tWA \tF \tPower drill \t1 \t2000 \n", + "melanie.white@example2.edu \tMelanie White \tNC \tF \t'iPad' \t1 \t1999 \n", + "ben.morisson@example6.org \tBen Morisson \tFL \tM \tAmplifier \t1 \t1998 \n", + "arthur.duff@example4.com \tArthur Duff \tOR \tM \tNight table \t1 \t1997 \n", + "Project 1703842312470 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.txt', columnWidths=[27, 21, 6, 7, 15, 6, 1000], headerLines=1)\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ZIP" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default\n", + "\n", + "should contain 16 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2381217278039\n", + "rows: 16\n", + " id: 2381217278039\n", + " url: http://127.0.0.1:3333/project?project=2381217278039\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:55Z\n", + " created: 2019-08-20T02:12:55Z\n", + " rowCount: 16\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}, {u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ), Winter jacket, bike\t3\tWed, 4 Jul 2001, 2001-07-04, 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t, 'iPad'\t2\t2001-07-04T12:08:56, 1999\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2\t2001-07, 1997\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "Project 2381217278039 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.zip')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### includeFileSources\n", + "\n", + "should contain column File" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2314884555837\n", + "rows: 16\n", + " id: 2314884555837\n", + " url: http://127.0.0.1:3333/project?project=2314884555837\n", + " name: duplicates\n", + " modified: 2019-08-20T02:12:55Z\n", + " created: 2019-08-20T02:12:55Z\n", + " rowCount: 16\n", + "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': True}, {u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': True}]\n", + " column 001: File\n", + " column 002: email\n", + " column 003: name\n", + " column 004: state\n", + " column 005: gender\n", + " column 006: purchase\n", + " column 007: count\n", + " column 008: date\n", + "File\temail\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "duplicates.csv\tdanny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1\tWed, 4 Jul 2001\n", + "duplicates.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", + "duplicates.csv\tdanny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", + "duplicates.csv\tben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "duplicates.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", + "duplicates.csv\tdanny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", + "duplicates.csv\tjean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "duplicates.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", + "duplicates.csv\tben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "duplicates.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", + "duplicates2.csv\tdanny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ), Winter jacket, bike\t3\tWed, 4 Jul 2001, 2001-07-04, 2001\n", + "duplicates2.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t, 'iPad'\t2\t2001-07-04T12:08:56, 1999\n", + "duplicates2.csv\tben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", + "duplicates2.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2\t2001-07, 1997\n", + "duplicates2.csv\tjean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", + "duplicates2.csv\tben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", + "Project 2314884555837 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.zip', includeFileSources=True)\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ODS (broken in OpenRefine >=2.8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default\n", + "\n", + "many blank columns and rows in OpenRefine <=2.7 (also with manual import via GUI)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1620818141127\n", + "rows: 11\n", + " id: 1620818141127\n", + " url: http://127.0.0.1:3333/project?project=1620818141127\n", + " name: duplicates\n", + " modified: 2019-08-20T02:13:41Z\n", + " created: 2019-08-20T02:13:41Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + " column 008: Column\n", + " column 009: Column 9\n", + " column 010: Column 10\n", + " column 011: Column 11\n", + " column 012: Column 12\n", + " column 013: Column 13\n", + " column 014: Column 14\n", + " column 015: Column 15\n", + " column 016: Column 16\n", + " column 017: Column 17\n", + " column 018: Column 18\n", + " column 019: Column 19\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\tColumn\tColumn 9\tColumn 10\tColumn 11\tColumn 12\tColumn 13\tColumn 14\tColumn 15\tColumn 16\tColumn 17\tColumn 18\tColumn 19\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\t\t\t\t\t\t\t\t\t\t\t\t\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\t\t\t\t\t\t\t\t\t\t\t\t\n", + "danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\t\t\t\t\t\t\t\t\t\t\t\t\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\t\t\t\t\t\t\t\t\t\t\t\t\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\t\t\t\t\t\t\t\t\t\t\t\t\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "Project 1620818141127 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.ods')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### sheets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "first sheet from file with 2 sheets" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1985853059017\n", + "rows: 11\n", + " id: 1985853059017\n", + " url: http://127.0.0.1:3333/project?project=1985853059017\n", + " name: duplicates2\n", + " modified: 2019-08-20T02:13:47Z\n", + " created: 2019-08-20T02:13:47Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + " column 008: Column\n", + " column 009: Column 9\n", + " column 010: Column 10\n", + " column 011: Column 11\n", + " column 012: Column 12\n", + " column 013: Column 13\n", + " column 014: Column 14\n", + " column 015: Column 15\n", + " column 016: Column 16\n", + " column 017: Column 17\n", + " column 018: Column 18\n", + " column 019: Column 19\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\tColumn\tColumn 9\tColumn 10\tColumn 11\tColumn 12\tColumn 13\tColumn 14\tColumn 15\tColumn 16\tColumn 17\tColumn 18\tColumn 19\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\t\t\t\t\t\t\t\t\t\t\t\t\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\t\t\t\t\t\t\t\t\t\t\t\t\n", + "danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\t\t\t\t\t\t\t\t\t\t\t\t\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\t\t\t\t\t\t\t\t\t\t\t\t\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\t\t\t\t\t\t\t\t\t\t\t\t\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\t\t\t\t\t\t\t\t\t\t\t\t\n", + "Project 1985853059017 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates2.ods', sheets=[0])\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "both sheets from file with 2 sheets: should contain 16 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2325827930833\n", + "rows: 18\n", + " id: 2325827930833\n", + " url: http://127.0.0.1:3333/project?project=2325827930833\n", + " name: duplicates2\n", + " modified: 2019-08-20T02:13:49Z\n", + " created: 2019-08-20T02:13:49Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + " column 008: Column\n", + " column 009: Column 9\n", + " column 010: Column 10\n", + " column 011: Column 11\n", + " column 012: Column 12\n", + " column 013: Column 13\n", + " column 014: Column 14\n", + " column 015: Column 15\n", + " column 016: Column 16\n", + " column 017: Column 17\n", + " column 018: Column 18\n", + " column 019: Column 19\n", + " column 020: Column 20\n", + " column 021: Column 21\n", + " column 022: Column 22\n", + " column 023: Column 23\n", + " column 024: Column 24\n", + " column 025: Column 25\n", + " column 026: Column 26\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\tColumn\tColumn 9\tColumn 10\tColumn 11\tColumn 12\tColumn 13\tColumn 14\tColumn 15\tColumn 16\tColumn 17\tColumn 18\tColumn 19\tColumn 20\tColumn 21\tColumn 22\tColumn 23\tColumn 24\tColumn 25\tColumn 26\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ), Winter jacket, bike\t3.0\tWed, 4 Jul 2001, 2001-07-04, 2001\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t, 'iPad'\t2.0\t2001-07-04T12:08:56, 1999\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2.0\t2001-07, 1997\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n", + "Project 2325827930833 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates2.ods', sheets=[0, 1])\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## XLS (broken in OpenRefine >=2.8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1607123650693\n", + "rows: 10\n", + " id: 1607123650693\n", + " url: http://127.0.0.1:3333/project?project=1607123650693\n", + " name: duplicates\n", + " modified: 2019-08-20T02:13:52Z\n", + " created: 2019-08-20T02:13:52Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D. (\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n", + "Project 1607123650693 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.xls')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### sheets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "first sheet from file with 2 sheets" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2439816728218\n", + "rows: 10\n", + " id: 2439816728218\n", + " url: http://127.0.0.1:3333/project?project=2439816728218\n", + " name: duplicates2\n", + " modified: 2019-08-20T02:13:58Z\n", + " created: 2019-08-20T02:13:58Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n", + "Project 2439816728218 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates2.xls', sheets=[0])\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "both sheets from file with 2 sheets: should contain 16 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1954256360738\n", + "rows: 16\n", + " id: 1954256360738\n", + " url: http://127.0.0.1:3333/project?project=1954256360738\n", + " name: duplicates2\n", + " modified: 2019-08-20T02:13:59Z\n", + " created: 2019-08-20T02:13:59Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ), Winter jacket, bike\t3.0\tWed, 4 Jul 2001, 2001-07-04, 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t, 'iPad'\t2.0\t2001-07-04T12:08:56, 1999\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2.0\t2001-07, 1997\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n", + "Project 1954256360738 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates2.xls', sheets=[0, 1])\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## XLSX (broken in OpenRefine >=2.8)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### default" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 2423289296267\n", + "rows: 10\n", + " id: 2423289296267\n", + " url: http://127.0.0.1:3333/project?project=2423289296267\n", + " name: duplicates\n", + " modified: 2019-08-20T02:14:01Z\n", + " created: 2019-08-20T02:14:01Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D. (\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n", + "Project 2423289296267 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates.xlsx')\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### sheets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "first sheet from file with 2 sheets" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1593486586431\n", + "rows: 10\n", + " id: 1593486586431\n", + " url: http://127.0.0.1:3333/project?project=1593486586431\n", + " name: duplicates2\n", + " modified: 2019-08-20T02:14:04Z\n", + " created: 2019-08-20T02:14:04Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n", + "Project 1593486586431 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates2.xlsx', sheets=[0])\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "both sheets from file with 2 sheets: should contain 16 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: 1857964669991\n", + "rows: 16\n", + " id: 1857964669991\n", + " url: http://127.0.0.1:3333/project?project=1857964669991\n", + " name: duplicates2\n", + " modified: 2019-08-20T02:14:09Z\n", + " created: 2019-08-20T02:14:09Z\n", + " column 001: email\n", + " column 002: name\n", + " column 003: state\n", + " column 004: gender\n", + " column 005: purchase\n", + " column 006: count\n", + " column 007: date\n", + "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ)\t1.0\tWed, 4 Jul 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1.0\t2001-07-04T12:08:56\n", + "danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n", + "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n", + "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ๐Ÿ“บ), Winter jacket, bike\t3.0\tWed, 4 Jul 2001, 2001-07-04, 2001\n", + "melanie.white@example2.edu\tMelanie White\tNC\tF\t, 'iPad'\t2.0\t2001-07-04T12:08:56, 1999\n", + "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n", + "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2.0\t2001-07, 1997\n", + "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n", + "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n", + "Project 1857964669991 has been successfully deleted\n" + ] + } + ], + "source": [ + "p = cli.create('data/cli/duplicates2.xlsx', sheets=[0, 1])\n", + "cli.info(p.project_id)\n", + "cli.export(p.project_id)\n", + "cli.delete(p.project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/data/duplicates-deletion.json b/tests/data/cli/duplicates-deletion.json similarity index 100% rename from tests/data/duplicates-deletion.json rename to tests/data/cli/duplicates-deletion.json diff --git a/tests/data/cli/duplicates.csv b/tests/data/cli/duplicates.csv new file mode 100644 index 0000000..7a79dfe --- /dev/null +++ b/tests/data/cli/duplicates.csv @@ -0,0 +1,11 @@ +email,name,state,gender,purchase,count,date +danny.baron@example1.com,Danny Baron,CA,M,TV (UTF-8: ๐Ÿ“บ),1,"Wed, 4 Jul 2001" +melanie.white@example2.edu,Melanie White,NC,F,,1,2001-07-04T12:08:56 +danny.baron@example1.com, D. ("Tab") Baron,CA,M,Winter jacket,1,2001-07-04 +ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight,1,2001/07/04 +arthur.duff@example4.com,Arthur Duff,OR,M,Dining table,1,2001-07 +danny.baron@example1.com,Daniel Baron,,,Bike,1,2001 +jean.griffith@example5.org,Jean Griffith,WA,F,Power drill,1,2000 +melanie.white@example2.edu,Melanie White,NC,F,'iPad',1,1999 +ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier,1,1998 +arthur.duff@example4.com,Arthur Duff,OR,M,Night table,1,1997 diff --git a/tests/data/cli/duplicates.json b/tests/data/cli/duplicates.json new file mode 100644 index 0000000..d94ebea --- /dev/null +++ b/tests/data/cli/duplicates.json @@ -0,0 +1,92 @@ +[ + { + "email": "danny.baron@example1.com", + "name": "Danny Baron", + "state": "CA", + "gender": "M", + "purchase": "TV (UTF-8: ๐Ÿ“บ)", + "count": 1, + "date": "Wed, 4 Jul 2001" + }, + { + "email": "melanie.white@example2.edu", + "name": "Melanie White", + "state": "NC", + "gender": "F", + "purchase": "", + "count": 1, + "date": "2001-07-04T12:08:56" + }, + { + "email": "danny.baron@example1.com", + "name": " D.\t(\"Tab\") Baron", + "state": "CA", + "gender": "M", + "purchase": "Winter jacket", + "count": 1, + "date": "2001-07-04" + }, + { + "email": "ben.tyler@example3.org", + "name": "Ben Tyler", + "state": "NV", + "gender": "M", + "purchase": "Flashlight", + "count": 1, + "date": "2001/07/04" + }, + { + "email": "arthur.duff@example4.com", + "name": "Arthur Duff", + "state": "OR", + "gender": "M", + "purchase": "Dining table", + "count": 1, + "date": "2001-07" + }, + { + "email": "danny.baron@example1.com", + "name": "Daniel Baron", + "state": "", + "gender": "", + "purchase": "Bike", + "count": 1, + "date": 2001 + }, + { + "email": "jean.griffith@example5.org", + "name": "Jean Griffith", + "state": "WA", + "gender": "F", + "purchase": "Power drill", + "count": 1, + "date": 2000 + }, + { + "email": "melanie.white@example2.edu", + "name": "Melanie White", + "state": "NC", + "gender": "F", + "purchase": "'iPad'", + "count": 1, + "date": 1999 + }, + { + "email": "ben.morisson@example6.org", + "name": "Ben Morisson", + "state": "FL", + "gender": "M", + "purchase": "Amplifier", + "count": 1, + "date": 1998 + }, + { + "email": "arthur.duff@example4.com", + "name": "Arthur Duff", + "state": "OR", + "gender": "M", + "purchase": "Night table", + "count": 1, + "date": 1997 + } +] diff --git a/tests/data/cli/duplicates.ods b/tests/data/cli/duplicates.ods new file mode 100644 index 0000000000000000000000000000000000000000..375a0afab8c9129622f58933438db65f5c264c6b GIT binary patch literal 13949 zcmdtJWpo_L(k3jHEVP&8e40V5LsyvIA|>BkDtJai3-YpoaaA|cPP-0 z_vXyS?T?d|tfZn44h{|>At5CtB?AKk2M33MfPkc=q@tpthK7cLfq{jEg@c2Gr>AE? zKtNbnSX^9OT3T9eZfl%F4>l&d$ln$<58p+uPgkzTV%b zfosNrfY9Q_1o;$Qm(MgIJ+MWpIn#Qj$Jt)qH=fMV;w627sD9;nxV-&90k_T@Xg+@Oh?|8$^zyl}Y|^ z?69uy-PIlwD5L-`9ec|q_Q>lZVT+<*f7OPqBr}G;aFr-TbHC5w?yB}T;a-|eAs@>7 zE zmF3N^(%Uz(quZPH^Xt)N_`9&`6=h)Z?Ou6NnOk-0E2KufN|>r&c#!9QL`G2sHc92A;Y=8(L-?}*@UiCo4nUP2$}F@4}q zFqWI0Ye#g2;qSvsg9$f`YHh#!T=k<1Z7DY2< z@%g8ghp~)<2(La0*B&XZNDpb&FU#@?Q)uf@l&ZD6ICV&eHoA+HtakGk?!u@VLP(wE zUz690Of>bXPn#i8!VxxdhLmN9;gg*0>q}B~-1#AC%RnpV&DgqNAnz~Hod;oWJ-J=o zSZ)O>jcO~;vFAQlvIpvqlX;dWvM>LtexcOJr}Yozpp7oRhE_)VvimSvQAuL#QQjfx zR*fNlmIFjZ6Gx}WjHtgW#t02LnqKJ*kmT4?C|Z4Bb^G2T-_#ZqxP;JKdP~Ej8?k|Y zg{_g7EA;7`kYg(M;@l`n!Q3^awJ!140F3I`9+E(WS%JJsIthsKP=$*M=)K`3;X_J^ z=v*P2r>R?kA5#0~Ict$jdAE7Eu9cXPqXU`!pXob`7dUJqef?%DgCW-riDth zA3DUt=q(I6rH5T>3DX*wM?r2QBPI~!jW35@Z!A`H2=8o|PY6l_?}DluNpWo&hCN;m zbY&nZTxyCc)6TC$*N*rbHOVc`mEb$cN3{<(lt#bmVk*0=;XLc+>(MI-ER(U<%DKX& z=T6l%)4Y`+M$m?qha>~$IV4?0KU+*iu&Sr5N1zyVm10!n2Snb2HAq4Vacy`k`5-X7 zF@LM?;D#YV5y(GG9o@m*TPk6Wowt6tbY=J~HB0OqFp?_Y52Grg4>Vo^pSL(#P8RQ^ z4t|>9L^o}dfJUM)8OWTQgN!AdV$`Pr&6knC<}n=!p|B{XKGQs!5Cs1OVAa%H%@FY?Rz0R~`T&Js z$6^)LrKPQ~fGQVA#>)l%z(eW{=}MLx7(wdR zRt}RhXf*hm9R&w;&@ELtlJd_~Qe~LVSe9ImN7)tF&DS(y?`@Qa9U0!816mQpR#?H9bV-*-waiGNS;wKR5|zWC^_;+5Prf0=^iO!S6;tF97H7phok28oZ9%Wr#14j zoy4VZfFg7&Y_m~Or|iW(2*?fak+t<4(Q6)#x_0`|w8pMBffb`meReGrNE1se+WcAQ z67L{(sk-OC2W}-_<*MxoG%{`n`{F;E0(aY{fAWQ&EVpYs+BtTzOg!7_MX={p)d6O` zHNeRXAoPCAoa}Om3^@|5tD=dXb7!i21sdv#BsVEvEqin<8!IFUBhHJvbIn??Z!Of* zQM>~`b^Ki&9ltxZOv@(lfL=vtUYnSJ%Os5LaL*-yS2F%!e7HE?u!)f3WRs%x2l3*U z4MCpc?iDONf{DO58&%|;+ZaPK^{l;ery|`PApt*6wNXYrx&XdZrwVmTvc%UN$NvSPm(BR%z@+drfrwNwr2OthkPI;ZAcwWbwJw zm^vtW4NcM#gxCe(Npn~B?v(C(6 zfYI?SnAa_z!>8F=efLI1ZNGA!ZiZic9h)7jZ6+qm-7X6fln1{ibhHw_Bq@c!ZINb~ zgC@m;NvG+9+|fQ|7%b{FQ9NmcBeA;870>`vbwq(bcUuN~^v*m^Uj4zdaUj9ou81Hu zc=JnDq>64GnF?Y`uqD@CtUN!F#J@5q=;Zx=Pv zw8zlQp2*5DZ-8NVx;5}E=V-LMQ?DL42QxG*;`9OfmtU!dGL#2f3yzMr7l3%G1k!c2 zm`)c>&5sGu6tpnvA_W6*?b_f>isJ8T=Du0F3XKV`FxQY5F?(LFU4K$xqHdqg)-@x` zac%e|Zt>KV_@uwdA=7|$GG)9U)hw6tXfdm)w$@sw_X-U2>(F^6lVi4Yxc~OqZ1dFs ze*s@jYJrzO?Rp0K2dxhr5k^|3_!`PF)%u2YUMgT?Lia9O?sAYIFeV##W?6%NhS7st z7b+-%JbMhs9gh&<(rLVSYrnPo9{lxq02IN&_Xa(2k_c~Od+NKLIk@sh4?7~l0H!l_wCXV1M43{G5ph`9 zu);*bOteR$VaiebI+e?y24Y#0wkOKv=koT#T;zC-u1%*wXr)#1jPW>%Ojmv<+@;SU zRm~~VtPronRF&`Yo;gh3Ke_;^VX~_0ee!*LJ?10IjhewnL@cljs(bdx!P&7I*tfBJ zh1(_zVF>(r>W37zFxYB(WW(l%qx6I`;Ow_?s0k`sK`7VuC!FBcGZIs@IKLoDA#Upt zQYf<5GAmQ{gfNbjCsS{Pe?0j*)#gC~1)74F_dM^bj%285*ZB* z6!TkUVP(JE4+VRnVVfh>);xp4oaz~lg&@jlJ+8p%ua|L-QxUZ#FmT#Pt#})$2#vVy zc1M}2OK>FE7RCL^L5bxRtuap9uS}&hy5G1~t&va5x+qqZzmGyaUhyrpLY#@AZ>{|F zvAR!-RxcxleXzlJ+^Ut9Nf_dZYnb5;ew8T1C_GiB)PG$s{+E|1rz4(yf1!*lNSzawps) z88sj(h5Gs>(G{J8JhFCuhgjfbT87?L#Qb8R^SL26kQUS2^=rc6fOxCIlWDn*3CtI( zz{W_3%59l|z`C5^ujd}H+R`eFD&i6jY{r|aSal8=d2{Zp;bbQm5K>P}SLb8t^sYx| zfhJ64x(OC&=O4XF**!)*|1{agC85l!$tQYP)l|fgbhx?E_4Db+aCp@zmytVouE|2b zym8lq%^$nA8BaPmdwouej9LIic3yYOB-XL{#i_Lbv$I0IFyfVudv}vTi5RKVqjC0{ zLeBAq>T?;bJ9G8p4&CiU?{g_TR1VI_oViPWg8M0`Hu_fR-CN2)*~Nk_lkrr`nMnT6 zsOv>HX*hq6#A%z1A33$&39jg0kY;Y16Y@riC-`r(H5m(u+dTp)C-q6(RM|KyMJSI zV{!>`si;dLCBe0)dWOc&?P|tKqG2o|DsdT#Cb!!9dR>r?LQHP6$e2;r;O)2mUVZ~s zJ?o>xM@E9=m|iGG=aeMr{=b5$l17i zgGL*>5O*&?aCPo$EQ=ax&q_O@ariMmzF}FnZe;0=o73aLq*85XKVJ72b>*UG8vz z;cKiz@qhsWB2*7(WV>#H145su@u*jiF`JM(nW2!I3FM3U&Hjo7?^N!&Y%Vy$hDD2F zwx-2_Ck)Xl*)<3cN_3lVKY)DZddfEXZ=G)U1}4#1RIKo+`7QdxDOeJyP+p)dk`m6* z3N99xA5Fr19P?Ux-{{t|nmn=ySOCZu(-{r&hXDh>e*VJW=ip-0N~~#iv*>yDsBDLP zunv}~V0s(_zQfXf*-5R)x4_alX_=lMcqlus$Vf=z?gSX@b@^zW94XEauWOsp_o^g* zq{1^eW-*>(xpl0=7$>T2k#fALi|J~MYCFj!dsyJs0xIqqY+mu>#JP^(5)KWm@T-;Z z^CPDlu7N5ju<0YfHkLJwy64IOcDSO)1cnBqrl0{ESFcOEu;ZzAFWS|r?cG3j-zOR6_SU6hzR|P z4Bzb%BO{)xagh>rVMo((GV@Uc&lBN?e~haG7i%v`Q4KLOIF zsOG>~W9fD`6%yec+G=C3D@~%gR_|RNuQjt>xs0C4F-Cfcvo3}2xOl$C7%??Wj23`{6MZ=0t5HR_KKa`g6D$m}evn+nvnLu@xeRg7T-um@ z>Qp*jPFcN;zsJZ#$@V!RHvgL&L_2mqzSyphDW2zw;|sq39Vhj5R{~Zh`1s03R@J;W zaM{6tqK=p0nfZ298!UPSwCb&iA7$6Jm2W8yzE20IJQgJS{Ye~kCcxxf`!Z2qiE=4r zo3cht{6)J<48n{^5Nb1xKyW*i2|r@HOPxFwC&$68=CgPRS>8R~byJN4GoX)8`i`VT z<1t>GqmGE*<3<5bOB9<8#(GbFYlVzHS)T>eg#^@ROuaVATMEPnT0LhWtz&AiiZvwd?5}$sNwZ zr2msyiJzw2}?{Q@z83t5_OYiT+xUzL-Pzsz$w&pr`1&5)V zf08U0HjIe`j_UIGBqtVDPq=*y(<;^fQfQhl32QJ z1FguHeO2smB4_zSF2`2C`&lID13iPG(IkVO|qn&bM~aYhr) z2E{XG>)j&SK~QA4%G-NP!`s$zBhPiv@zbm`V29LQ3SmT-X*%ev} z>X;-Mj?+FTrh)ObO?2QyYdAYj{5Ta0shQ8c8e^6CJ3J+YJfg|AmX6F4OSR*=_}+18 z%cxjJ)VQlmpQ?Ht^@1V|;If{Tr=6r$R0FZLLWatXPEG8fnL9O5`EZ*Y=DAL~08-uF z5AKdA^NCtu>@uyXYSQT#oa(~_4727PQmxpOSK|PYp6oTIU_xFR8NFGl?F_J!VB)KH zs;l#EOu)6pwiXBSMW#tn`9uz85NzP;OjQ}+khKdYEw8MXJeU?%c zgc>8u%Pm7bPil-zk;x`wl!`3+)Q>F_s`dzVi9(9=yLVp%L%1elJWkElzClyl1@NMeFZ+b8ysibaMFb zdVklOv89ckqtSos?eDaHkL|zrh5t7rGqch&F>;{gH*>Vovvv3{1H}0oWm|h26MG|v z|BoU5ooQ@sY@KX>TlN34M?C`rBTJ)?`Ly}Bd7z-6{zct<+}{5dn2(l$jkV)%UdGkR z@=Rk*euo*Q{X)TY8q?UNp#DYx3ZvT@rY2(vWNj(GrA^6lX`Vo%f++41$IG`}B&#H9 zl5oTsCl1U6m{zY*tkTh&TkK$lEw*PWO-(SuVia8Os&S^K85eYb=0+4JoWn=6o#BDQ zCenvhzgA zht8P#yX_S7A+4#;eK!CmpwmQ69)neKLfbxJH^tFf=n6G+7U;zuP$Ox@pa2E96~VTa zSbsNc)c1Ngb2uF39-r8^>&q=@k3a2JY)U7J2rAAmeGaceDv_TwoY=f?S<}+sQOPT` z=qpSY9i`kpSGPl#Ye=_4qfTRtK3V#rw{3xXO&l^S7>o+-rQwp)-K}BvJaJPaV;xM$ zh)&`HlSYJw)OVAzWsayE>1gVZPDe8ma+PM&Ua9_yq(Mg5?0zEK7HHbZWm8PniMd!A z0TeQ3;a|u@b6S3~v(-8Nz2w)CdZ6}#@;72)y{3aS9)#hS-5SRGTjrmnpc6==A;#km zJ-0gu=j|RWSa|%duZv~b8)zzcIM!GbfJ6dsVc%S3K$Wa~ZR~5147Etp8xK|WCFb){ z_Ye07)}fq)e0I#cu&R)q=`5(CXg?vG6Q^SWTkUYcH^4 zSpBeSV6p_Yu<3EbuCFf;_-MGUSXiyQ&zlNZ=GIrj-IsM4Pu^>2Ot&+wnl3+rlxrL_nB$~3F=Ft21;OrawgIJnJ zD4gcT4=*QE>`akbL3B4P|LRK&dnK>McUNBhIDLF7rzHA* zZ8eHrT;%<=QA!!s2SS*6AqAcCLb8ZP4r$yWTyZ&{3UV;yXmW$Q zozx8Zg~qC6!7siQlV@j!HInXAO=m2b&=jfkNFa-A5>=7GDlUjune(x1geL|Y8_7vy zwspHpB~iDnEgHWgTEgRX?Vl4a;&A(qwTV=+HO~|*Q<&fD7V79r5@)6AR4Us zw?Qfk9`WPG&1I#eJpvUdA!X}FB)AB!`=7em#>}riBT3xeiA@ujF zFp#N|jkt#LW)7RVl|7RkrF*1t`nO*P7d|>0D@`((C3c|W9wS|*OGpwI#dJ3?_uk)} zBOb_^W{IB@qZzwV`oJ`1qD3_)Yi;ystysL7V0Xg&r%DdO;*{6gEaq0=6M}XH2K8H! z2Ybp3%$x_?4jjp}Q`cga*4j?*GS1r>R#%WxxkEHdI<`7X+E?vNfQVm<@5~PX@k2cl zj8i_X-b3{WCub7(iKCC9tCX#JxX)#5t;Dk#n~U<&u6(PfxW=W3w=j_r65!cuqt8#6 zao4y)1bN4q?xmPdvVAu9H8JOd>sZPJG(GVe+Q+}ZXOn;;?eOQ4K40*7q}uPlL-AG&EC#OqVdsQ=2|nwHJgUE;XKt4lqNFO-fpHe##{ne z&Tg{wjjZMxpA?1%j2i1sMkuWC*@q7Q9YIzC5~@gD=Nz6!xy%4SEGX&^7i!b2`pq;$ z)453LeAMHzf&#l~xwYEOqJvo+-T^dsu^C-PauMRY0q8b2pPv{HURifHql;YQr6t|j zSaMwxtJyUavi=6LbnL9@CTx+X$(JqJ!uQ`|>Gv%ew-pX`SqSqwXV|7aD8Dx|b#{(l zMOR))J(Fj52n?CMgauA~4qL%Bs8qLQcLYj{X%!bs86W(C68~8|G+fc`sZx_B3`@jv zw<)k6dnA*0$e^0phFY}JFC?FR6pH_O%Sd5rsUHD&@j^P&fB7?=8|$W5M3>ky87u={rJ4miyVDLC`J=#C4lLolWTnFbwZjVW?Y8k{XFfQ z=bh&uD0o~V6Ksuz1Du>Cprd z5(f+fME0?ek6&hu>4Vrha0x!u8E)L4)=?5urlQ-28^bgy2{nvjt;W#xb! zv_p9&Zq*`PJJkYyP^iaOS8YR=Nx*ng_Crfp3ZRwuN0oIpI}T1+msR$H6gIo|_Cs&% z&xR(WRuT#L*OoQr6TP<5E$|z*TU|J}3|X`5an`Rrd0LmQFK_+401l)ic=_>WVXA&jF+Lc;_ADv8}WF^ zsjz8WBH43Q{0+`&2`uD}*VE*l<4}=jy>0CPRi;0ZxC*WWM7ny)8qHKIlU_NE0z6b- zW#2v0K@WWb%iZO3`;MS)MOQ$;CD7DQ<8)%LJmB#4l-EDMRtb2S4Rj?wp3|~ql^}Vyi zK)2_l8h8sMe7($Os_|NU5ba}hq}4ET#QQ9ahnNCOh!i7Cf-{rJs-Fs!>gH(fO7up> z?Zfh-4XJLJj)eCPo)m8bw|6`=?$zv-{M>4AL^LG19MsCw5d)MM_<`&BaV9<8ltx{h zYUDU22+{X7KxPW<*B^c5MdgzdDNOkf*ScTujHuq50x7YiaPG!c|$`>E?Q!og0ahwgd8KlI?Kz&Ui#<=>8iA70B`bxi|U0#*j~klcd@9Z zWPMv>%r8h@WDnS=a?fMGC`ud-H8(j9 z+T*+6is>;7;`}>HqcJ)?3>|2_#m|oZhv{^|HWv740f`?!m8 zI2|&F$z^Qe_$YKzq5Sez^eBEG;vwUe>EQaySC_WD8cmdVuh7J(M*3W$95- ztA$!pwtKd9t0Lt#y`wA+U(Jl=E_mZyvp)Zz3)EeC3YVpf+2E2D3e^eLB4hvgB@co1 z`a6mBbM|7I=IBF^zSZ22tZM6}Gs$@w(y>}G?vs*2?pIL|Mko2ao&APPfejM1hl3g} zR)GZEPs0|dcdbgy;!;7@YIo3wDT~lINh-%WFxA&C?fs3sw15vebv9UvtrIm3)PksL z(xn=Onrgkm60}!{dTY4N_l!11Ul^8Xz>={zNtP`(kM^DfYw6kXnr)}m$;H#1KcUm$ zw8KGGCb}0GqQN@dL+I1-hbj>WJHY4o&M0ijSj=y(hDadyRI)yHY;g;B8f+e2C*(*7 z=*4xhW@8u$Ah1ghd3H{phW@1aW~c~3`cRtqQ@oDhj`rZK&vCmE-Ql8d)QcwQa?a(< zQ3s3p)uVZ}jKKH=th{dX2|p@%0_H?&V*Z$RBOWUi?3j^@%~dXOKOHh(lO{9=L2e5) zyGB!6X>Qfok@omA=bdq3$+B37k`R=e5{k?sW{pz8;I)+XgIcpY{dI%|N^$i=kM~nT z3p#kqK;1w#{;&PxJts?bH6#iN2?Sw)c3k9`2hYH5vI$|4926{)=r6u@_}-sEO^PuW z_8S2&D7M(rZp?#(6MHuXlTT57t?u-mFzz)^ax*rw+XD()0I$!Q9*cpLhj=CaQ&&>U z5qt=zjJ~#mQLtV-eQ=<=Y`&zD9<-V;J80-COk;12w>!S5%OWEPJDML344_!-0)Q#+O{?3AWNND1OxZo&rDfu%_sXKFPWU zH#6PNoTbnJF<=xfyo1wbnqBEi=aAgNGc-s+`2$IyN(;E|FLg#<5<9{*ZcpUYBoiRj zZNajh5NRp+s`^(G(rzdh=51MFvfW|Wz)13g|56sCk#>S7hFbC^X@JWLB}E_UpwIX` zQ{`WBn2E*(y>q`4p3ET>0ZzWOJTc9~OYfa@S@yGr?T+3&;tPM2aJ@Vg*{~ITpw(C> zId$jvz3eC_q`EJ*;F!aLL%Q%^(T7MtZz1n-?D@WI>7K2CRRMFBT^5rq342Q1B9NVb z74)~IX*$1)BrfzojRepREf?HD7C>nPN8i6}c-xl=`~b)3~1FJI^pJ zO(6dF`V(rdL{o3^&4CCW7DwmHx4}X3b}BP|uZ+qI)?-N_%f;Rhug09MQrVqy zeLq6UIwK43r)Z5477m?fxx&N^)t!M(ESJnve^=hevz#Ki zQ$Xfa@Vjw+XMK1p4`w;DGS6n*6(JfsTAW<83p9I_o%@e|_Bt~2g}Sx$?wxKd6(iZC z1ug1e9HT(Z4MYC(A+zK5IYv~v;~W)S4L8bUGJ(2W@K-$nz>0Wlq?^DZD+-i%|lnbz$m05`J3X zW)MCbXcV2p)|_7tKs!}y@{}`m?h68&Rt>Ve0>3`-*b`0^BFJBFx|M@rD+b>EE-E{D zE2Fm36Q;KbeY(rh-fgq0*zn2L30Rtry>R;t%KISIgJO{BHo&s}i^aDm$eZfZ!Q&Cc zb3dSISC6Q~6h-kj+H9k{BmgL6&ZP-||F_l{(?@2g=ZzrQMzRT&9ws~xSfUv$3G7j>8_$r@IXvU&6~kYlcBqooWYPtXFW%KCqxd& z1!dWBfrCSc$$|{dePq>=z@#MbDm{GlJ?8u{j>9zX@gmObaZE%lu6BQt6VkHXSh{hj zs2XcmOh?Pfxo=ABA@<2?=dFMiCxwwTvx)*t+A^<-#*nv?DjN0`E(UCT zfPY&Zl?}L@;Ugqcf&I2RFbE3JKkotgF#Vt8-#bG7Q|mu>dHkWG_|g2QJs z&~Mu059$2aJ@TKd)Sua`zmDM#z5m&B^2hb#-{AbgZv8XLU+;$IgMj+ayZJNQ_0KrJ znX5m<_-}CjV88zVqNM)~${%dlKcoES!~T%zzd`vw*s*^G`^}I2AqbfNr-A;;mi;r* zUrjCccSwI?&;A+buXp74H#mP~)BXqP-$lMZm;ZkAX@5w|2hyL|wSWJI|E>7va?|hX z;2%Qr;p0B`Q~j$}_@7#T&hdYjqW+Kzfe`@~O^Zr%y sg5s~K@PBIl*?au<%zsEY-9PwgSqbou2N)nA#E+NJhtmQ1_WSC80qyD4{{R30 literal 0 HcmV?d00001 diff --git a/tests/data/cli/duplicates.tsv b/tests/data/cli/duplicates.tsv new file mode 100644 index 0000000..5a9767d --- /dev/null +++ b/tests/data/cli/duplicates.tsv @@ -0,0 +1,11 @@ +email name state gender purchase count date +danny.baron@example1.com Danny Baron CA M TV (UTF-8: ๐Ÿ“บ) 1 Wed, 4 Jul 2001 +melanie.white@example2.edu Melanie White NC F 1 2001-07-04T12:08:56 +danny.baron@example1.com "D. (""Tab"") Baron" CA M Winter jacket 1 2001-07-04 +ben.tyler@example3.org Ben Tyler NV M Flashlight 1 2001/07/04 +arthur.duff@example4.com Arthur Duff OR M Dining table 1 2001-07 +danny.baron@example1.com Daniel Baron Bike 1 2001 +jean.griffith@example5.org Jean Griffith WA F Power drill 1 2000 +melanie.white@example2.edu Melanie White NC F 'iPad' 1 1999 +ben.morisson@example6.org Ben Morisson FL M Amplifier 1 1998 +arthur.duff@example4.com Arthur Duff OR M Night table 1 1997 diff --git a/tests/data/cli/duplicates.txt b/tests/data/cli/duplicates.txt new file mode 100644 index 0000000..3d76a12 --- /dev/null +++ b/tests/data/cli/duplicates.txt @@ -0,0 +1,11 @@ +email name state gender purchase count date +danny.baron@example1.com Danny Baron CA M TV (UTF-8: ๐Ÿ“บ) 1 Wed, 4 Jul 2001 +melanie.white@example2.edu Melanie White NC F 1 2001-07-04T12:08:5 +danny.baron@example1.com D. ("Tab") Baron CA M Winter jacket 1 2001-07-04 +ben.tyler@example3.org Ben Tyler NV M Flashlight 1 2001/07/04 +arthur.duff@example4.com Arthur Duff OR M Dining table 1 2001-07 +danny.baron@example1.com Daniel Baron Bike 1 2001 +jean.griffith@example5.org Jean Griffith WA F Power drill 1 2000 +melanie.white@example2.edu Melanie White NC F 'iPad' 1 1999 +ben.morisson@example6.org Ben Morisson FL M Amplifier 1 1998 +arthur.duff@example4.com Arthur Duff OR M Night table 1 1997 diff --git a/tests/data/cli/duplicates.xls b/tests/data/cli/duplicates.xls new file mode 100644 index 0000000000000000000000000000000000000000..cbc29160b6c7dd851aa6841c6e4035f8ed4eb295 GIT binary patch literal 7680 zcmeHMU2I%O6+Uf-BKZ(6r|0HqUCRzW~{ZXA*#>5h;M6Y+R*EhMl z8?AShO)_Gpdvh=N(F=j4}fr95D!Hlq(}iFl<-!;!xj~xR!zjw|_m&#nEE2*u zd7myXFW1=kd{90^9=Pe*$~lZ+5~Bd;ya9*gGr%@mz4vEmh!0G?jR&Trh`+L! z^0XB2F3EY!@W#tdL`+cpMtTPG#YWoSsm%lZz6b8)E7k}7&-~3i9tQ5T{+au^{+oaV zfGt2;fL34|upQU|>;ygsd>(iR*abWcd;xd_*bO`in7|&O4R{QA9M}uA106so&;{%R z_5;jldAwoo?R)V5>#H%2S&T*H zXItN6%2&Dn1|s5zkt-@L{sP{Ma)z-nYPlKn-&IDG4VJ;!Ttbi@!IvSIz-^KrDDKbI z^N5N*G)jxf`R0k_LnEzhe_cJ>)UydsLw>6I{84dw5tqM4`{l2_0r_M_jbY?JM=Ws> zSF38#;C|Kaj%<<3++Uxu3wFtJcSmCKJ=iAc>uLpEmVYdp&{r!PVG_u}u05eG!3UpM5h@@3}t>XgzKt1^!>0WfwLhe7nq<|_05FL7Fo%PR)Xi}59_ zKZ&Ph_Xji@J4PHkM;tps{GS-dCz1(01wOMmPMdb#a$E>6Sa}=dLK#-!0GhK4S-XT2 z=$T3>GjA=hE>o-&%4nKJ6WEls3Wf8@S*uhmr0sK7{)}t)CNsr6j;dpGAKi!k6iosPhA}FjGgT^2h8UyuGyDL^-=1cD7Ahvu$14OvsyY3T3-wp0YBh?J~~XeqU^;vvwg_KJVJ4nic)YVkrl2kJtrs zniaI*B)W~e*228&s>&Qg737?l!i0@E z1*ecR%hs%GBWyftSV5P#+HB0mh;!P8t4`TgA(<;Vb8}94z9#-4jl|HNN2U3UR|y3( z!U>Fnc%sF-m zB9C*}+T>+iTe{_C7>It7iSGdL;T0ZJSVC}^=smD`)gHJ7(Vj(6aMx&2k>8B?{^a`~ z{C;9~^4M1te^K3#lpNX1HHoG13Tm!I_NgBG)$@XCyB(*SJviEowzo|?d3(V;Zl5)$ zig~NhJ!R)Au2pJl@9k?J-hmSj&Odi(H)-W;vwiYarQo7^w!@fyRo*|=5%})J_p3eE zF7#Zh9_tExzb$RJ++{I5{#}r6quZ$Uue}Ydln`2d)w}k8>mfQs6wO}SntVEhnzy1p z9mbLJIDz#%WSv`h2v+d+uF{Ksx3pup)yqS?-oj%D2HV|K7jAvT7SWV} zP^3alg;ZeHs4yswN7qF_aU`0Gg5nr76$3@j=!2Ccd^khO^Xf#C)yxM4r8n`)XDyV_c zo=}$`YKW}T5P^n-x)iY=XD6YqN1CDrUX(ON4ZJdGDySi#(?ah71>-3{;Y8w__M~C^=FOo|rFM`JuNaX|rl6^VG_4ufMQ2t9ia6fg>9v~Qme-1CW4s~PsT_3M#(~OyeWFCnjzV)Dzi^#^4W|Mv zHug#V91rs!%SL)Vfkw}H4GrjZ#okD_HQR8r(VF#h%|><1CqeVKT9&AMQ7RaJ9>03= zxButxUw}C^C*+jzYgiGS^-HIxpBv3a$IsjUd!ERsj6p+2*xK-`@@gZD80G<26e R{rBT_xBd_Q{{ElV|1ZAGtziHF literal 0 HcmV?d00001 diff --git a/tests/data/cli/duplicates.xlsx b/tests/data/cli/duplicates.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..30418c57dbffdeffd7923588b957151f275d978e GIT binary patch literal 5672 zcmaJ_Wmr^e*CwT5h(QqP?vzeZdT1qvMqp?dxe<(c-yCY= z(Pb#~&rep8tYa_kZyjNJVJR66NV#j%bPZ;ex^{_zdHS7&*&TQp^| z{_-&&4H(~Md-U1p)Yo?^N1Gz?o6+5hgg0P?SP>mP-ngwHKfdfNQ9nc!q81^ni8b_O zFI&i`pWIaCH?I9EZp3HRnIvUeJfNmoF*WcuY)6NG=sM-3-lA=A)s{0!ehFLR*T&HLO6%crz301I zT=8ZrwI6SPXZd>2n03Fh8Z{q?p{viCK(kJnda3I$A+Sad0wO2TJ~iBMizn>U5h0>s zqhO8rlB&M(L3t@uJw-E+*S(u4NV>&ne+uk;_?+H#rPBqDC1`s8L}@xM=UeqXt@{bk2H! zYTi-0DUe4=0VWmltAX@U4sp>tc(?T%_g8q1AjS zlfd+jPaE!->cB9sF*k+E4jV*htF#Gstp`$p4#vb2_?BxqW0=EvFrh-eU+QvJoLXse z>2w&l3vN>#^vAHb1JBNlbq?;7w>z_Bc1b+moF&1}3uE$2Tprm&_lZoIF^+-t+R-Sj zF$#fgP3E(n_}KXM`-F;~e57PkCn-8yc=yRaQu|_+A)%6Wgy@{SSCj?)AUZTBmVJ(k zE~MgxP$5smOOhPFNWSOb#Ai7&3AGqi2B<9qS1OnA?s;#_Nrx>Oj8Dy^$UE_(oyZfg zm4B-Hmt&C;3UfE?%?w7hkUNiD;lV?EhjN@V=GXXYN{s>1U14h(_uueE^(VfrYt92= zW8>+;|Hn=68e^?lAdh8nK;-T@w~6eq{mtM#8=-c640l_YkSfZZwl$mR&o1^$ajiW!OnLfmYi$JPEt#Y$QE4VW**lG)n4=|0i;`O9QQ;Ce z*CfXc2+>U_sFqik?pjtZIP1Y|4U05w6r=P1i1=iAw>_+QS)hgL&05FI1Axj?-S42i zbg{=$f%h8)vEFKuF3jk~8^p0pvGk2{O}}+$DVxOuwP6r5ENp+n5jGYRfSV{pPx&%B zJj0L~af?~QU`L$0Ofg}z*RE|Rm)3`1@sCOZg|OU|XR3lQNfYNxpr8&G-r`t1O+5+C zsVwy!v6_XgrV)aY8K(r@7ZhGJ8Dg6*l{(`sO7A@@U&zazCTR&hpyLcm0gc1>*Hq#a z#(9%8UUJ=-QW$;PVZ(DXZ`I51Wmj;;>63T z{ee;E!wWPw4sGeB$qhAo|??!0g#x7cH`X zRkVE0e`Q&p+dBs9776vdqz3P+albbSC#WsnyRk+3mwnF*zSAg%C#(BFJyz7v#fFqW z+HRAy#uL`)nqUv`m`rW040p(3Xz*gfW^o3hcU#tECQbY4F zCoU!;cm)+C33UGjVx{DMA;@+v^>f3YOO^#KCqa(-@hvRWc+?v|h#k zmP72loWrId3a<*>Y$YbC&5@@%J_)6D6}WYAd6{G~WxPs6?_OsEiG{7 zWdzr&B&3~vT5DA5>hcX)jFDQPry)qI`(SGK6Ap*iWP^qc)Ki_4;r_$o29?8jB-2$1E2T)B}%Z*215Vw`F0K=up0jXqarBu2tswRViA2B#2C zc_oq{_8vHeJoa9IAqezzU%J+CEZ=TSXF8dv!8J-oTyVQ%KLaQBUa(({%Hj1xl(QNV zDN0U~&`C#C6V_p25k>p0OxEYyDxlu^ptEoTlC#)dIwr5dd&|15erD`A=bOB>otdi0 zdUZ=JHoM%mk*34hJI9IjMDZM@;9esUHqW67t%VK}k!pDor_u9Up4INtd{hHH&IJy2 zBlyflCa9aL8Rr-kD50*xDg|vNGYFryWXGHKS30uO7Hs`<+k1o1U7zlBtlMR-rcJfRX96EMxZb;GYeLx6H%%rZJRo3?+goEBdV;*Tx20C(y*+AN zvnE0i!~XuD!uMO%@TsUM_#gr$F{{iLYbF#IzsLrcBO0@;CBwQKZOsS z<+FSWM!%-+m9wUoUf5`85bA%UZjztW{g+&d*BiK21+6FCSSV*+GY6VUko<%c>X?#SnI8 zm_-N6K-ff8(oy74mp8K3@S$z@%k>P6foX{r;>V0lQf2AAT< z5(`I}`=k`wp;WmC30Z6MHFWjgK`#*5&-3Km!Vno=as_ z(z52m-!@vWp1=%${2?iKs7f5p(b3SLnE!?wqW_bWo_RBE_0wTogOuK;(i{0~u1waBn*8c&yrG2S-A#23 z{`6REycG+MO|lg=mqA1gPD=k=5x6siPSoh>i^qYb zS^TNw9lkpq6`12G5KJ~-W4-P&B$H*6@L{RDsNjfl?eY^ASaiMq5H_LqtobCciN`Yj zn+RymwU7X`IdBM(hukU;sPw$y2!AQvK^rclgwKlC2D8c~cj5R_ptY&#Z9(Rq4OV$& zr1N$X#^t8^>H&r%ZC~1P);V#i7Ux=qOGxm0u{PQ8;;rxdp{WS8qXm#>4QF@mAyTska!QPAE`+D`kAwgQ8$<}nWY(c4lF@{dr{i_B@&GQB1CKjFKzNm^&`vJ5@kYKJm#GO{=2;f_dbj8ViZ_ zAnZf8(;-8O3p$)eVA@2d)6h(E*`nn3o2W^qVMXo*emFc-4i6L=&!3;zdgeXk!TSv3-mkB6pPt99&yiXyfA5vZemDs{~wq z@yOL$8|v!j!Efp2cCGgFlC-Y7?4I3oijKo%?Tn8K=^Ls+&c-he!D&^F%)bZ*FrOV_lU7`YZo8+ z5cKikP>)%&J)Nw<&ZsQV2UGF!5&_G2sacB#vK%_FE)zEfcKA+nou{4lK%LF2Ecjp1 zEjaizf?xK_1dHSQ}mFz%gr zVH7ho?QF;OHCwMe%X?MCYsg;51yI$c2(h~Aw1MkE60yo0(C1v5j#n~2zzw2{S{Z(% z0rss&J~_fLp@>Tz%H$kH20XLep1#H6rK(*nUsad(0SChE#Bp8%TvW)GwZMB0-Q;v3 zvYwDCEMm25Et#2qE4)akNJN(ElgWa0=_q5k;X=2bDO^JF*mbe=@ZRxe9(ikxeReUm z_qc#uM}F)nxY2<}p9YAjznW_^jB$iamypFNSY6Bn_D1!Se7?`Tm<@9vKj4(;p2*M~ zZ$y4}-2BeeGNy(WjLZ-&@8Qeds>ebX?VF1Eo~h23*vi3^N;@g1L*KQw0gW6~Q0sM< zj2ZBiV3_$~t<-NI)mfq-78Owmk<*me=G(Ds4x0_UMEe?r9RP6P4SCa1J2wz90a<>vh_i1D zqgOUvmZk}xqMX7&7j3*mDX*oAH)~pddfFb)#!u%gYAWUh$5=}nlc1g;Rh1`TPUG`9 zkZ#ke?)QYIBg7~JLtBw+#rPd&!mIbOiB3Kkjv8{tIr_F&a%Zhrb*Nk1EE;1aSHc-( zBuWm`fP*={!HEavZzVRq#bqBrH1MmfgoDA{9hm7xui%=wVaO9S17o@-b-6s9dkr%0 zbSLxa4Qk#>1GGUJl<%8KT`Au(I-?6F`p#>rs@Tp&;Girp!Bg9;k(Rr?yk_x6+I9DE z5`6AW8_b0S;uG6_vA^VyWC7`LppQq}gaI}yG5Lb90RkO2zmUsX9a63tHOJu`!`CiD z66bMmwBnoCccsbRNvx@`+*7D44jP<2IiYtY<{im>{hV<>-}>(N1D5^|Osq{W&h)<2 zN4W0=?3+$TUsU;~zdlx$IQ@fSU<^!f)2rPvxKgOv>gX60Xg`N~zYevohkE~Q|2*RR zJmkQa^S@F@R}uc_$m91YzbdM0z5Ubf;Qbxt zFZKPq^RI{0wdne3&Q~YX^&6 literal 0 HcmV?d00001 diff --git a/tests/data/cli/duplicates.xml b/tests/data/cli/duplicates.xml new file mode 100644 index 0000000..2402358 --- /dev/null +++ b/tests/data/cli/duplicates.xml @@ -0,0 +1,93 @@ + + + + danny.baron@example1.com + Danny Baron + CA + M + TV (UTF-8: ๐Ÿ“บ) + 1 + Wed, 4 Jul 2001 + + + melanie.white@example2.edu + Melanie White + NC + F + <iPhone> + 1 + 2001-07-04T12:08:56 + + + danny.baron@example1.com + D. ("Tab") Baron + CA + M + Winter jacket + 1 + 2001-07-04 + + + ben.tyler@example3.org + Ben Tyler + NV + M + Flashlight + 1 + 2001/07/04 + + + arthur.duff@example4.com + Arthur Duff + OR + M + Dining table + 1 + 2001-07 + + + danny.baron@example1.com + Daniel Baron + + + Bike + 1 + 2001 + + + jean.griffith@example5.org + Jean Griffith + WA + F + Power drill + 1 + 2000 + + + melanie.white@example2.edu + Melanie White + NC + F + 'iPad' + 1 + 1999 + + + ben.morisson@example6.org + Ben Morisson + FL + M + Amplifier + 1 + 1998 + + + arthur.duff@example4.com + Arthur Duff + OR + M + Night table + 1 + 1997 + + diff --git a/tests/data/cli/duplicates.zip b/tests/data/cli/duplicates.zip new file mode 100644 index 0000000000000000000000000000000000000000..eee03fff5f4b85177f17375cd82510d7d7034806 GIT binary patch literal 1082 zcmWIWW@Zs#U|`^2_^?*k|7AyjP8lNu!+s_P20jKEhLqBRoXq6JlGI|o5Kac> z%YUL`LAbPnn}Lz#1v3K!n3y^-*6)ymNbCH!T55Y1oJ`FSe5!uzguj7e(~*!$xfi+B zrBj>B@bg*hl@eifyVn?LFiJ-=Y%e!U^yvyY(i7qNC+oxW zb1g;nEC(jc(>hz}van)_CeO(uQ%l3H&5Js+?mNdcX6t84wcD1n&suVab;tidy;iy| z0o&JIfBp5;-bo?B(`rra?0w8`3(vT7V>Y{3kVn~(pm+b0*Hv!(ny9N?KF2=*9($&1 zg#F)h_P)*l#-2DZ_V^hTkYdjW5`DZ3?9+cn#lkRJ6#7o|^*d}J((>K%=w$N-6JAfv zFYX+HE4o@TRt7B0-d^d+)7LEHe!$X#;roLS8AkP`2aav}d-8MIu^5k~i75wH%qi*V z@jWDVe(T*Qo04))-Lg=TW9>9~rW@lOJ*&8G>e0>5EbdHURpVXI&Gw+ENAk$0#&7SZ zUp|-M5-2ZctG$-mvBJ;x@_OO8eJFH&0TT7v+DL<4z6pL zH2C6+o>rLelr9QN-sZX`OW@h`8BTLnxyDJ^F~42e>hq0dmq7ih(>tuY*cLw)4zQ8o zbKQ_8yvzJ-nR>0rG_%93(otsDXW0Mn2-aO4vs$OTV7bBDW~!3OCdXJme<^|+WoC# zU7bK!vV2wGz8^)xtGC>{)qN|%H8V8yZ|(Jy`I~O?AN+DGTDp2g(9_MwK0IbWXdd9r z$Rx*%E7M5;Ga~~7FculMG=f;@S&$Wy1<~RTWTptzOl(O3WD*E0Y1{`i8et|;2_wLp Tl?|ks2?#d=>0)4}XJ7yT5lg$O literal 0 HcmV?d00001 diff --git a/tests/data/cli/duplicates1.xml b/tests/data/cli/duplicates1.xml new file mode 100644 index 0000000..ad678e2 --- /dev/null +++ b/tests/data/cli/duplicates1.xml @@ -0,0 +1,10 @@ + + + danny.baron@example1.com + Danny Baron + CA + M + TV (UTF-8: ๐Ÿ“บ) + 1 + Wed, 4 Jul 2001 + diff --git a/tests/data/cli/duplicates2.ods b/tests/data/cli/duplicates2.ods new file mode 100644 index 0000000000000000000000000000000000000000..25751a81b12a4b8bcaad518f122c972d8f07cc69 GIT binary patch literal 12139 zcmd^lbzEH8wq^mrJxFi~?(QLIAh^30Ubwrv1PSi$?(V@gxCRLB?mFbYxi8)Qy6?>V zJ*(lIVc#KA((5(qN4W3d1CklxzTI7C)j6cG*w?)4{#VxNWOU*Bh6j~5t-*VpFs z`SsT~Em=v$PlSYoBqSs>G&HQNtUNqCLPA24l9GywiW(Xk`uh52W@dJFcJA)(etv#o zVPP>bF)1l2IXO8cB_%aAH7zYI-@bht9v+^Wnp#>~+Su4QI5@btxOjek{x#Rj%dApv z0sw#%B_=GO=)8E85|WJD5Fvurps3m9L9zP61s(h0C|*69$7y{eoR9K2LP<$e`YXUQ zbI7Z7_57m$Xzr`{1Bc%7_&t&tV!|`LvDUX|-q9)eW8Cs#5Mts$%34hf=PdK5BE*mjh)9 zI&58ha*l9SkMN3myb#fZEAC{R$2M#|8%?ecZ}D_QlGv!-J8i`=n|hc(d1EfVaMQ+d zS*PRD$_1S3n9utM4#BY_ibB$@a%7>Z6$YycfC%ztIaFUJWyWCJ57%CJGxi=I5v$r! zBKY+AT2VLlj*?!&Rrl;*s_v$;-i&=*82#y1dsx99KD+JXJt+P{>y4OG<>keZ^M#%w zXh#*Y^HcB?whB1WdJohrKQ>fz!)PJ_M6Di+^GOTSC_}^h+iha($b{y#ASROFW?q8E zxK$a?PQ99{2vIxlXzR>M@h+eB*gQXM%SitYKAI2+WBI^Xr{;CmgsAbV2vF*YD4z@v z$eeP$uEwJ}s)nx=H|AjSwphE=k5uswEvB%3qf_-y|DPU8Ig#9k)qbHhBo((nUjM(s%E_XER&ktG2-crZ>Y zS}DFi`gy6$x9Qf5BU!CmWtKWZe%zNfHY@mm@6(iKGZJNlHtNx79<{x|B`o};WBW+e z6U|LC?4ZI#mZY>p*?9ywpIn_|9+rdEB zpmzC2s9I;OhBe>34n>2@rK9VhV;V}g6=$XaTJTq`yJd0@Gl^juVOF-JZit#gs7i>V zsE5I*kd6sj#&r$HPAhSdZY|_F1ZK1*1;N_|XbS2SA$@RK$ezhkKWN|3P6Rd-W;~8b zw*N#p9-E#x7}1H)5a6@>Jdq&UzvZ>-XZW}?Ik3vg^Ne4$(h$?KWH9wo(P{17bvT&c z3}wNWZZ7lkWUS=Ift0M9Or!JobdOj4jh&RWQUiK5)$R#WS&(ph=X5F1M!xJey?)|k z?0XGaJ)IgB!U2Sq^GIST8Q;{Q10ZqDnAkHcql<(E6OCePcG zjQT8x=mq)1AAoLEEmEa78o23JMho2@m>im8v)nqm_-aIrCgj!oq4veLOwJ0sC#j+o zPqWS#+@cCD4N-IgA7-3>mfFE4IO0<-+8R=i|IQj$>$`l zJ999dgOMjOg~VB#=K*bMvL~Z;LLm{zuoMypN3*SIfa1tlhFCfiPl;*t>fHHCzPSJh ztLkN#GI9~qsW&1!a%^VBX|>cGszO*d*kM$>vovu&s)6B?Sev$*mE0+y+C0UN4K&JU zmVx_B#C5t%B1+*B+}ZQ0y{Roucxaib6a4qAHo{79`WHd8c*UvIQ0Lg8s2&_6fubx8 z>b)uY5yYZXr6o=M?Op1PxUyLLDU@65W(hx4_IZ^mhaG}3{Yo!1O0)0sS%Jfg$4nWH zly|cC4mJ6ZwS9pW%!v|a+lThU*?U>ikrq0PM7O2i9u%#gEdxSuoRH(g)#&ad3f{3r zc#@9zy!ve9=M_W#6OmPA65FQ-yvNJFkNXi%={9EaoXgz=hu6qu?-G5NJO6bwn22>8&yI0de~oSzoMElgr;R$VB0GO_ zk=jA*FcN>-;9X(^4qe7W-QdKNv7=Z8pI+L-L9W-#hC#%D%@;OOBQrfFyn6%r23w>6 zR_~-Xe(Ze4P3Ki6nUiRqh@cd2Gx{U^X_wZ`%bam1EYD+O_YKv7{yt)hD_2I zy5D+S{N1lpXcJnc&M*KhshxAUr=;ge32p{mQ! zS~~|b>a_D*OizY$GwPIj`HUdx866<7Tf4YjDuRgz({x}iMK-(pTQ1*1$iyS=Q2J0^ z?}(-dn!~=PAN*jZk=}YJ6Ah)DEx5ADSB?nM+QS%iYX&<3Ojn|?eexZ&Cg!n7@?lO-vE=>mC@l!-#E-fedB49 zBr=d55nB9xCkzc@WPY!kN>@en3Z6((zrqJb?CuBdn%1(eiiDr@$iW3;@u8oNB#-^G zP~{EXjaqW1%TFmQk$T-RQSL=ugo`4Ld(%a8Y|e(x^A3hHfx@(PH*r`z`W<>e@z&}Zd7Q94 z=Ht(xUVB&;HH4MI+%I5^w(B3}^1gWT-nrz~Y&KK13Xg#E6s;`bRb2}~5#F)Nu%FY{ zjtjMP9-E$?8zMeqf%m?_g}d<^LNe9}g56s~Y_EuiE6jUuufne~TerY+azD^jUD|9m zUwo~5${q`o5R<`XvY=%DCB)pkG>6bZCT5QM?J>A!nXzivMzyn6JZ!^HEKygVI4wKS zR^pB@YNDNH-p0uL=eArWTt}4_0St3D!Ksq^rr@9*&oaPNsDYg7jXDnX92Y*$XirQA zuVf#cis{M~&$Q-3Ueyurv3bklvh}pF0CG%D^||T=RLQlr@_h&At@#s)4GEg=lj zm26@jrUM$;Rh^Wvl-XL6a;tL?j^LA8JnpeacQEK?FVKX#X=d)5K|{n0qrRYiJ4P(A zzslBltE&lCt#O4-ifnOH>@qTP^$dv++-|Fec~_}_Ib{ECu>lKC47BYzTLW zo7D5TmCjb-Oo$i*)#KQRg!g)PF&U3f7HxHN4~Jzu+uHT7ems+x>uk zSUo%x1HZ^G=;c?mpL#)8LyL=>xIU`j8AUGBeg&J7-K|s9RpSbdjgAN8<5~$o2^1hb zAn>Sv@Ms;x7jad%%BoYd0)eIChC+S`#`^r^`GmMX-!x0l2{Vm)_w(V4RDjTMA%CKT zFYVH#s34gvSVHOzelg)p)2TGS{M~#7?egrJ4lj&eVg$wKBYum%KL2GTCfdLS>n_2w zP7lI^lvsvxR6>?JggMo6DiG*E@t9Rde4t(nR-h3*f)YzVDQ4LuP1tt56A?cy7H`CdS!MC4CO~I8SJ%+KHd-vib8KYlnHiV6`yJ;{mP@6Do4`zA^GtkSROPy;8*7H zab;=w?J(%HmK>UQ%&oPk_D3txRxoiJ$>rB0ZaglZq(GNch| z_Eh&v$hk2?)CMO0P^ILtzBIa`eamtllEh_7ngPWSANg^k2?}e+-9>_~u{X^)C#Sn} z`EASj5YcS^J5)!+XPEpOGg?c4w+$Dt^oCe++VmEUW7bpH%dfXD5m}iA{xw!$`|up9 z`#HU%ory!8pp8ezVq_N01VQ!|Kx@1Iut2=OQMR_V zGPX6e`#&u4?@VKDW$j@7%c}p6GXnMX4b2T-@2Azj%>x4i^RKqe>*f8oz`u_4tt{<- z^<vvDr?F+2NupiV>&>~O4#kY+mqtk~C%y24-%#}sb%AG+W#mBb z^12oiWC3*EgXHqfLztAFtLStI3B9#wD^(9JN4vX*@N*GRohJ89)8fIk{1ON1D9TnX zS4u?7IE?vH-wq2x;idGP;_uqd5YfNu!00-vwwdPv-A&`8b)w+5bo!W&gfpFG;@X8z zO>gk4%y-}N(vK3L6f|*Zo>q@Uvwwrn-(rLA0a?cQx~Lh6(U7x^T7In_fe%y)3s!Ub z&?$%13r=$5IGJp79PZ%pF~jVlU8JzgDKawhDw6}uoYX#D#)jVzhr36XTHjUWV4tZB zf{QXtW-_0+shuUD_Hr74L{M5RN{mLT*Eq>I3@d(NaCk6AZ=vPn*u{#=!Y{F;#-GX- z*RHmrcyE+E2j;F^*)Dn?w~OGZsYQPj%5CT%wm71QjqZ{cge_li2TKgAIEcdS256Yu zL%2M(3w3Recb7&DV*O-UO2z?TkH)4LD;7hu*V2+LJe5v%Ul+E0uL>q4gWQ6qXM`F0 z{Iq{w!{mpg`ZlLOL}`;5X?bfzwh1PH1b-jh)&JZ;uk<{nLbXiW*P;(!v(;f>C2=}G zzbRU=bMHV87gf4+g_!e&sivX4AxNK>Nc^P047(X$Ym$_nD&DQ>PJt&x2J=jdWf`h_ z&>J3Ep0ovkG~b5}3xe<4gngfr7#|(Au#4JLbweSfP8EnU1MJ4~pFp!pT_5X1+@~E1 zi3yaJq+wv%xrEa9Xg9U6`nXpu69-gy6ZlzSzwx_STCcI6HG{6}q;3d{uPGQrF*z$E z4-ewaZLS(DpTu&^EZJKyl0@w-7VCh5kZ?|N2bA`VS|o4Ld9et3b8{-}iD>P*EE;k_ z^LOSESi`c;!MJ)tZ4l^?vju$M;Ug$2^lSLmK6K8ZRa-E_-Yv#>+~WdHQk<~)f?O}B zVdr<6cW*2RA=HDbgMnrySJ_vQi$3EaFrWch2(A#|8$K3&md_k=nBm0p48s0>?Hl!a zWA-`wuslJrBm?MSUkcEDH5}jq2H}0=9cY=}XOJj}%mGCY2)LxZ`5=w)>$R2UT}dpQ zTe0gsmFAoAkDP0<>jezw?-zdtUa*&%O9j0St{qhuS{>>p0_R1PfCq%8PotUmO7*$C z7L60N6WK@M(Jn}dCSo~oEe0D1@7>7tZuoJp{g1DlJ)o4j5%$K`or#ztY9Hp9y8T`rqxvApxUFAOy5oNM zz?BB8d83^IF20{+Zda0 zc2-5>@Ib9%eWar=Jx^c)UG;u_7IbqsEs(h#L`)3yYPHNha;)<}Os9&o>TC?aO&7XZ z3^H49R@gO_hYiQ)a276~_FWNBY5Iy%9A2t(Jx{f&-)W@!5q)}*q`~19ao>@fq>Wu0 zmNMf`k;P*;^vPD%h2I0$TbBOK>&l<+L6N){;7#jxCZEHt=t+#E@Gd^3+B0k#qVsJL z;j;Tz6vz#w7X4(wV5KNq@hiP&@8)SPi^)q+qCcvSea6YtXct&BW&b1ym(hF8Tl&a< zE6^$H8KJA_mQ-||uWZr0MUoU2mMn(q`K&(`cG)kj=5;vTLzr^8qg}w~x2|=Gn)p-4 zZ8!E@=k3tmJAXDC@gyc2m``8m#=V19$w0OkS%tHr^f$MT*UGfy)!#AY9#}l@=~5vD)f8L%RAfn7^?Mg$%X_e>SZ2bb#zn)zScxUZjIYdXtifmxHiO~ zj9w-b<+c|yTtL#>=-*;6oE1E(@MSzf%Su4QAi`F-JV65hJn8_ze@l z+UD@I>MJLZPD5DfQq58d|BRjkK%deq6bgSfC;SG^%eqSa^hk>(Tdub1V+({XN9XwCK6^FE!Z}*+uKCa>x>m>@|nZ}DKXYCWT#k^ ztHrv1WguxC!OwrNOYbe};R4l96(OlZ2hy^uUX$*G?=@O)`?0mh(m!I2ZIywEu=nbB z&({5H@&1VfxpP*5nV#8ap5&K z!bm?No4lnv^!nItlYf16-a!sf>i7JfBRfjYz2GS2F3-*^<=_2cN3kEA&zE zmR(?!Q30f?uREmj~YJIQA$fmlktf(Ns?Csq`%@9OYJT!f7hKo2>ZZR!X)6u7A zoer|*RU+>?RZM!Lw1742Amd)Ji*LX0wbJ%uCoVWBBCnFI*j-?%%M!<4Dcw#xx%iD_ zm6=_Cxtq_39b_91%qlE(NMYxbpEV&%he2w;Rxa3>khk@Ka-O>n=Fpz@Xu{{HSa>y@ zw@4X>-f1>!#b($w&+(52xrC5PjDkJZ-}D$SoDYpS z8`c=6$9;u=pbOYj;IZ=Pge9etlz?Q-KePmTMJl8}Eq5k~EEN|iJ5#)ebw=!sIWmc# zL7|HKD9NBfTR4Vj9|>}o%DW0RxA7&3+G{8S!_!;VA{TqaIv36&yf33EE|Iu|&wgFrC>CHfr~<;R*(mN?jTo2vzf z5+@_y`lhaGEq_*Rryuk^A$%w zCMLT%n3{ju#GvP&EI?T0V)Ng^-*W3(nrL^Mg1${}Al!Jmt-RTwIQMiHJmK?Yixl65 zxB`#C1rg4NxBb0gP-D#K`}Jn~i0CMHTb!~eKi2DJ5F zmV&|Q5}bN$h%?9M%6z{#<2kA0ZBNK^t?zC+W3Uxx!r&Zn%W6fcFR`cGJdXSeP3$)Q zJTPLF!d`L)j~E$F^%F&IM*UZGtTZOxD15|eF?1`^6M;)?2igF_HO7uXX{DbXHUxNi zJRAp!I5!?z8$#j}4jFtWi83;3p&SMzy&pe~eG>n8hDdD4&cj31v0h^wlC4if+`ihh zpB^3MRrNXIhZwuqT|S;0-J5qi8K|`xLVEc7<2jTr^DFo7R-t7wwg;?j8<0}55Y`4D zgrpalnxa^2@->}JTn<48T&A0~_)e3~Z!D1KS3go&#Q4=pjY(-_@>z(=Zdg)#DL%Ag zN$m4{5(O#>dqZ3RwvwE-b;#Q|(wUBxwruY?7Rx6v()AeRx>JO2@sd^s=ec7-V zFdN(#t94~w^hnWnHBVWM)%&@ej0~+mzH-p{b1R#wJjG5cqOd|Gt5Z$dUTBPGS;dq- zTwq^~$-0PssgeK0WwjM9K9!|0YYJ&EY2Np^756;l%leX52FtX&b#--xVamQu2lDe5 znO8zNfbZ)@Vj4EB5J2bH+V!H*Ifpi^Z&KEF?RAc;kB1E?#@U0G;-@1x$+`A;#<>F2 zY+%-Gi$w+7#>^v=EwN^9A#Zy{iz^2t?+|MEPm7eY?FpKyqUx&M7O##f&+X#Qqs9># zhb@R`g|w`U*c3!?)FhPiOzO;ymdSQ*$tVM@-8)Pnsg>L=!rq^gI5Xy`meq*blp`SCKv;EQmm;e^71_{dle#?`?iTu{ye00%`Fa|{Wc)Kz zzq}0UW+zK(E}i%QIt~Q$@tSN(=j4kIGOh2sW;ks=BlOiqz`zoO3xcjT-Ep|a)?tlQ z$Xk# zWq;5;rHL4yUkOIY-_l-dt31~9TXl~!aXr>|b(rcsTP2CrO=Uio*>B--dD(fUAAiAK zuc&ntHs>G9F}{!5sXZrD60P`UzStyJv)#NWr9VN%+#m%A?5G%ZfZRos0U!E*^*&Hba)!VfCO!Xx{l3_V}h{E_jp8i zie+xEhbMdA!4Gg|=Vs>e6%9d>vOAz8Va9r%9G{Ea$v+;%OOp=-x&$q@!-q6(%EEB@ z>kRDq?=(chhmL1_=L1;gYO zuH<{Zxg%SHN)0xMf*wGv5&FJ2zF>cBe#}sniAbTe%;w_J`LHs0<9GL0mdhN|R;1=4Rwth<>!j6JEwSq+d@I_%Z z9}hMO;+r25?`V?MS5as!7aZF(=GuH=z;YZ7hSYwi$GZ_q9+Dwqv`KH+tP@ZNi)^ae zz52_WQwNPq$mL92Z%LAooHVHRDUR)-IzP7Tp`oP4Gv!l!(-1g;A6P#9vCX_8Z_R{* zGRZriKkkc6q#U400rDAXEBGM!+R<7ePsU7S^-Vf#HDdK(g5}Xif+)73s`BWvru-z% zNS!dzB>kXL!QC*(%)l8VQ5@H{$6KZz=zbE?rRQ7g?S0IqN}ad-nA|O@aBBibDE0GY z1wEUaer}(M%iN}_Z8iqS&c9|o+|%P*T8JyOTLz|zO@;+ND%}*+l`jq_e>-o2sLKG zW)_$LT6WmX)e8z8U143PEp_w@`sI_;DwqXfcdcVW>&Boc$ZGNKMQm(oW6}lQlecSE zguoP!g* ztKA0@JuNg%xWL#<2m9FP1&y>Fh}CZpoap_8dsbGr2>uh7G{z5;1K_kRDRePl6hThJ z88WO)@$427250w)nd;3s2365t}As!58vMj4-dizy0@vMU}()JH-SUgmA&*obAxK8blhgnDto=nio>5xW{P(O^yjaIG z5%70i6?8l@)Mu{LI9j>SUmZ4aj$=VY^L@3XxNX_ozO!=%i|ICw34!C#p8ACug-QI> z!P#jSE^W{TL0I0ctN*1x0zFx2GZ-s1V>T7E!X9< z9+O-C+%I<~Yk5ashy_y}Wp+kwQCO+(9i_K{9|__WW#v@pe!!bsgy7F@+Ain3HXm(9 zr#mwPbPUYNgW$CRKghpD{v*d5*-D42cbQ74HileB;6DJ!q*+9@S>~4OVXOLhwEMoc70+ zJMQWhk_gwe;P~b(#kBi~_qgZ|m9Q)~K3H%F8D)_Ev6rk` z!W$_Gf=V}UJ+~Pjto<;}Yl842TRh{>W@nq3R3r>s7v?TJDyl}><&#lza;|FR{mLfPdcc`fB<=$iMf${#EO*ds=_1D83H=X@~1yQGROy0KbT$-_rWJSN5+w+#mV7 zKd<4p-hb_|{e6Dvn|EGce%n$xE(w|NJ>F<#Kz!&~A&Y!Qy<8N^O z%pd+2(!YCwf9w+eVi12z!zPt^N*hQ&zemne@>VGs`*Fn@yj#+7FVi&@YAvq Vkgr7!0D$`XFno17r_{gB{ufcKM+pD` literal 0 HcmV?d00001 diff --git a/tests/data/cli/duplicates2.xls b/tests/data/cli/duplicates2.xls new file mode 100644 index 0000000000000000000000000000000000000000..5774b299eb1f0d042493e6e255c4c0780b1d3192 GIT binary patch literal 10752 zcmeHNU2GKB6+W|ntZnSI`3(@r0Ko=hjQ_%48}J{7L;jg*!O=iC|g5AQbIdh_k|?@3wvf&}GGu}UI7yn*KxE?h4X!!vPr zip8Rf#^b|wA6ekGqiYZj455e7>GNgiQS>VGYV;cP<>; zEby#k@s}4<4oe32oScUz7vI((V*G|&%uIj2Sj_lO8}m@UXMs<%73qHaS4EM?BVqYf z?KHQ%+WYTGKz04SJCU_Erowa zU7vJoQn#4#L!(=1|C+jPQP(;_^hc_vXI=Uyu-y;(5iaDxm63y4anLB=QdQNj>1aOiqsh>TbX)G&8@tm5ULBU zLHEpO=am)Z(u0TQrEUr>muswKN9>H9v(lSF5%~dPK)P?qD$Kej|0$ZdyKTxO9(0ZW zaB+A#N%1KgUkOjH^NoLS@z-Eelgo3OYv-NX4h$UCT>Op-_?8Oz<_h?#dGTS_?>~3M zhN2tk_qq5h8;Fn08UM!GS@<%)FPk&|tr*8g=isZO#89+TBP$M(eIb|!DOo9#rc z@u502YON2|NmQ*o!h^6gvFlQxTGsKD+-oIBUXB`q40AY_%jPtP4dgJ8&E_-NyiHV% zJj$|qB%4(;>SZHO(?@M9k;;r{RqADvAE$JwlgD`acE@#I3e>|ooO%ANolB=OXUMz` zmcgjw_Cy{TPK(<@amO9$wX&7^KbN=i6XTlxIO!+tbFc#$Y$JmqE09^-RDlJnc-{nZ!j<_uNbg? zbP5L+iFV7T4;bZm$%;a)=vwsuBJe+v=AaFH2RcH}9_TW90Q}^iKK|F1nnNT)%<(>m8Gy=PObw1^GxkB4oqTq+?cLSZb+WzVK-<0p zPwauUOw;Ud-M;0dHN0hq))MT7QklG+Gf!LbGj<-k&YQYMWZ2HM=Fg|?oU24h8Tf=D^Il<9}1X?7m)rK>AQ)g^g>a=ZTT1RrJWHOZ>b=f~bC9$-pK{Ssz zLQ0znfBnR-$R0Ug=miml_}U+`5N+AexIk=Fq@q;29X82Yyc_ zQ+5t4k8|0kQhDpb-fn^4hj2FLTcV9qvMHx=PL9hNKzSi`7*s-==fp*eiVp2n{fsbo zecoc0R^mCZ z*A4adQ$OqNXE`=)0#hY9 zJJt17HFg#114cCVHTJBAMo_D9vvINxKP7SglMQTY=}2BP$Y_C<~$qf!H6jw0nYT}sBt-t z`L!HJHhA4p^)et4{!yqLDW=RM@|3>w~&XHWTw}v3Ji3tm@ruo;v zX!O<)goR?L(po}rmV}^^z+Fx*v?R}YE<_2ast%13kmVi7FM-h>!$D6=2+fiZf`k~3 zeR>|f6O#=kh!P$xL6oqu1o8(MsG+o*%+W!eh=ndfc;8L|Mr!(WvKUK);4n zBPA|PGJxq3wKB#?4B0~KyG+gBT7szI<0Z)NN2aY9PKTQJG9T~W5^qMF7b8){+l`SZ zeDB3bRC&;ik*MO`=Lk58f%pAoCl#LnMMyRP0iD8W3 z;5b6bPD3T*2SP$z_XAM$xK@4*ydR1lx1TBypM#>O!UV>PsVzmlKF>v}1^RhK%1tYV zN{+igV_LYkBGu|h&_&8Ek-A9jb|mN`<(3d#q}&n`RH?!(A@da}w}i|sQeFwsManH9 z{LbwqqmYu|mXOe_WTcA}Qwpu5imgl`vmXQ3#U>&_0Xl z{_Uw7RB6er()y`G;KuK7Ty5P@f9VR=-}2E<`hg3V;a&ia^^3yUnfW=FYpjF^`C)rN zEbv+W{zJu2Sq_=cOpLK^1{vBAetnX|SHofTJCrjTxrFNqL@Qabb}@%7W-7-YS=xlrzwP+%&$K-fiOvSWDK|#qi?7NHg aGykik?C5Z>x24+O|NEPJmCgD0BL9DL2^szX literal 0 HcmV?d00001 diff --git a/tests/data/cli/duplicates2.xlsx b/tests/data/cli/duplicates2.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5cc2413e96129b65742a595ef52156aec3462681 GIT binary patch literal 7802 zcmbVRWmud|vc=sYxF)!}1`7mt*TG>33~mWJIE3IDB)AM3T!RD#f_rc$cp$h1E_{1; zlg-`ze%yM0OwT;sHP2gp`c$7PHAQ#?0vHq&6d1HDd{vm=jPzmbWzS*aVrB2j;pymN zXYS}|$L(HXHJZ;LtnJKn>AW8izvaiM9E6S3mB;S+Gsa_{CSb1c+5Mt z5DgDT5t1+G7dH~GrVmf@0dZbYof6uK*_@3t@60%lTD=>agWOwQQ94)0f2g9uFTG`` zg5`++UT3}KTyNAJOtX9%XU)=-FUhcX?jd2EvRPh82S4|$aLxk>IIo@U5ffPzzwXLA~K58X4THz!~0jm!N92f*9Z~(BSNk~GZ!mM zO*a=?2OHPN0A=dTJIqPo_#Wy;-3H)&nF)nMHI;*BOv){M$4r_<|v_YA-J+tc^O5UB6TCi%$lpb$?q2fzhZwHQ;^Wuq>z4?ZzMp~)zw)$yeCTN5&r zJB>SsHo6s)^w9j4ogW&Qr@(W}ZTD7^_&x3Cj5LWSjEm_><3csxbtBX z>ZjrD+0Ze4xxr-H^dw=qfvMeC$FTi_0X7orXr(UyD+MwVqvj#lWH!Vhj2kS?EnHc3 zDJT3p36UOAZjv%Slk&zPte*Y&9@m5IBWKkjeK0?><5Ufg5~L}ctp$lC(fK5xpoOK{ z&ZVGarC5!w3=HVbGB`fU96QpMIddYqf^#o0)kp|3)FV`-t`BM@S|g!cD6`z|jir?;uJ14@f>9 zS685wm76QaUmx6$ux!&)c3l(1X*;ZRy5el;L9!RC?$Ck*&q4-i>nuGW=k)d@rRIhJ z`3~B4Kf`J+nQ?m;xyFF0H{ZX$S&XMs7P}whv56sJs7-BNj6M`@3)>~U@P1c7=gYm&o~elKtKPc z)snbpx8V87n@{3LF4AZX$4)&KNFA@^bW? zHwqRgB9bJ%qq@0l0E-`OT0hs~!~#}l4un&C%S47tus zqt@?u$Wh5Zj4d2WBeCvn`AF43rK0d_2ZEzuFBwJVE^v+~+43t$F7pdEQrB4%*4;AY z%&uXiHx!KfM6ox( zlAipeSWmAQo(`7&dQ7)6nUzfSL3(OZ-FA&n7)h;3Ilx}TA^%vt{9b~5+GuC90__Z; zYgF|vai&Req7iOHjp2S}j(v=Lgu{WEQW9@MsKe+F*+bRYN~Rmaerys8HGCw(E=ikP zSSbuU=xE`q_6c8qsN|4fRrJ_B4Ag8$u=btvB&xS#ei_IyoyzZ!@t{v;zTCF$l?#NV zMFMV!K8ljOkY48{U}zoRyG9!5thIfw&qL78B~9FvpYWDN7sX584|2}qzjk01z_$jK zb{oNU#UB%qWmS^CCwI1c8tB?RE*-oTo)kgMi-LU$lKyV2w~>WAqlP~#9mN}`+^zR8 zaH}&~dLJv1(uB!mUt*x25$ut9Lp0sw$efMnZvh6P4YJ{sj}sF1NhHh#Bam9o3gl0J z8-l!Wn^y_-C<#JWq)2rIpx;5^#;Um*Bx>zhmBQ7h2ssS8lEE}}_2K2LDt;f|1QR6- z^FEtM!A6%h5og8mh2^3NousE74NqPz)do3p*DH!PR!zd6h!jk%zeCJ@GQR*fR(RJH zOpG4h^EUZ>b7cxrfz#%}?X%qQR5@5i?3tV+v#oQr~HW+x3np9FJn;W9^6WcS5tXznbUrVIc`qsHGG(ot6T zcF}@f&eFqcs{FHVcRfNlk9)?2$mcQ6vXRRodh(ztx~PR|hvEPqfg!C$!i;Rxg50U| z_bu;?QEhhdYWQA*7mtXwAS#EutXVkFkmm*d^P6O6&4!CXlP!l1i}u4`DxNJa@$^tk zNc~7%Tcd=c)%BuaXdAaFEyi7~Xd+;Q*aB7hY-8L2tbd@=}IS3*OZ6j&)l zxQH_z6G@b7q?Fi`Q4ZMn3_ntqI02m=F1+Ll263kwZ#F7yu%8Kz<^dn)`>OjEo}W znO;AL00ZNL|Nme_uD=*DP1!Y15GVBThJ_K!+wf$SBco3A$vh2@cAEUF%-NE;bhxf! zBpen??|74THZWImqB~XuN4+he*)YZcZf-)g{djQBOb6Fs(7tkOj*$H+X@WUVO}lug zKSI5>vsO1_9%9z)NMp75kbnAjrW^eNVpnu>f~i3jA&yG&qDYO5Jcz8KQAb*EHMz5( zC0P+OiOk1V#u4&mcUhHaIkT^n6*GoDqqH=*fH7eDdf-DG*ZL`MhT4UapJD2Y%4V!!N)7P zOkeSxx&mo4Er!Kz^Gi*2e&Ai=##@eq3rea1S=_$;p2B;BAsFCu(Jh`jHr>%jA3|{G z)Mc1LE?Vv!`Yd|=yZePz#DXwB!RT4mHZbcBy+*9a68S8KYE?H0BqeeC*bz zhuRv(jrzgf=Yc_OYVRwbCBhb%&IsMoSL#FsexrC(Xo&j2iXm_C3XY|BRL$@J#$2*4 z7=aEE()xTtGlXz^SYMl1krvmlHA=5_hmkYviWCLE--#(HF`^?|H?P7>id%FN;GKA@ z&2UKEnA%>BlWtHftzw*VTYg0%-@_GTp!ud5pz}o)%pqk(M_=qr3Svcb4lE?9d%Y9_%@ac z>wN&cREN6bF{~6hKW-R3f$cbs$7cVuaD0o>j~E~K`cduZ&t1JK^*Gi)Hd zrd}oL-sD@&ikT;CKrt&fN3N!--IX(8F&W1O6uKCLpW@!3QI(juGVEOuLv`_Jjd}|Z z4_D9Ked)k=<~b~_nmro2mqn3D726+}9o%|Wv~YJLS4-A(bfXMH;C&4H?m}p849qPM zt1D0l`kKviC8nQbai<6%MR2E?>1w1F-b}A_3%GtaNp&?DQ7Wkkk<2BS8>d!h23f^M z5llWz+cCRm-8EQs&qT1E3_{PN%wV%MUS)dHGD-JJnJlB2uUygRB_7srASE!{cLd{& zm)WS4uUG%~W=^@pY7W4O+`ZHkEdb?23CeHmYS7FFO(!O4ta+8mPkaETA zetvwF;jS!TRofR!^U#B)6u(2KGaBakjjn7G#`Kl(e07O33{p@#-omP52O#) zHma3v2c0k&HBef@H`{uXW6dKKfKVGQl%UZ&pS+Zuov|%K@GKERnZ1{ITJocKqy;xJ zbGEl0X&KegdroIit;m;rmDOBB|zzY8bqU$@kMipqDI^U-shkD5u^ zFU+Ws0LSblJWzL6dBEf1qf0!hv-5r*B_VLi|FE0J5K;jVqy1DJ%D4TA$9LrjVQvany>Facs`ssW}!4TO+0r$^lDaK8!oY(lD)Q zaC2;-tnw0j(D_RM^A%o6ba3%O}{bFub2*3wmN8Q%~QzT6CkM|j?SVXUpAzq*0r#y z9&|0-^>rKV*)=k>gKKYlw9blG7HnE9HirCCqe~_oycRT!YAzqeew9LFa?TRlCvPu~ z(uG8*a=Day10ZMOE5COsASeM(hYiOMiV)-0P&xLF38qEm0ctUj+c=G4-W(!*R(Gsf zvIoL^VNP@Kix;KXjmpX|a2y+L_rUHsS0WUu(=iU+(?oLFNxjlq_(UPu&NH({^oS)y zxvT)}2gTC{|94oz{9h67_S)X+FZF$3Xc#jmNZ_|@Vz%i^CPyu$BjKqK=3yc;HLTS# z{)$S;ocfjdv)~CDv_vG#isaJ7on>-r3R|w&8w7d&Z0&a^X*SqajQs2!qw-Sn+jRsYZQsUh#1<_ zlWQXTsP^JYf^k)_-oD($Q}fDGt>#4V^IhT?v(bEw)l( z^zPM9h%QiDITOG|&dM!&)6c_w)rM>Nc9Jv(DY0UCf7n^H3OuZl!pF2e9V=c1CQU@3 z$^?x;=xnQ%ooRr)q_?)YUI-k-{yZLpsxu|?vMCTVT6YrR z5mIldGb-D?KY!KSqgA?A;>)TkGz`k!9da*>#PSw0%^)L&PtEI{#vJ_T!(}S`U8p=U z^ISh?C*uj}R>tV%7J{`!!cxhKpmGP-{x~}CeJ}q{M5Ja+hK*z5;xpI_VUprA@UT1g ziZ+H@WdgV*682D-WZU?s(zY(ff-wX9L7~7bk(}g&VJ&)o_6}7Gdo0|Aj3mfnrbPp- z83}EUL;XmKgKIbyJpabYqN4>5Y5&?#F zylJ_IJ*Mu%_-r)kd$q4ll!>!fspU^twKkt|1`Jpu)S1EW;YLQRlY$bg2;PQ6SiHZQ zcE=+ST~~<~lQhb}pQ`RHkN3@U9@ok!4x?gD7Yr?iD%*S*k||pXUCbQ*j=u_4dSScf znCj+Ee*vqP%@1ePKR8yc&G@Fkz&qxB5R7BR_P)9sZua1ly0Z4qG=XeB6?t^~U{@&a z2ItrDrM*-xNh=2FC%mA3F;04^dPO?6uT-d^q&{DainR6*G~Lt?N0y89mP$*NyL-<9 zkMyuOweYrHY5tH)elZ+?LJ8|C3Zkb3$8+CQ@T01_Y;$N%vKP_mrX#1~s?ACnfe7!q zGOW9kTALvKRis<5BNCo}E~|dJ%Gkb96Op6kVyH1y08)f-w;4Mg6{Qfk*EI{$Bdm-N z$a3tE92P+ar_J_k>9mt7*rS&UwUZMLj-~@+meRjKQ^M|4e3#utN4krd`4)yUA_N*$ z69n-2t44cEAX3;{a~U94?Ac}PQ|I$DoRiG=-cOybEsS(Sikd~ve0}ScofDO)dM7<0 z)k`4MQ>M0ed++V3KiMLr(|vM`PlaYMV8e@86`VQvDdly$o4KXipQ2jVC{cYmj9|Im&cDNz;od| zRFflPy*=xnNL$vlg^#Ao`39s8-kAxcagw8~7LYfE zSuR?72ZM}F;O{NB&%;a3mw-z*?{va0f>WwW_GH35Hvn)lLSt*Xxv^{)j4~WNt+=Wq z(wf@Yq$(};9G+L+DkzN8h96hLoiiJ?YNArx-JdswxnzptZv|J}xe7I+$i&>W#w+g( z0H->}7=Yq`$yIAeV8hD8Ym5ACO&RB5hNYv0nv0{8D~Fkr(_i)ESXH}+T6@po4MFGW zdqG=S#vW%|5|JnMQv4q1S>?5qcVXp;r}HE8*$1x$L`8ojoX&XE7Ya%#&9y}HYAd5T zjk6p?Q@q~$Fu=!zhgKB)154NiEM0#KZj+&+&JESF3{<2Lq6x(;kk9If)UuklvN;_? z8|0=ep^{}={W9<+P^!6XFS#pfg`k+*2)q%~D75aM(?9UxX58B!HLX`4o1YlmLDH7g zCTsQv1m@UVXlJfR?*uY&%64I2^K=ayV$JC&PwABQNVeEgicKGkiV=FkOWm!a(~cJc z_8CB0P3EVR&}}yUJIqG}kqY`@?mdLT8}{!Ig!_A-EF4{|9wP z8^9bMPhFRYFPEe+1rhiP2%Nv9na8=j5r);kS@7_Ht}lK(*#S(CVU^(b|2y$>ZZt0(q+vYXU&34#>8mer)O_PoP=8>JMhbJ^*iyHskaipbql z&Z`zXLGAtlSL=bID0M`iLM?8LmV642u^%_bLfD8E@UStde~aRWiVTdQl%s>2m4lm! z)@vs#SL4U+>CqZe?cyY8ztLrFdtcYlNnF#}8IujZaWIu<7YNoE2KMJz%sr^e1}6@WGN4qsXnNA)Z;P_;_;CL@t%h78)u znDZ@*6rGeM#F-EKa_8Hi%f}U0)4a}=NYp&|?by&hN$4yEfscO3hTpq+L*w9t4Uc)E z0++7x^28O`0ahJ-mSM?FkpLDp?1ci8VaBKFlqSr395F02+NJCm*Hb%88$=QyR!pX_exZs{ZBuVg#aoMbPZe!REJ*9XpP=T z@a7x$&)=_bSW~g7WPZ$|`B7={!UvYNZ}KT!V|KIV_qq^gr%#vccGCA+t$lhYrZM7w zHLn0(gf-?5TeSSZ!fJ}Ja0D>F8hpPuwjLXN|8D=i-S?-T-@6fyZLnYV^x>Z${QRG$ z*qflykzZEyK<1C3_?I;KbLsCX;V~orGGoGj%a4C9{GB2nGsZ9TAo{=gV)(D@^G{O!bNTQ1dOYaA?D+$v{=>0XQ$$31oP_r9ggo%ZGTGy|{{eRo Bq<{bb literal 0 HcmV?d00001