2384 lines
88 KiB
Plaintext
2384 lines
88 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Test function create in module cli"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Install\n",
|
|
"\n",
|
|
"This notebook requires a Python 2.7 environment and an OpenRefine server running at http://127.0.0.1:3333."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[33mDEPRECATION: Python 2.7 will reach the end of its life on January 1st, 2020. Please upgrade your Python as Python 2.7 won't be maintained after that date. A future version of pip will drop support for Python 2.7. More details about Python 2 support in pip, can be found at https://pip.pypa.io/en/latest/development/release-process/#python-2-support\u001b[0m\n",
|
|
"Processing /home/felix/git/openrefine-client\n",
|
|
"Requirement already satisfied, skipping upgrade: urllib2_file in /home/felix/.local/lib/python2.7/site-packages (from openrefine-client==0.3.7) (0.2.1)\n",
|
|
"Installing collected packages: openrefine-client\n",
|
|
" Found existing installation: openrefine-client 0.3.7\n",
|
|
" Uninstalling openrefine-client-0.3.7:\n",
|
|
" Successfully uninstalled openrefine-client-0.3.7\n",
|
|
" Running setup.py install for openrefine-client ... \u001b[?25ldone\n",
|
|
"\u001b[?25hSuccessfully installed openrefine-client-0.3.7\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import sys\n",
|
|
"!{sys.executable} -m pip install .. --user --upgrade"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from google.refine import cli"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## CSV"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1618143866116\n",
|
|
"rows: 10\n",
|
|
" id: 1618143866116\n",
|
|
" url: http://127.0.0.1:3333/project?project=1618143866116\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:53Z\n",
|
|
" created: 2019-08-20T02:12:53Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"Project 1618143866116 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### encoding\n",
|
|
"\n",
|
|
"check TV symbol in line 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1676755759011\n",
|
|
"rows: 10\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: ðº)\t1\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"Project 1676755759011 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', encoding='ISO-8859-1')\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1970849280401\n",
|
|
"rows: 10\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"Project 1970849280401 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', encoding='UTF-8')\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### guessCellValueTypes\n",
|
|
"\n",
|
|
"check OpenRefine GUI at url below: numbers should be green"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2231557582225\n",
|
|
"rows: 10\n",
|
|
" id: 2231557582225\n",
|
|
" url: http://127.0.0.1:3333/project?project=2231557582225\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:53Z\n",
|
|
" created: 2019-08-20T02:12:53Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': True, u'includeFileSources': False}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', guessCellValueTypes=True)\n",
|
|
"cli.info(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Project 2231557582225 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### headerLines\n",
|
|
"\n",
|
|
"check column names, should be Column 1..."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2294888751269\n",
|
|
"rows: 11\n",
|
|
"Column 1\tColumn 2\tColumn 3\tColumn 4\tColumn 5\tColumn 6\tColumn 7\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"Project 2294888751269 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', headerLines=0)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### ignoreLines\n",
|
|
"\n",
|
|
"check column names: the arthur.duff row should be used as the header"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1990694976789\n",
|
|
"rows: 5\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"Project 1990694976789 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', ignoreLines=5)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### limit\n",
|
|
"\n",
|
|
"should contain 5 rows"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1834697810094\n",
|
|
"rows: 5\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"Project 1834697810094 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', limit=5)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### separator and processQuotes\n",
|
|
"\n",
|
|
"should contain 10 rows and 2 columns (Column 2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1745680810911\n",
|
|
"rows: 10\n",
|
|
"email,name,state,gender,purchase,count,date\tColumn 2\n",
|
|
"\"danny.baron@example1.com,Danny Baron,CA,M,TV (UTF-8: 📺),1,\"\"Wed, 4 Jul 2001\"\t\n",
|
|
"melanie.white@example2.edu,Melanie White,NC,F,<iPhone>,1,2001-07-04T12:08:56\t\n",
|
|
"danny.baron@example1.com, D.\t\"(\"\"Tab\"\") Baron,CA,M,Winter jacket,1,2001-07-04\"\n",
|
|
"ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight,1,2001/07/04\t\n",
|
|
"arthur.duff@example4.com,Arthur Duff,OR,M,Dining table,1,2001-07\t\n",
|
|
"danny.baron@example1.com,Daniel Baron,,,Bike,1,2001\t\n",
|
|
"jean.griffith@example5.org,Jean Griffith,WA,F,Power drill,1,2000\t\n",
|
|
"melanie.white@example2.edu,Melanie White,NC,F,'iPad',1,1999\t\n",
|
|
"ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier,1,1998\t\n",
|
|
"arthur.duff@example4.com,Arthur Duff,OR,M,Night table,1,1997\t\n",
|
|
"Project 1745680810911 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', separator=' ', processQuotes=False)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### projectName"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2022088294800\n",
|
|
"rows: 10\n",
|
|
" id: 2022088294800\n",
|
|
" url: http://127.0.0.1:3333/project?project=2022088294800\n",
|
|
" name: foo\n",
|
|
" modified: 2019-08-20T02:12:53Z\n",
|
|
" created: 2019-08-20T02:12:53Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'foo', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"Project 2022088294800 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', projectName='foo')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### projectTags (introduced in OpenRefine 2.8)\n",
|
|
"\n",
|
|
"check manually at http://127.0.0.1:3333 > Open Project if tags were stored"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2228120867351\n",
|
|
"rows: 10\n",
|
|
" id: 2228120867351\n",
|
|
" url: http://127.0.0.1:3333/project?project=2228120867351\n",
|
|
" name: duplicates\n",
|
|
" tags: [u'client1', u'beta']\n",
|
|
" modified: 2019-08-20T02:12:53Z\n",
|
|
" created: 2019-08-20T02:12:53Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'projectTags': [u'client1', u'beta'], u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', projectTags=['client1', 'beta'])\n",
|
|
"cli.info(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Project 2228120867351 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### skipDataLines\n",
|
|
"\n",
|
|
"should contain 5 rows"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1725478809832\n",
|
|
"rows: 5\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"Project 1725478809832 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', skipDataLines=5)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### storeBlankCellsAsNulls\n",
|
|
"\n",
|
|
"check OpenRefine GUI at url below:\n",
|
|
"* All > View > Show/Hide 'null' values in cells\n",
|
|
"* row 6 should contain null values in columns state and gender"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2533896794214\n",
|
|
"rows: 10\n",
|
|
" id: 2533896794214\n",
|
|
" url: http://127.0.0.1:3333/project?project=2533896794214\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': True, u'includeFileSources': False}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', guessCellValueTypes=True)\n",
|
|
"cli.info(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Project 2533896794214 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## TSV"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2281824651803\n",
|
|
"rows: 10\n",
|
|
" id: 2281824651803\n",
|
|
" url: http://127.0.0.1:3333/project?project=2281824651803\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.tsv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'limit': -1, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\"D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"Project 2281824651803 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.tsv')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## JSON"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2534262116323\n",
|
|
"rows: 10\n",
|
|
" id: 2534262116323\n",
|
|
" url: http://127.0.0.1:3333/project?project=2534262116323\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: _ - name\n",
|
|
" column 002: _ - date\n",
|
|
" column 003: _ - email\n",
|
|
" column 004: _ - state\n",
|
|
" column 005: _ - count\n",
|
|
" column 006: _ - gender\n",
|
|
" column 007: _ - purchase\n",
|
|
"_ - name\t_ - date\t_ - email\t_ - state\t_ - count\t_ - gender\t_ - purchase\n",
|
|
"Danny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\tCA\t1\tM\tTV (UTF-8: 📺)\n",
|
|
"Melanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\tNC\t1\tF\t<iPhone>\n",
|
|
"\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\tCA\t1\tM\tWinter jacket\n",
|
|
"Ben Tyler\t2001/07/04\tben.tyler@example3.org\tNV\t1\tM\tFlashlight\n",
|
|
"Arthur Duff\t2001-07\tarthur.duff@example4.com\tOR\t1\tM\tDining table\n",
|
|
"Daniel Baron\t2001\tdanny.baron@example1.com\t\t1\t\tBike\n",
|
|
"Jean Griffith\t2000\tjean.griffith@example5.org\tWA\t1\tF\tPower drill\n",
|
|
"Melanie White\t1999\tmelanie.white@example2.edu\tNC\t1\tF\t'iPad'\n",
|
|
"Ben Morisson\t1998\tben.morisson@example6.org\tFL\t1\tM\tAmplifier\n",
|
|
"Arthur Duff\t1997\tarthur.duff@example4.com\tOR\t1\tM\tNight table\n",
|
|
"Project 2534262116323 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.json')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### trimStrings (broken, does not work in the GUI either)\n",
|
|
"\n",
|
|
"check in row 3 whether the spaces before `D.` are deleted"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2495073177504\n",
|
|
"rows: 10\n",
|
|
" id: 2495073177504\n",
|
|
" url: http://127.0.0.1:3333/project?project=2495073177504\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': True, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: _ - name\n",
|
|
" column 002: _ - date\n",
|
|
" column 003: _ - email\n",
|
|
" column 004: _ - state\n",
|
|
" column 005: _ - count\n",
|
|
" column 006: _ - gender\n",
|
|
" column 007: _ - purchase\n",
|
|
"_ - name\t_ - date\t_ - email\t_ - state\t_ - count\t_ - gender\t_ - purchase\n",
|
|
"Danny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\tCA\t1\tM\tTV (UTF-8: 📺)\n",
|
|
"Melanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\tNC\t1\tF\t<iPhone>\n",
|
|
"\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\tCA\t1\tM\tWinter jacket\n",
|
|
"Ben Tyler\t2001/07/04\tben.tyler@example3.org\tNV\t1\tM\tFlashlight\n",
|
|
"Arthur Duff\t2001-07\tarthur.duff@example4.com\tOR\t1\tM\tDining table\n",
|
|
"Daniel Baron\t2001\tdanny.baron@example1.com\t\t1\t\tBike\n",
|
|
"Jean Griffith\t2000\tjean.griffith@example5.org\tWA\t1\tF\tPower drill\n",
|
|
"Melanie White\t1999\tmelanie.white@example2.edu\tNC\t1\tF\t'iPad'\n",
|
|
"Ben Morisson\t1998\tben.morisson@example6.org\tFL\t1\tM\tAmplifier\n",
|
|
"Arthur Duff\t1997\tarthur.duff@example4.com\tOR\t1\tM\tNight table\n",
|
|
"Project 2495073177504 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.json', trimStrings=True)\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### recordPath"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1671966444040\n",
|
|
"rows: 10\n",
|
|
" id: 1671966444040\n",
|
|
" url: http://127.0.0.1:3333/project?project=1671966444040\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_', u'purchase'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: purchase\n",
|
|
"purchase\n",
|
|
"TV (UTF-8: 📺)\n",
|
|
"<iPhone>\n",
|
|
"Winter jacket\n",
|
|
"Flashlight\n",
|
|
"Dining table\n",
|
|
"Bike\n",
|
|
"Power drill\n",
|
|
"'iPad'\n",
|
|
"Amplifier\n",
|
|
"Night table\n",
|
|
"Project 1671966444040 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.json', recordPath=['_', '_', 'purchase'])\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### storeEmptyStrings\n",
|
|
"\n",
|
|
"default: True; set to False for null values\n",
|
|
"\n",
|
|
"check OpenRefine GUI at url below:\n",
|
|
"* All > View > Show/Hide 'null' values in cells\n",
|
|
"* row 6 should contain null values in columns state and gender"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2078676878032\n",
|
|
"rows: 10\n",
|
|
" id: 2078676878032\n",
|
|
" url: http://127.0.0.1:3333/project?project=2078676878032\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': False, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: _ - name\n",
|
|
" column 002: _ - date\n",
|
|
" column 003: _ - email\n",
|
|
" column 004: _ - count\n",
|
|
" column 005: _ - purchase\n",
|
|
" column 006: _ - state\n",
|
|
" column 007: _ - gender\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.json', storeEmptyStrings=False)\n",
|
|
"cli.info(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Project 2078676878032 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## XML"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2264312539076\n",
|
|
"rows: 80\n",
|
|
" id: 2264312539076\n",
|
|
" url: http://127.0.0.1:3333/project?project=2264312539076\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 80\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'root'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: root\n",
|
|
" column 002: root - record\n",
|
|
" column 003: root - record - name\n",
|
|
" column 004: root - record - date\n",
|
|
" column 005: root - record - email\n",
|
|
" column 006: root - record - count\n",
|
|
" column 007: root - record - purchase\n",
|
|
" column 008: root - record - state\n",
|
|
" column 009: root - record - gender\n",
|
|
"root\troot - record\troot - record - name\troot - record - date\troot - record - email\troot - record - count\troot - record - purchase\troot - record - state\troot - record - gender\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\tDanny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\t1\tTV (UTF-8: 📺)\tCA\tM\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\tMelanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\t1\t<iPhone>\tNC\tF\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
"\"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\t1\tWinter jacket\tCA\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tBen Tyler\t2001/07/04\tben.tyler@example3.org\t1\tFlashlight\tNV\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tArthur Duff\t2001-07\tarthur.duff@example4.com\t1\tDining table\tOR\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tDaniel Baron\t2001\tdanny.baron@example1.com\t1\tBike\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tJean Griffith\t2000\tjean.griffith@example5.org\t1\tPower drill\tWA\tF\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tMelanie White\t1999\tmelanie.white@example2.edu\t1\t'iPad'\tNC\tF\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tBen Morisson\t1998\tben.morisson@example6.org\t1\tAmplifier\tFL\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tArthur Duff\t1997\tarthur.duff@example4.com\t1\tNight table\tOR\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"Project 2264312539076 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.xml')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### trimStrings (broken, does not work in the GUI either)\n",
|
|
"\n",
|
|
"check if spaces before `D.` are deleted"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1917953863988\n",
|
|
"rows: 80\n",
|
|
" id: 1917953863988\n",
|
|
" url: http://127.0.0.1:3333/project?project=1917953863988\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 80\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'root'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': True, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: root\n",
|
|
" column 002: root - record\n",
|
|
" column 003: root - record - name\n",
|
|
" column 004: root - record - date\n",
|
|
" column 005: root - record - email\n",
|
|
" column 006: root - record - count\n",
|
|
" column 007: root - record - purchase\n",
|
|
" column 008: root - record - state\n",
|
|
" column 009: root - record - gender\n",
|
|
"root\troot - record\troot - record - name\troot - record - date\troot - record - email\troot - record - count\troot - record - purchase\troot - record - state\troot - record - gender\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\tDanny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\t1\tTV (UTF-8: 📺)\tCA\tM\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\tMelanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\t1\t<iPhone>\tNC\tF\n",
|
|
"\"\n",
|
|
" \"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\"\n",
|
|
"\"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\t1\tWinter jacket\tCA\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tBen Tyler\t2001/07/04\tben.tyler@example3.org\t1\tFlashlight\tNV\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tArthur Duff\t2001-07\tarthur.duff@example4.com\t1\tDining table\tOR\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tDaniel Baron\t2001\tdanny.baron@example1.com\t1\tBike\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tJean Griffith\t2000\tjean.griffith@example5.org\t1\tPower drill\tWA\tF\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tMelanie White\t1999\tmelanie.white@example2.edu\t1\t'iPad'\tNC\tF\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tBen Morisson\t1998\tben.morisson@example6.org\t1\tAmplifier\tFL\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\tArthur Duff\t1997\tarthur.duff@example4.com\t1\tNight table\tOR\tM\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"\t\"\n",
|
|
" \"\t\t\t\t\t\t\t\n",
|
|
"Project 1917953863988 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.xml', trimStrings=True)\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### recordPath"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2293178566671\n",
|
|
"rows: 10\n",
|
|
" id: 2293178566671\n",
|
|
" url: http://127.0.0.1:3333/project?project=2293178566671\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'root', u'record', u'purchase'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: purchase\n",
|
|
"purchase\n",
|
|
"TV (UTF-8: 📺)\n",
|
|
"<iPhone>\n",
|
|
"Winter jacket\n",
|
|
"Flashlight\n",
|
|
"Dining table\n",
|
|
"Bike\n",
|
|
"Power drill\n",
|
|
"'iPad'\n",
|
|
"Amplifier\n",
|
|
"Night table\n",
|
|
"Project 2293178566671 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.xml', recordPath=['root', 'record', 'purchase'])\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### storeEmptyStrings\n",
|
|
"\n",
|
|
"default: True; set to False for null values\n",
|
|
"\n",
|
|
"check OpenRefine GUI at url below:\n",
|
|
"* All > View > Show/Hide 'null' values in cells\n",
|
|
"* row 6 should contain null values in columns state and gender"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2438123269695\n",
|
|
"rows: 10\n",
|
|
" id: 2438123269695\n",
|
|
" url: http://127.0.0.1:3333/project?project=2438123269695\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:54Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': False, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.csv', storeEmptyStrings=False)\n",
|
|
"cli.info(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 28,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Project 2438123269695 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## TXT"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default (line-based)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1913292396645\n",
|
|
"rows: 11\n",
|
|
" id: 1913292396645\n",
|
|
" url: http://127.0.0.1:3333/project?project=1913292396645\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:55Z\n",
|
|
" created: 2019-08-20T02:12:54Z\n",
|
|
" rowCount: 11\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'ignoreLines': -1, u'projectName': u'duplicates', u'processQuotes': True, u'skipDataLines': -1, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n",
|
|
" column 001: Column 1\n",
|
|
"Column 1\n",
|
|
"email name state gender purchase count date \n",
|
|
"danny.baron@example1.com Danny Baron CA M TV (UTF-8: 📺) 1 Wed, 4 Jul 2001 \n",
|
|
"melanie.white@example2.edu Melanie White NC F <iPhone> 1 2001-07-04T12:08:5\n",
|
|
"\"danny.baron@example1.com D.\t(\"\"Tab\"\") Baron CA M Winter jacket 1 2001-07-04 \"\n",
|
|
"ben.tyler@example3.org Ben Tyler NV M Flashlight 1 2001/07/04 \n",
|
|
"arthur.duff@example4.com Arthur Duff OR M Dining table 1 2001-07 \n",
|
|
"danny.baron@example1.com Daniel Baron Bike 1 2001 \n",
|
|
"jean.griffith@example5.org Jean Griffith WA F Power drill 1 2000 \n",
|
|
"melanie.white@example2.edu Melanie White NC F 'iPad' 1 1999 \n",
|
|
"ben.morisson@example6.org Ben Morisson FL M Amplifier 1 1998 \n",
|
|
"arthur.duff@example4.com Arthur Duff OR M Night table 1 1997 \n",
|
|
"Project 1913292396645 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.txt')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### linesPerRow\n",
|
|
"\n",
|
|
"should return 6 rows in 2 columns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1958513543951\n",
|
|
"rows: 6\n",
|
|
" id: 1958513543951\n",
|
|
" url: http://127.0.0.1:3333/project?project=1958513543951\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:55Z\n",
|
|
" created: 2019-08-20T02:12:55Z\n",
|
|
" rowCount: 6\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'ignoreLines': -1, u'projectName': u'duplicates', u'processQuotes': True, u'limit': -1, u'skipDataLines': -1, u'separator': u',', u'trimStrings': False, u'linesPerRow': 2, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n",
|
|
" column 001: Column 1\n",
|
|
" column 002: Column 2\n",
|
|
"Column 1\tColumn 2\n",
|
|
"email name state gender purchase count date \tdanny.baron@example1.com Danny Baron CA M TV (UTF-8: 📺) 1 Wed, 4 Jul 2001 \n",
|
|
"melanie.white@example2.edu Melanie White NC F <iPhone> 1 2001-07-04T12:08:5\t\"danny.baron@example1.com D.\t(\"\"Tab\"\") Baron CA M Winter jacket 1 2001-07-04 \"\n",
|
|
"ben.tyler@example3.org Ben Tyler NV M Flashlight 1 2001/07/04 \tarthur.duff@example4.com Arthur Duff OR M Dining table 1 2001-07 \n",
|
|
"danny.baron@example1.com Daniel Baron Bike 1 2001 \tjean.griffith@example5.org Jean Griffith WA F Power drill 1 2000 \n",
|
|
"melanie.white@example2.edu Melanie White NC F 'iPad' 1 1999 \tben.morisson@example6.org Ben Morisson FL M Amplifier 1 1998 \n",
|
|
"arthur.duff@example4.com Arthur Duff OR M Night table 1 1997 \t\n",
|
|
"Project 1958513543951 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.txt', linesPerRow=2)\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### fixed-width: columnWidths and headerLines"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1703842312470\n",
|
|
"rows: 10\n",
|
|
" id: 1703842312470\n",
|
|
" url: http://127.0.0.1:3333/project?project=1703842312470\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:55Z\n",
|
|
" created: 2019-08-20T02:12:55Z\n",
|
|
" rowCount: 10\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'limit': -1, u'separator': u',', u'trimStrings': False, u'columnWidths': [27, 21, 6, 7, 15, 6, 1000], u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 1}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com \tDanny Baron \tCA \tM \tTV (UTF-8: 📺) \t1 \tWed, 4 Jul 2001 \n",
|
|
"melanie.white@example2.edu \tMelanie White \tNC \tF \t<iPhone> \t1 \t2001-07-04T12:08:5\n",
|
|
"danny.baron@example1.com \t\" D.\t(\"\"Tab\"\") Baron \"\tCA \tM \tWinter jacket \t1 \t2001-07-04 \n",
|
|
"ben.tyler@example3.org \tBen Tyler \tNV \tM \tFlashlight \t1 \t2001/07/04 \n",
|
|
"arthur.duff@example4.com \tArthur Duff \tOR \tM \tDining table \t1 \t2001-07 \n",
|
|
"danny.baron@example1.com \tDaniel Baron \t \t \tBike \t1 \t2001 \n",
|
|
"jean.griffith@example5.org \tJean Griffith \tWA \tF \tPower drill \t1 \t2000 \n",
|
|
"melanie.white@example2.edu \tMelanie White \tNC \tF \t'iPad' \t1 \t1999 \n",
|
|
"ben.morisson@example6.org \tBen Morisson \tFL \tM \tAmplifier \t1 \t1998 \n",
|
|
"arthur.duff@example4.com \tArthur Duff \tOR \tM \tNight table \t1 \t1997 \n",
|
|
"Project 1703842312470 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.txt', columnWidths=[27, 21, 6, 7, 15, 6, 1000], headerLines=1)\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## ZIP"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default\n",
|
|
"\n",
|
|
"should contain 16 rows"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2381217278039\n",
|
|
"rows: 16\n",
|
|
" id: 2381217278039\n",
|
|
" url: http://127.0.0.1:3333/project?project=2381217278039\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:55Z\n",
|
|
" created: 2019-08-20T02:12:55Z\n",
|
|
" rowCount: 16\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}, {u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺), Winter jacket, bike\t3\tWed, 4 Jul 2001, 2001-07-04, 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>, 'iPad'\t2\t2001-07-04T12:08:56, 1999\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2\t2001-07, 1997\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"Project 2381217278039 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.zip')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### includeFileSources\n",
|
|
"\n",
|
|
"should contain column File"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2314884555837\n",
|
|
"rows: 16\n",
|
|
" id: 2314884555837\n",
|
|
" url: http://127.0.0.1:3333/project?project=2314884555837\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:12:55Z\n",
|
|
" created: 2019-08-20T02:12:55Z\n",
|
|
" rowCount: 16\n",
|
|
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': True}, {u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': True}]\n",
|
|
" column 001: File\n",
|
|
" column 002: email\n",
|
|
" column 003: name\n",
|
|
" column 004: state\n",
|
|
" column 005: gender\n",
|
|
" column 006: purchase\n",
|
|
" column 007: count\n",
|
|
" column 008: date\n",
|
|
"File\temail\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"duplicates.csv\tdanny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n",
|
|
"duplicates.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1\t2001-07-04T12:08:56\n",
|
|
"duplicates.csv\tdanny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n",
|
|
"duplicates.csv\tben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"duplicates.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n",
|
|
"duplicates.csv\tdanny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n",
|
|
"duplicates.csv\tjean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"duplicates.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n",
|
|
"duplicates.csv\tben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"duplicates.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n",
|
|
"duplicates2.csv\tdanny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺), Winter jacket, bike\t3\tWed, 4 Jul 2001, 2001-07-04, 2001\n",
|
|
"duplicates2.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>, 'iPad'\t2\t2001-07-04T12:08:56, 1999\n",
|
|
"duplicates2.csv\tben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n",
|
|
"duplicates2.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2\t2001-07, 1997\n",
|
|
"duplicates2.csv\tjean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n",
|
|
"duplicates2.csv\tben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n",
|
|
"Project 2314884555837 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.zip', includeFileSources=True)\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## ODS (broken in OpenRefine >=2.8)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default\n",
|
|
"\n",
|
|
"many blank columns and rows in OpenRefine <=2.7 (also with manual import via GUI)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1620818141127\n",
|
|
"rows: 11\n",
|
|
" id: 1620818141127\n",
|
|
" url: http://127.0.0.1:3333/project?project=1620818141127\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:13:41Z\n",
|
|
" created: 2019-08-20T02:13:41Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
" column 008: Column\n",
|
|
" column 009: Column 9\n",
|
|
" column 010: Column 10\n",
|
|
" column 011: Column 11\n",
|
|
" column 012: Column 12\n",
|
|
" column 013: Column 13\n",
|
|
" column 014: Column 14\n",
|
|
" column 015: Column 15\n",
|
|
" column 016: Column 16\n",
|
|
" column 017: Column 17\n",
|
|
" column 018: Column 18\n",
|
|
" column 019: Column 19\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\tColumn\tColumn 9\tColumn 10\tColumn 11\tColumn 12\tColumn 13\tColumn 14\tColumn 15\tColumn 16\tColumn 17\tColumn 18\tColumn 19\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"Project 1620818141127 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.ods')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### sheets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"first sheet from file with 2 sheets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1985853059017\n",
|
|
"rows: 11\n",
|
|
" id: 1985853059017\n",
|
|
" url: http://127.0.0.1:3333/project?project=1985853059017\n",
|
|
" name: duplicates2\n",
|
|
" modified: 2019-08-20T02:13:47Z\n",
|
|
" created: 2019-08-20T02:13:47Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
" column 008: Column\n",
|
|
" column 009: Column 9\n",
|
|
" column 010: Column 10\n",
|
|
" column 011: Column 11\n",
|
|
" column 012: Column 12\n",
|
|
" column 013: Column 13\n",
|
|
" column 014: Column 14\n",
|
|
" column 015: Column 15\n",
|
|
" column 016: Column 16\n",
|
|
" column 017: Column 17\n",
|
|
" column 018: Column 18\n",
|
|
" column 019: Column 19\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\tColumn\tColumn 9\tColumn 10\tColumn 11\tColumn 12\tColumn 13\tColumn 14\tColumn 15\tColumn 16\tColumn 17\tColumn 18\tColumn 19\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"Project 1985853059017 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates2.ods', sheets=[0])\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"both sheets from file with 2 sheets: should contain 16 rows (note: the recorded ODS run below reports `rows: 18`, although the export lists 16 data rows)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2325827930833\n",
|
|
"rows: 18\n",
|
|
" id: 2325827930833\n",
|
|
" url: http://127.0.0.1:3333/project?project=2325827930833\n",
|
|
" name: duplicates2\n",
|
|
" modified: 2019-08-20T02:13:49Z\n",
|
|
" created: 2019-08-20T02:13:49Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
" column 008: Column\n",
|
|
" column 009: Column 9\n",
|
|
" column 010: Column 10\n",
|
|
" column 011: Column 11\n",
|
|
" column 012: Column 12\n",
|
|
" column 013: Column 13\n",
|
|
" column 014: Column 14\n",
|
|
" column 015: Column 15\n",
|
|
" column 016: Column 16\n",
|
|
" column 017: Column 17\n",
|
|
" column 018: Column 18\n",
|
|
" column 019: Column 19\n",
|
|
" column 020: Column 20\n",
|
|
" column 021: Column 21\n",
|
|
" column 022: Column 22\n",
|
|
" column 023: Column 23\n",
|
|
" column 024: Column 24\n",
|
|
" column 025: Column 25\n",
|
|
" column 026: Column 26\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\tColumn\tColumn 9\tColumn 10\tColumn 11\tColumn 12\tColumn 13\tColumn 14\tColumn 15\tColumn 16\tColumn 17\tColumn 18\tColumn 19\tColumn 20\tColumn 21\tColumn 22\tColumn 23\tColumn 24\tColumn 25\tColumn 26\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺), Winter jacket, bike\t3.0\tWed, 4 Jul 2001, 2001-07-04, 2001\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>, 'iPad'\t2.0\t2001-07-04T12:08:56, 1999\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2.0\t2001-07, 1997\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\n",
|
|
"Project 2325827930833 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates2.ods', sheets=[0, 1])\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## XLS (broken in OpenRefine >=2.8)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 39,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1607123650693\n",
|
|
"rows: 10\n",
|
|
" id: 1607123650693\n",
|
|
" url: http://127.0.0.1:3333/project?project=1607123650693\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:13:52Z\n",
|
|
" created: 2019-08-20T02:13:52Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D. (\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n",
|
|
"Project 1607123650693 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.xls')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### sheets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"first sheet from file with 2 sheets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2439816728218\n",
|
|
"rows: 10\n",
|
|
" id: 2439816728218\n",
|
|
" url: http://127.0.0.1:3333/project?project=2439816728218\n",
|
|
" name: duplicates2\n",
|
|
" modified: 2019-08-20T02:13:58Z\n",
|
|
" created: 2019-08-20T02:13:58Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n",
|
|
"Project 2439816728218 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates2.xls', sheets=[0])\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"both sheets from file with 2 sheets: should contain 16 rows"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 41,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1954256360738\n",
|
|
"rows: 16\n",
|
|
" id: 1954256360738\n",
|
|
" url: http://127.0.0.1:3333/project?project=1954256360738\n",
|
|
" name: duplicates2\n",
|
|
" modified: 2019-08-20T02:13:59Z\n",
|
|
" created: 2019-08-20T02:13:59Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺), Winter jacket, bike\t3.0\tWed, 4 Jul 2001, 2001-07-04, 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>, 'iPad'\t2.0\t2001-07-04T12:08:56, 1999\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2.0\t2001-07, 1997\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n",
|
|
"Project 1954256360738 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates2.xls', sheets=[0, 1])\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## XLSX (broken in OpenRefine >=2.8)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### default"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 42,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 2423289296267\n",
|
|
"rows: 10\n",
|
|
" id: 2423289296267\n",
|
|
" url: http://127.0.0.1:3333/project?project=2423289296267\n",
|
|
" name: duplicates\n",
|
|
" modified: 2019-08-20T02:14:01Z\n",
|
|
" created: 2019-08-20T02:14:01Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D. (\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n",
|
|
"Project 2423289296267 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates.xlsx')\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### sheets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"first sheet from file with 2 sheets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 43,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1593486586431\n",
|
|
"rows: 10\n",
|
|
" id: 1593486586431\n",
|
|
" url: http://127.0.0.1:3333/project?project=1593486586431\n",
|
|
" name: duplicates2\n",
|
|
" modified: 2019-08-20T02:14:04Z\n",
|
|
" created: 2019-08-20T02:14:04Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n",
|
|
"Project 1593486586431 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates2.xlsx', sheets=[0])\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"both sheets from file with 2 sheets: should contain 16 rows"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 44,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"id: 1857964669991\n",
|
|
"rows: 16\n",
|
|
" id: 1857964669991\n",
|
|
" url: http://127.0.0.1:3333/project?project=1857964669991\n",
|
|
" name: duplicates2\n",
|
|
" modified: 2019-08-20T02:14:09Z\n",
|
|
" created: 2019-08-20T02:14:09Z\n",
|
|
" column 001: email\n",
|
|
" column 002: name\n",
|
|
" column 003: state\n",
|
|
" column 004: gender\n",
|
|
" column 005: purchase\n",
|
|
" column 006: count\n",
|
|
" column 007: date\n",
|
|
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1.0\tWed, 4 Jul 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>\t1.0\t2001-07-04T12:08:56\n",
|
|
"danny.baron@example1.com\t\" D.(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1.0\t2001-07-04\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1.0\t2001-07\n",
|
|
"danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1.0\t2001.0\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1.0\t1999.0\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1.0\t1997.0\n",
|
|
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺), Winter jacket, bike\t3.0\tWed, 4 Jul 2001, 2001-07-04, 2001\n",
|
|
"melanie.white@example2.edu\tMelanie White\tNC\tF\t<iPhone>, 'iPad'\t2.0\t2001-07-04T12:08:56, 1999\n",
|
|
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1.0\t2001/07/04\n",
|
|
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2.0\t2001-07, 1997\n",
|
|
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1.0\t2000.0\n",
|
|
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1.0\t1998.0\n",
|
|
"Project 1857964669991 has been successfully deleted\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"p = cli.create('data/cli/duplicates2.xlsx', sheets=[0, 1])\n",
|
|
"cli.info(p.project_id)\n",
|
|
"cli.export(p.project_id)\n",
|
|
"cli.delete(p.project_id)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 2",
|
|
"language": "python",
|
|
"name": "python2"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 2
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython2",
|
|
"version": "2.7.16"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|