openrefine-client/tests/cli_bash.ipynb

820 lines
21 KiB
Plaintext
Raw Normal View History

2019-08-22 01:43:52 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test the openrefine-client executable in a Linux Bash environment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Install\n",
"\n",
"This notebook requires a [Bash kernel](https://github.com/takluyver/bash_kernel) environment, `wget`, a `data` directory in the kernel's working directory (it is copied into a scratch workspace under `/tmp`), and an OpenRefine server running at http://127.0.0.1:3333."
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/tmp/20190822_013937\n"
]
}
],
"source": [
"# Create a timestamped scratch directory under /tmp, copy the test data into it,\n",
"# then switch to it and print the resulting working directory.\n",
"workspace=$(date +%Y%m%d_%H%M%S)\n",
"mkdir -p \"/tmp/${workspace}\"\n",
"cp -r data \"/tmp/${workspace}\"\n",
"cd \"/tmp/${workspace}\" && pwd"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.8/openrefine-client_0-3-8_linux:\n",
"2019-08-22 01:39:40 ERROR 404: Not Found.\n"
]
}
],
"source": [
"# Download the release binary into the workspace as ./openrefine-client.\n",
"# The steps are chained with && so chmod runs only after a successful download;\n",
"# a failed download (e.g. HTTP 404) ends the cell with wget's non-zero exit status\n",
"# instead of marking the empty file left by `wget -O` as executable.\n",
"wget -nv https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.8/openrefine-client_0-3-8_linux -O openrefine-client \\\n",
"&& chmod +x openrefine-client"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## README.md"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Download to file duplicates.csv complete\n"
]
}
],
"source": [
"./openrefine-client --download \"https://git.io/fj5hF\" --output=duplicates.csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id: 2334935475634\n",
"rows: 10\n"
]
}
],
"source": [
"./openrefine-client --create duplicates.csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### List"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 2334935475634: duplicates\n"
]
}
],
"source": [
"./openrefine-client --list"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Info"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id: 2334935475634\n",
" url: http://127.0.0.1:3333/project?project=2334935475634\n",
" name: duplicates\n",
" modified: 2019-08-21T23:40:30Z\n",
" created: 2019-08-21T23:40:30Z\n",
" rowCount: 10\n",
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
" column 001: email\n",
" column 002: name\n",
" column 003: state\n",
" column 004: gender\n",
" column 005: purchase\n"
]
}
],
"source": [
"./openrefine-client --info \"duplicates\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Export"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"email\tname\tstate\tgender\tpurchase\n",
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV\n",
"melanie.white@example2.edu\tMelanie White\tNC\tF\tiPhone\n",
"danny.baron@example1.com\tD. Baron\tCA\tM\tWinter jacket\n",
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\n",
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\n",
"danny.baron@example1.com\tDaniel Baron\tCA\tM\tBike\n",
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\n",
"melanie.white@example2.edu\tMelanie White\tNC\tF\tiPad\n",
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\n",
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\n"
]
}
],
"source": [
"./openrefine-client --export \"duplicates\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Apply"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Download to file duplicates-deletion.json complete\n"
]
}
],
"source": [
"./openrefine-client --download \"https://git.io/fj5ju\" --output=duplicates-deletion.json"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File duplicates-deletion.json has been successfully applied to project 2334935475634\n"
]
}
],
"source": [
"./openrefine-client --apply duplicates-deletion.json \"duplicates\""
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"email\tcount\tname\tstate\tgender\tpurchase\n",
"arthur.duff@example4.com\t2\tArthur Duff\tOR\tM\tDining table\n",
"ben.morisson@example6.org\t1\tBen Morisson\tFL\tM\tAmplifier\n",
"ben.tyler@example3.org\t1\tBen Tyler\tNV\tM\tFlashlight\n",
"danny.baron@example1.com\t3\tDanny Baron\tCA\tM\tTV\n",
"jean.griffith@example5.org\t1\tJean Griffith\tWA\tF\tPower drill\n",
"melanie.white@example2.edu\t2\tMelanie White\tNC\tF\tiPhone\n"
]
}
],
"source": [
"./openrefine-client --export \"duplicates\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Export XLS"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to file deduped.xls complete\n"
]
}
],
"source": [
"./openrefine-client --export \"duplicates\" --output deduped.xls"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project 2334935475634 has been successfully deleted\n"
]
}
],
"source": [
"./openrefine-client --delete \"duplicates\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Templating"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id: 1633409429491\n",
"rows: 10\n"
]
}
],
"source": [
"./openrefine-client --create duplicates.csv --projectName=advanced"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{ \"events\" : [\n",
" { \"name\" : \"Melanie White\", \"purchase\" : \"iPhone\" },\n",
" { \"name\" : \"Jean Griffith\", \"purchase\" : \"Power drill\" },\n",
" { \"name\" : \"Melanie White\", \"purchase\" : \"iPad\" }\n",
"] }"
]
}
],
"source": [
"./openrefine-client --export \"advanced\" \\\n",
"--prefix='{ \"events\" : [\n",
"' \\\n",
"--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^F$' \\\n",
"--filterColumn='gender'"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to files complete. Last file: advanced_3.json\n"
]
}
],
"source": [
"./openrefine-client --export \"advanced\" \\\n",
"--prefix='{ \"events\" : [\n",
"' \\\n",
"--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^F$' \\\n",
"--filterColumn='gender' \\\n",
"--output=advanced.json \\\n",
"--splitToFiles=true"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to files complete. Last file: advanced_melanie.white@example2.edu.json\n"
]
}
],
"source": [
"./openrefine-client --export \"advanced\" \\\n",
"--prefix='{ \"events\" : [\n",
"' \\\n",
"--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^F$' \\\n",
"--filterColumn='gender' \\\n",
"--output=advanced.json \\\n",
"--splitToFiles=true \\\n",
"--suffixById=true"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"advanced_1.json \u001b[0m\u001b[38;5;33mdata\u001b[0m\n",
"advanced_2.json deduped.xls\n",
"advanced_3.json duplicates.csv\n",
"advanced_jean.griffith@example5.org.json duplicates-deletion.json\n",
"advanced_melanie.white@example2.edu.json \u001b[38;5;40mopenrefine-client\u001b[0m\n"
]
}
],
"source": [
"ls"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project 1633409429491 has been successfully deleted\n"
]
}
],
"source": [
"./openrefine-client --delete \"advanced\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Unicode"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### fruits"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id: 2280962953279\n",
"rows: 5\n",
" id: 2280962953279\n",
" url: http://127.0.0.1:3333/project?project=2280962953279\n",
" name: evil-fruits\n",
" modified: 2019-08-21T23:40:43Z\n",
" created: 2019-08-21T23:40:43Z\n",
" rowCount: 5\n",
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/evil-fruits.tsv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'evil-fruits', u'processQuotes': True, u'limit': -1, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
" column 001: 🔣\n",
" column 002: code\n",
" column 003: meaning\n",
"🔣\tcode\tmeaning\n",
"🍇\t1F347\tGRAPES\n",
"🍉\t1F349\tWATERMELON\n",
"🍒\t1F352\tCHERRIES\n",
"🍓\t1F353\tSTRAWBERRY\n",
"🍍\t1F34D\tPINEAPPLE\n"
]
}
],
"source": [
"./openrefine-client --create data/cli/evil-fruits.tsv\n",
"./openrefine-client --info \"evil-fruits\"\n",
"./openrefine-client --export \"evil-fruits\""
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to file emojis.csv complete\n",
"🔣,code,meaning\n",
"🍇,1F347,GRAPES\n",
"🍉,1F349,WATERMELON\n",
"🍒,1F352,CHERRIES\n",
"🍓,1F353,STRAWBERRY\n",
"🍍,1F34D,PINEAPPLE\n"
]
}
],
"source": [
"./openrefine-client --export \"evil-fruits\" --output emojis.csv\n",
"cat emojis.csv"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{ \"emojis\" : [\n",
" { \"symbol\" : \"🍇\", \"meaning\" : \"GRAPES\" },\n",
" { \"symbol\" : \"🍉\", \"meaning\" : \"WATERMELON\" },\n",
" { \"symbol\" : \"🍍\", \"meaning\" : \"PINEAPPLE\" }\n",
"] }"
]
}
],
"source": [
"./openrefine-client --export \"evil-fruits\" \\\n",
"--prefix='{ \"emojis\" : [\n",
"' \\\n",
"--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^1F34' \\\n",
"--filterColumn='code'"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to files complete. Last file: trái cây_3.json\n"
]
}
],
"source": [
"./openrefine-client --export \"evil-fruits\" \\\n",
"--prefix='{ \"emojis\" : [\n",
"' \\\n",
"--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^1F34' \\\n",
"--filterColumn='code' \\\n",
"--output='trái cây.json' \\\n",
"--splitToFiles=true"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to files complete. Last file: trái cây_🍍.json\n"
]
}
],
"source": [
"./openrefine-client --export \"evil-fruits\" \\\n",
"--prefix='{ \"emojis\" : [\n",
"' \\\n",
"--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^1F34' \\\n",
"--filterColumn='code' \\\n",
"--output='trái cây.json' \\\n",
"--splitToFiles=true \\\n",
"--suffixById=true"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" advanced_1.json emojis.csv\n",
" advanced_2.json \u001b[0m\u001b[38;5;40mopenrefine-client\u001b[0m\n",
" advanced_3.json 'trái cây_1.json'\n",
" advanced_jean.griffith@example5.org.json 'trái cây_2.json'\n",
" advanced_melanie.white@example2.edu.json 'trái cây_3.json'\n",
" \u001b[38;5;33mdata\u001b[0m 'trái cây_🍇.json'\n",
" deduped.xls 'trái cây_🍉.json'\n",
" duplicates.csv 'trái cây_🍍.json'\n",
" duplicates-deletion.json\n"
]
}
],
"source": [
"ls"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project 2280962953279 has been successfully deleted\n"
]
}
],
"source": [
"./openrefine-client --delete \"evil-fruits\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### emoji-data"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id: 2019865211741\n",
"rows: 20\n",
" id: 2019865211741\n",
" url: http://127.0.0.1:3333/project?project=2019865211741\n",
" name: dữ liệu biểu tượng cảm xúc\n",
" modified: 2019-08-21T23:41:06Z\n",
" created: 2019-08-21T23:41:06Z\n",
" rowCount: 20\n",
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/d\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac.txt', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'd\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac', u'processQuotes': True, u'skipDataLines': 34, u'limit': 20, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n",
" column 001: Column 1\n",
" column 002: Column 2\n",
" column 003: Column 3\n",
" column 004: Column 4\n",
" column 005: Column 5\n",
" column 006: Column 6\n",
"Column 1\tColumn 2\tColumn 3\tColumn 4\tColumn 5\tColumn 6\n",
"00A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (©) COPYRIGHT SIGN\n",
"00AE ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (®) REGISTERED SIGN\n",
"203C ;\ttext ;\tL1 ;\tnone ;\ta j\t# V1.1 (‼) DOUBLE EXCLAMATION MARK\n",
"2049 ;\ttext ;\tL1 ;\tnone ;\ta j\t# V3.0 (⁉) EXCLAMATION QUESTION MARK\n",
"2122 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (™) TRADE MARK SIGN\n",
"2139 ;\ttext ;\tL1 ;\tnone ;\tj\t# V3.0 () INFORMATION SOURCE\n",
"2194 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↔) LEFT RIGHT ARROW\n",
"2195 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↕) UP DOWN ARROW\n",
"2196 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↖) NORTH WEST ARROW\n",
"2197 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↗) NORTH EAST ARROW\n",
"2198 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↘) SOUTH EAST ARROW\n",
"2199 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↙) SOUTH WEST ARROW\n",
"21A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↩) LEFTWARDS ARROW WITH HOOK\n",
"21AA ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↪) RIGHTWARDS ARROW WITH HOOK\n",
"231A ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌚) WATCH\n",
"231B ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌛) HOURGLASS\n",
"2328 ;\ttext ;\tL2 ;\tnone ;\tx\t# V1.1 (⌨) KEYBOARD\n",
"23CF ;\ttext ;\tL2 ;\tnone ;\tx\t# V4.0 (⏏) EJECT SYMBOL\n",
"23E9 ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏩) BLACK RIGHT-POINTING DOUBLE TRIANGLE\n",
"23EA ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏪) BLACK LEFT-POINTING DOUBLE TRIANGLE\n"
]
}
],
"source": [
"./openrefine-client --create \"data/cli/dữ liệu biểu tượng cảm xúc.txt\" \\\n",
"--format=tsv \\\n",
"--headerLines=0 \\\n",
"--skipDataLines=34 \\\n",
"--limit=20\n",
"./openrefine-client --info \"dữ liệu biểu tượng cảm xúc\"\n",
"./openrefine-client --export \"dữ liệu biểu tượng cảm xúc\""
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 2019865211741: dữ liệu biểu tượng cảm xúc\n"
]
}
],
"source": [
"./openrefine-client --list"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project 2019865211741 has been successfully deleted\n"
]
}
],
"source": [
"./openrefine-client --delete \"dữ liệu biểu tượng cảm xúc\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Bash",
"language": "bash",
"name": "bash"
},
"language_info": {
"codemirror_mode": "shell",
"file_extension": ".sh",
"mimetype": "text/x-sh",
"name": "bash"
}
},
"nbformat": 4,
"nbformat_minor": 2
}