openrefine-client/tests/cli_bash.ipynb

820 lines
21 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test executable in a Linux Bash environment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Install\n",
"\n",
"This notebook requires a [Bash kernel](https://github.com/takluyver/bash_kernel) environment and an OpenRefine server running at http://127.0.0.1:3333."
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/tmp/20190822_013937\n"
]
}
],
"source": [
"workspace=$(date +%Y%m%d_%H%M%S)\n",
"mkdir -p /tmp/$workspace\n",
"cp -r data /tmp/$workspace\n",
"cd /tmp/$workspace && pwd"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.8/openrefine-client_0-3-8_linux:\n",
"2019-08-22 01:39:40 ERROR 404: Not Found.\n"
]
}
],
"source": [
"wget -nv https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.8/openrefine-client_0-3-8_linux -O openrefine-client\n",
"chmod +x openrefine-client"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## README.MD"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Download to file duplicates.csv complete\n"
]
}
],
"source": [
"./openrefine-client --download \"https://git.io/fj5hF\" --output=duplicates.csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id: 2334935475634\n",
"rows: 10\n"
]
}
],
"source": [
"./openrefine-client --create duplicates.csv"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### List"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 2334935475634: duplicates\n"
]
}
],
"source": [
"./openrefine-client --list"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Info"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" id: 2334935475634\n",
" url: http://127.0.0.1:3333/project?project=2334935475634\n",
" name: duplicates\n",
" modified: 2019-08-21T23:40:30Z\n",
" created: 2019-08-21T23:40:30Z\n",
" rowCount: 10\n",
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
" column 001: email\n",
" column 002: name\n",
" column 003: state\n",
" column 004: gender\n",
" column 005: purchase\n"
]
}
],
"source": [
"./openrefine-client --info \"duplicates\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Export"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"email\tname\tstate\tgender\tpurchase\n",
"danny.baron@example1.com\tDanny Baron\tCA\tM\tTV\n",
"melanie.white@example2.edu\tMelanie White\tNC\tF\tiPhone\n",
"danny.baron@example1.com\tD. Baron\tCA\tM\tWinter jacket\n",
"ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\n",
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\n",
"danny.baron@example1.com\tDaniel Baron\tCA\tM\tBike\n",
"jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\n",
"melanie.white@example2.edu\tMelanie White\tNC\tF\tiPad\n",
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\n",
"arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\n"
]
}
],
"source": [
"./openrefine-client --export \"duplicates\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Apply"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Download to file duplicates-deletion.json complete\n"
]
}
],
"source": [
"./openrefine-client --download \"https://git.io/fj5ju\" --output=duplicates-deletion.json"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"File duplicates-deletion.json has been successfully applied to project 2334935475634\n"
]
}
],
"source": [
"./openrefine-client --apply duplicates-deletion.json \"duplicates\""
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"email\tcount\tname\tstate\tgender\tpurchase\n",
"arthur.duff@example4.com\t2\tArthur Duff\tOR\tM\tDining table\n",
"ben.morisson@example6.org\t1\tBen Morisson\tFL\tM\tAmplifier\n",
"ben.tyler@example3.org\t1\tBen Tyler\tNV\tM\tFlashlight\n",
"danny.baron@example1.com\t3\tDanny Baron\tCA\tM\tTV\n",
"jean.griffith@example5.org\t1\tJean Griffith\tWA\tF\tPower drill\n",
"melanie.white@example2.edu\t2\tMelanie White\tNC\tF\tiPhone\n"
]
}
],
"source": [
"./openrefine-client --export \"duplicates\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Export XLS"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to file deduped.xls complete\n"
]
}
],
"source": [
"./openrefine-client --export \"duplicates\" --output deduped.xls"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project 2334935475634 has been successfully deleted\n"
]
}
],
"source": [
"./openrefine-client --delete \"duplicates\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Templating"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id: 1633409429491\n",
"rows: 10\n"
]
}
],
"source": [
"./openrefine-client --create duplicates.csv --projectName=advanced"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{ \"events\" : [\n",
" { \"name\" : \"Melanie White\", \"purchase\" : \"iPhone\" },\n",
" { \"name\" : \"Jean Griffith\", \"purchase\" : \"Power drill\" },\n",
" { \"name\" : \"Melanie White\", \"purchase\" : \"iPad\" }\n",
"] }"
]
}
],
"source": [
"./openrefine-client --export \"advanced\" \\\n",
"--prefix='{ \"events\" : [\n",
"' \\\n",
"--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^F$' \\\n",
"--filterColumn='gender'"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to files complete. Last file: advanced_3.json\n"
]
}
],
"source": [
"./openrefine-client --export \"advanced\" \\\n",
"--prefix='{ \"events\" : [\n",
"' \\\n",
"--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^F$' \\\n",
"--filterColumn='gender' \\\n",
"--output=advanced.json \\\n",
"--splitToFiles=true"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to files complete. Last file: advanced_melanie.white@example2.edu.json\n"
]
}
],
"source": [
"./openrefine-client --export \"advanced\" \\\n",
"--prefix='{ \"events\" : [\n",
"' \\\n",
"--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^F$' \\\n",
"--filterColumn='gender' \\\n",
"--output=advanced.json \\\n",
"--splitToFiles=true \\\n",
"--suffixById=true"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"advanced_1.json \u001b[0m\u001b[38;5;33mdata\u001b[0m\n",
"advanced_2.json deduped.xls\n",
"advanced_3.json duplicates.csv\n",
"advanced_jean.griffith@example5.org.json duplicates-deletion.json\n",
"advanced_melanie.white@example2.edu.json \u001b[38;5;40mopenrefine-client\u001b[0m\n"
]
}
],
"source": [
"ls"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project 1633409429491 has been successfully deleted\n"
]
}
],
"source": [
"./openrefine-client --delete \"advanced\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Unicode"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### fruits"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id: 2280962953279\n",
"rows: 5\n",
" id: 2280962953279\n",
" url: http://127.0.0.1:3333/project?project=2280962953279\n",
" name: evil-fruits\n",
" modified: 2019-08-21T23:40:43Z\n",
" created: 2019-08-21T23:40:43Z\n",
" rowCount: 5\n",
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/evil-fruits.tsv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'evil-fruits', u'processQuotes': True, u'limit': -1, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n",
" column 001: 🔣\n",
" column 002: code\n",
" column 003: meaning\n",
"🔣\tcode\tmeaning\n",
"🍇\t1F347\tGRAPES\n",
"🍉\t1F349\tWATERMELON\n",
"🍒\t1F352\tCHERRIES\n",
"🍓\t1F353\tSTRAWBERRY\n",
"🍍\t1F34D\tPINEAPPLE\n"
]
}
],
"source": [
"./openrefine-client --create data/cli/evil-fruits.tsv\n",
"./openrefine-client --info \"evil-fruits\"\n",
"./openrefine-client --export \"evil-fruits\""
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to file emojis.csv complete\n",
"🔣,code,meaning\n",
"🍇,1F347,GRAPES\n",
"🍉,1F349,WATERMELON\n",
"🍒,1F352,CHERRIES\n",
"🍓,1F353,STRAWBERRY\n",
"🍍,1F34D,PINEAPPLE\n"
]
}
],
"source": [
"./openrefine-client --export \"evil-fruits\" --output emojis.csv\n",
"cat emojis.csv"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{ \"emojis\" : [\n",
" { \"symbol\" : \"🍇\", \"meaning\" : \"GRAPES\" },\n",
" { \"symbol\" : \"🍉\", \"meaning\" : \"WATERMELON\" },\n",
" { \"symbol\" : \"🍍\", \"meaning\" : \"PINEAPPLE\" }\n",
"] }"
]
}
],
"source": [
"./openrefine-client --export \"evil-fruits\" \\\n",
"--prefix='{ \"emojis\" : [\n",
"' \\\n",
"--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^1F34' \\\n",
"--filterColumn='code'"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to files complete. Last file: trái cây_3.json\n"
]
}
],
"source": [
"./openrefine-client --export \"evil-fruits\" \\\n",
"--prefix='{ \"emojis\" : [\n",
"' \\\n",
"--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^1F34' \\\n",
"--filterColumn='code' \\\n",
"--output='trái cây.json' \\\n",
"--splitToFiles=true"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Export to files complete. Last file: trái cây_🍍.json\n"
]
}
],
"source": [
"./openrefine-client --export \"evil-fruits\" \\\n",
"--prefix='{ \"emojis\" : [\n",
"' \\\n",
"--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n",
"--rowSeparator=',\n",
"' \\\n",
"--suffix='\n",
"] }' \\\n",
"--filterQuery='^1F34' \\\n",
"--filterColumn='code' \\\n",
"--output='trái cây.json' \\\n",
"--splitToFiles=true \\\n",
"--suffixById=true"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" advanced_1.json emojis.csv\n",
" advanced_2.json \u001b[0m\u001b[38;5;40mopenrefine-client\u001b[0m\n",
" advanced_3.json 'trái cây_1.json'\n",
" advanced_jean.griffith@example5.org.json 'trái cây_2.json'\n",
" advanced_melanie.white@example2.edu.json 'trái cây_3.json'\n",
" \u001b[38;5;33mdata\u001b[0m 'trái cây_🍇.json'\n",
" deduped.xls 'trái cây_🍉.json'\n",
" duplicates.csv 'trái cây_🍍.json'\n",
" duplicates-deletion.json\n"
]
}
],
"source": [
"ls"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project 2280962953279 has been successfully deleted\n"
]
}
],
"source": [
"./openrefine-client --delete \"evil-fruits\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### emoji-data"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id: 2019865211741\n",
"rows: 20\n",
" id: 2019865211741\n",
" url: http://127.0.0.1:3333/project?project=2019865211741\n",
" name: dữ liệu biểu tượng cảm xúc\n",
" modified: 2019-08-21T23:41:06Z\n",
" created: 2019-08-21T23:41:06Z\n",
" rowCount: 20\n",
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/d\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac.txt', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'd\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac', u'processQuotes': True, u'skipDataLines': 34, u'limit': 20, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n",
" column 001: Column 1\n",
" column 002: Column 2\n",
" column 003: Column 3\n",
" column 004: Column 4\n",
" column 005: Column 5\n",
" column 006: Column 6\n",
"Column 1\tColumn 2\tColumn 3\tColumn 4\tColumn 5\tColumn 6\n",
"00A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (©) COPYRIGHT SIGN\n",
"00AE ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (®) REGISTERED SIGN\n",
"203C ;\ttext ;\tL1 ;\tnone ;\ta j\t# V1.1 (‼) DOUBLE EXCLAMATION MARK\n",
"2049 ;\ttext ;\tL1 ;\tnone ;\ta j\t# V3.0 (⁉) EXCLAMATION QUESTION MARK\n",
"2122 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (™) TRADE MARK SIGN\n",
"2139 ;\ttext ;\tL1 ;\tnone ;\tj\t# V3.0 () INFORMATION SOURCE\n",
"2194 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↔) LEFT RIGHT ARROW\n",
"2195 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↕) UP DOWN ARROW\n",
"2196 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↖) NORTH WEST ARROW\n",
"2197 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↗) NORTH EAST ARROW\n",
"2198 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↘) SOUTH EAST ARROW\n",
"2199 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↙) SOUTH WEST ARROW\n",
"21A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↩) LEFTWARDS ARROW WITH HOOK\n",
"21AA ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↪) RIGHTWARDS ARROW WITH HOOK\n",
"231A ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌚) WATCH\n",
"231B ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌛) HOURGLASS\n",
"2328 ;\ttext ;\tL2 ;\tnone ;\tx\t# V1.1 (⌨) KEYBOARD\n",
"23CF ;\ttext ;\tL2 ;\tnone ;\tx\t# V4.0 (⏏) EJECT SYMBOL\n",
"23E9 ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏩) BLACK RIGHT-POINTING DOUBLE TRIANGLE\n",
"23EA ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏪) BLACK LEFT-POINTING DOUBLE TRIANGLE\n"
]
}
],
"source": [
"./openrefine-client --create \"data/cli/dữ liệu biểu tượng cảm xúc.txt\" \\\n",
"--format=tsv \\\n",
"--headerLines=0 \\\n",
"--skipDataLines=34 \\\n",
"--limit=20\n",
"./openrefine-client --info \"dữ liệu biểu tượng cảm xúc\"\n",
"./openrefine-client --export \"dữ liệu biểu tượng cảm xúc\""
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 2019865211741: dữ liệu biểu tượng cảm xúc\n"
]
}
],
"source": [
"./openrefine-client --list"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Project 2019865211741 has been successfully deleted\n"
]
}
],
"source": [
"./openrefine-client --delete \"dữ liệu biểu tượng cảm xúc\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Bash",
"language": "bash",
"name": "bash"
},
"language_info": {
"codemirror_mode": "shell",
"file_extension": ".sh",
"mimetype": "text/x-sh",
"name": "bash"
}
},
"nbformat": 4,
"nbformat_minor": 2
}