{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Test executable in a Linux Bash environment" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Install\n", "\n", "This notebook requires a [Bash kernel](https://github.com/takluyver/bash_kernel) environment and an OpenRefine server running at http://127.0.0.1:3333." ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/tmp/20190822_013937\n" ] } ], "source": [ "# Work in a fresh, timestamped copy under /tmp so repeated runs start from a clean directory.\n", "workspace=$(date +%Y%m%d_%H%M%S)\n", "# Chain with && so a failed mkdir/cp cannot leave the following cells running in the wrong directory.\n", "mkdir -p \"/tmp/$workspace\" &&\n", "cp -r data \"/tmp/$workspace\" &&\n", "cd \"/tmp/$workspace\" && pwd" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.8/openrefine-client_0-3-8_linux:\n", "2019-08-22 01:39:40 ERROR 404: Not Found.\n" ] } ], "source": [ "# Download to a temporary name first: wget -O creates its target even when the request fails,\n", "# which would leave an empty file that still gets marked executable.\n", "client_url=https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.8/openrefine-client_0-3-8_linux\n", "if wget -nv \"$client_url\" -O openrefine-client.part; then\n", "  mv openrefine-client.part openrefine-client && chmod +x openrefine-client\n", "else\n", "  rm -f openrefine-client.part\n", "  echo \"ERROR: could not download $client_url\" >&2\n", "  false  # finish with a non-zero status so the failed download is not silently ignored\n", "fi" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## README.MD" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Download" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Download to file duplicates.csv complete\n" ] } ], "source": [ "./openrefine-client --download \"https://git.io/fj5hF\" --output=duplicates.csv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id: 2334935475634\n", "rows: 10\n" ] } ], "source": [ "./openrefine-client --create duplicates.csv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### List" ] }, {
"cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2334935475634: duplicates\n" ] } ], "source": [ "./openrefine-client --list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Info" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " id: 2334935475634\n", " url: http://127.0.0.1:3333/project?project=2334935475634\n", " name: duplicates\n", " modified: 2019-08-21T23:40:30Z\n", " created: 2019-08-21T23:40:30Z\n", " rowCount: 10\n", "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", " column 001: email\n", " column 002: name\n", " column 003: state\n", " column 004: gender\n", " column 005: purchase\n" ] } ], "source": [ "./openrefine-client --info \"duplicates\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Export" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "email\tname\tstate\tgender\tpurchase\n", "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV\n", "melanie.white@example2.edu\tMelanie White\tNC\tF\tiPhone\n", "danny.baron@example1.com\tD. 
Baron\tCA\tM\tWinter jacket\n", "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\n", "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\n", "danny.baron@example1.com\tDaniel Baron\tCA\tM\tBike\n", "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\n", "melanie.white@example2.edu\tMelanie White\tNC\tF\tiPad\n", "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\n", "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\n" ] } ], "source": [ "./openrefine-client --export \"duplicates\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Apply" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Download to file duplicates-deletion.json complete\n" ] } ], "source": [ "./openrefine-client --download \"https://git.io/fj5ju\" --output=duplicates-deletion.json" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "File duplicates-deletion.json has been successfully applied to project 2334935475634\n" ] } ], "source": [ "./openrefine-client --apply duplicates-deletion.json \"duplicates\"" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "email\tcount\tname\tstate\tgender\tpurchase\n", "arthur.duff@example4.com\t2\tArthur Duff\tOR\tM\tDining table\n", "ben.morisson@example6.org\t1\tBen Morisson\tFL\tM\tAmplifier\n", "ben.tyler@example3.org\t1\tBen Tyler\tNV\tM\tFlashlight\n", "danny.baron@example1.com\t3\tDanny Baron\tCA\tM\tTV\n", "jean.griffith@example5.org\t1\tJean Griffith\tWA\tF\tPower drill\n", "melanie.white@example2.edu\t2\tMelanie White\tNC\tF\tiPhone\n" ] } ], "source": [ "./openrefine-client --export \"duplicates\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Export XLS" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Export to file deduped.xls complete\n" ] } ], "source": [ "./openrefine-client --export \"duplicates\" --output deduped.xls" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Delete" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Project 2334935475634 has been successfully deleted\n" ] } ], "source": [ "./openrefine-client --delete \"duplicates\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Templating" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id: 1633409429491\n", "rows: 10\n" ] } ], "source": [ "./openrefine-client --create duplicates.csv --projectName=advanced" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{ \"events\" : [\n", " { \"name\" : \"Melanie White\", \"purchase\" : \"iPhone\" },\n", " { \"name\" : \"Jean Griffith\", \"purchase\" : \"Power drill\" },\n", " { \"name\" : \"Melanie White\", \"purchase\" : \"iPad\" }\n", "] }" ] } ], "source": [ "./openrefine-client --export \"advanced\" \\\n", "--prefix='{ \"events\" : [\n", "' \\\n", "--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n", "--rowSeparator=',\n", "' \\\n", "--suffix='\n", "] }' \\\n", "--filterQuery='^F$' \\\n", "--filterColumn='gender'" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Export to files complete. 
Last file: advanced_3.json\n" ] } ], "source": [ "./openrefine-client --export \"advanced\" \\\n", "--prefix='{ \"events\" : [\n", "' \\\n", "--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n", "--rowSeparator=',\n", "' \\\n", "--suffix='\n", "] }' \\\n", "--filterQuery='^F$' \\\n", "--filterColumn='gender' \\\n", "--output=advanced.json \\\n", "--splitToFiles=true" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Export to files complete. Last file: advanced_melanie.white@example2.edu.json\n" ] } ], "source": [ "./openrefine-client --export \"advanced\" \\\n", "--prefix='{ \"events\" : [\n", "' \\\n", "--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n", "--rowSeparator=',\n", "' \\\n", "--suffix='\n", "] }' \\\n", "--filterQuery='^F$' \\\n", "--filterColumn='gender' \\\n", "--output=advanced.json \\\n", "--splitToFiles=true \\\n", "--suffixById=true" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "advanced_1.json \u001b[0m\u001b[38;5;33mdata\u001b[0m\n", "advanced_2.json deduped.xls\n", "advanced_3.json duplicates.csv\n", "advanced_jean.griffith@example5.org.json duplicates-deletion.json\n", "advanced_melanie.white@example2.edu.json \u001b[38;5;40mopenrefine-client\u001b[0m\n" ] } ], "source": [ "ls" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Delete" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Project 1633409429491 has been successfully deleted\n" ] } ], "source": [ "./openrefine-client --delete \"advanced\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Unicode" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "### fruits" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id: 2280962953279\n", "rows: 5\n", " id: 2280962953279\n", " url: http://127.0.0.1:3333/project?project=2280962953279\n", " name: evil-fruits\n", " modified: 2019-08-21T23:40:43Z\n", " created: 2019-08-21T23:40:43Z\n", " rowCount: 5\n", "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/evil-fruits.tsv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'evil-fruits', u'processQuotes': True, u'limit': -1, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", " column 001: 🔣\n", " column 002: code\n", " column 003: meaning\n", "🔣\tcode\tmeaning\n", "🍇\t1F347\tGRAPES\n", "🍉\t1F349\tWATERMELON\n", "🍒\t1F352\tCHERRIES\n", "🍓\t1F353\tSTRAWBERRY\n", "🍍\t1F34D\tPINEAPPLE\n" ] } ], "source": [ "./openrefine-client --create data/cli/evil-fruits.tsv\n", "./openrefine-client --info \"evil-fruits\"\n", "./openrefine-client --export \"evil-fruits\"" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Export to file emojis.csv complete\n", "🔣,code,meaning\n", "🍇,1F347,GRAPES\n", "🍉,1F349,WATERMELON\n", "🍒,1F352,CHERRIES\n", "🍓,1F353,STRAWBERRY\n", "🍍,1F34D,PINEAPPLE\n" ] } ], "source": [ "./openrefine-client --export \"evil-fruits\" --output emojis.csv\n", "cat emojis.csv" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{ \"emojis\" : [\n", " { \"symbol\" : \"🍇\", \"meaning\" : \"GRAPES\" },\n", " { \"symbol\" : \"🍉\", \"meaning\" : \"WATERMELON\" },\n", " { \"symbol\" : \"🍍\", \"meaning\" : \"PINEAPPLE\" }\n", "] }" ] } ], "source": [ "./openrefine-client --export \"evil-fruits\" \\\n", "--prefix='{ \"emojis\" : 
[\n", "' \\\n", "--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n", "--rowSeparator=',\n", "' \\\n", "--suffix='\n", "] }' \\\n", "--filterQuery='^1F34' \\\n", "--filterColumn='code'" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Export to files complete. Last file: trái cây_3.json\n" ] } ], "source": [ "./openrefine-client --export \"evil-fruits\" \\\n", "--prefix='{ \"emojis\" : [\n", "' \\\n", "--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n", "--rowSeparator=',\n", "' \\\n", "--suffix='\n", "] }' \\\n", "--filterQuery='^1F34' \\\n", "--filterColumn='code' \\\n", "--output='trái cây.json' \\\n", "--splitToFiles=true" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Export to files complete. 
Last file: trái cây_🍍.json\n" ] } ], "source": [ "./openrefine-client --export \"evil-fruits\" \\\n", "--prefix='{ \"emojis\" : [\n", "' \\\n", "--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n", "--rowSeparator=',\n", "' \\\n", "--suffix='\n", "] }' \\\n", "--filterQuery='^1F34' \\\n", "--filterColumn='code' \\\n", "--output='trái cây.json' \\\n", "--splitToFiles=true \\\n", "--suffixById=true" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " advanced_1.json emojis.csv\n", " advanced_2.json \u001b[0m\u001b[38;5;40mopenrefine-client\u001b[0m\n", " advanced_3.json 'trái cây_1.json'\n", " advanced_jean.griffith@example5.org.json 'trái cây_2.json'\n", " advanced_melanie.white@example2.edu.json 'trái cây_3.json'\n", " \u001b[38;5;33mdata\u001b[0m 'trái cây_🍇.json'\n", " deduped.xls 'trái cây_🍉.json'\n", " duplicates.csv 'trái cây_🍍.json'\n", " duplicates-deletion.json\n" ] } ], "source": [ "ls" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Project 2280962953279 has been successfully deleted\n" ] } ], "source": [ "./openrefine-client --delete \"evil-fruits\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### emoji-data" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id: 2019865211741\n", "rows: 20\n", " id: 2019865211741\n", " url: http://127.0.0.1:3333/project?project=2019865211741\n", " name: dữ liệu biểu tượng cảm xúc\n", " modified: 2019-08-21T23:41:06Z\n", " created: 2019-08-21T23:41:06Z\n", " rowCount: 20\n", "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/d\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac.txt', 
u'storeBlankRows': True, u'encoding': u'', u'projectName': u'd\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac', u'processQuotes': True, u'skipDataLines': 34, u'limit': 20, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n", " column 001: Column 1\n", " column 002: Column 2\n", " column 003: Column 3\n", " column 004: Column 4\n", " column 005: Column 5\n", " column 006: Column 6\n", "Column 1\tColumn 2\tColumn 3\tColumn 4\tColumn 5\tColumn 6\n", "00A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (©) COPYRIGHT SIGN\n", "00AE ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (®) REGISTERED SIGN\n", "203C ;\ttext ;\tL1 ;\tnone ;\ta j\t# V1.1 (‼) DOUBLE EXCLAMATION MARK\n", "2049 ;\ttext ;\tL1 ;\tnone ;\ta j\t# V3.0 (⁉) EXCLAMATION QUESTION MARK\n", "2122 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (™) TRADE MARK SIGN\n", "2139 ;\ttext ;\tL1 ;\tnone ;\tj\t# V3.0 (ℹ) INFORMATION SOURCE\n", "2194 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↔) LEFT RIGHT ARROW\n", "2195 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↕) UP DOWN ARROW\n", "2196 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↖) NORTH WEST ARROW\n", "2197 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↗) NORTH EAST ARROW\n", "2198 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↘) SOUTH EAST ARROW\n", "2199 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↙) SOUTH WEST ARROW\n", "21A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↩) LEFTWARDS ARROW WITH HOOK\n", "21AA ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↪) RIGHTWARDS ARROW WITH HOOK\n", "231A ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌚) WATCH\n", "231B ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌛) HOURGLASS\n", "2328 ;\ttext ;\tL2 ;\tnone ;\tx\t# V1.1 (⌨) KEYBOARD\n", "23CF ;\ttext ;\tL2 ;\tnone ;\tx\t# V4.0 (⏏) EJECT SYMBOL\n", "23E9 ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏩) BLACK RIGHT-POINTING DOUBLE TRIANGLE\n", "23EA ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏪) BLACK LEFT-POINTING DOUBLE TRIANGLE\n" ] } ], "source": [ "./openrefine-client --create \"data/cli/dữ liệu 
biểu tượng cảm xúc.txt\" \\\n", "--format=tsv \\\n", "--headerLines=0 \\\n", "--skipDataLines=34 \\\n", "--limit=20\n", "./openrefine-client --info \"dữ liệu biểu tượng cảm xúc\"\n", "./openrefine-client --export \"dữ liệu biểu tượng cảm xúc\"" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2019865211741: dữ liệu biểu tượng cảm xúc\n" ] } ], "source": [ "./openrefine-client --list" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Delete" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Project 2019865211741 has been successfully deleted\n" ] } ], "source": [ "./openrefine-client --delete \"dữ liệu biểu tượng cảm xúc\"" ] } ], "metadata": { "kernelspec": { "display_name": "Bash", "language": "bash", "name": "bash" }, "language_info": { "codemirror_mode": "shell", "file_extension": ".sh", "mimetype": "text/x-sh", "name": "bash" } }, "nbformat": 4, "nbformat_minor": 2 }