From 82da3f7b4e50d3807a64ca96b4f43ad1432b88e7 Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Sat, 2 Jan 2021 17:02:39 +0100 Subject: [PATCH] add functional tests all CLI options replaces manual tests with jupyter notebook --- .gitignore | 5 +- README.md | 14 +- tests-cli.sh | 123 + tests/apply-utf8.sh | 57 + tests/apply.sh | 57 + tests/cli_bash.ipynb | 819 ----- tests/cli_python2.ipynb | 2824 ----------------- tests/create-csv-encoding.sh | 41 + tests/create-csv-guessCellValueTypes.sh | 40 + tests/create-csv-headerLines.sh | 41 + tests/create-csv-ignoreLines.sh | 39 + tests/create-csv-limit.sh | 39 + tests/create-csv-processQuotes.sh | 40 + tests/create-csv-projectTags.sh | 45 + tests/create-csv-separator.sh | 40 + tests/create-csv-skipDataLines.sh | 39 + tests/create-csv-storeBlankCellsAsNulls.sh | 58 + tests/create-csv-storeBlankRows.sh | 39 + tests/create-csv-utf8.sh | 40 + tests/create-csv.sh | 40 + tests/create-json-recordPath.sh | 55 + tests/create-json-storeEmptyStrings.sh | 52 + tests/create-json-trimStrings.sh | 62 + tests/create-json-utf8.sh | 53 + tests/create-json.sh | 53 + tests/create-ods-sheets-utf8.sh | 44 + tests/create-ods.sh | 48 + tests/create-tsv-utf8.sh | 40 + tests/create-tsv.sh | 40 + tests/create-txt-fixed-width-headerLines.sh | 80 + tests/create-txt-fixed-width-utf8.sh | 81 + tests/create-txt-fixed-width.sh | 81 + tests/create-txt-linesPerRow.sh | 39 + tests/create-txt.sh | 39 + tests/create-xls-sheets-utf8.sh | 44 + tests/create-xls.sh | 48 + tests/create-xlsx-sheets-utf8.sh | 44 + tests/create-xlsx.sh | 48 + tests/create-xml-recordPath.sh | 91 + tests/create-xml-utf8.sh | 95 + tests/create-xml.sh | 95 + tests/create-zip-includeFileSources.sh | 44 + tests/create-zip.sh | 44 + tests/data/example.ods | Bin 0 -> 9427 bytes tests/data/example.xls | Bin 0 -> 6656 bytes tests/data/example.xlsx | Bin 0 -> 6046 bytes tests/delete-utf8.sh | 35 + tests/delete.sh | 35 + tests/download.sh | 21 + tests/export-csv-utf8.sh | 44 + 
tests/export-csv.sh | 40 + tests/export-html-utf8.sh | 72 + tests/export-html.sh | 50 + tests/export-ods-utf8.sh | 43 + tests/export-ods.sh | 47 + tests/export-tsv-utf8.sh | 44 + tests/export-tsv.sh | 40 + tests/export-utf8.sh | 44 + tests/export-xls-utf8.sh | 43 + tests/export-xls.sh | 43 + tests/export-xlsx-utf8.sh | 45 + tests/export-xlsx.sh | 43 + tests/export.sh | 40 + tests/format-create-separator.sh | 40 + tests/format-create.sh | 41 + tests/format-export-output.sh | 40 + tests/format-export.sh | 40 + tests/help.sh | 27 + tests/info-utf8.sh | 39 + tests/info.sh | 35 + tests/list-utf8.sh | 35 + tests/list.sh | 35 + tests/template-facets.sh | 58 + tests/template-filterQuery-utf8.sh | 54 + tests/template-filterQuery.sh | 59 + tests/template-splitToFiles-mode.sh | 58 + .../template-splitToFiles-suffixById-utf8.sh | 52 + tests/template-splitToFiles-suffixById.sh | 58 + tests/template-splitToFiles-utf8.sh | 51 + tests/template-splitToFiles.sh | 57 + tests/template-utf8.sh | 54 + tests/template.sh | 64 + tests/usage.sh | 27 + 83 files changed, 3763 insertions(+), 3650 deletions(-) create mode 100755 tests-cli.sh create mode 100644 tests/apply-utf8.sh create mode 100644 tests/apply.sh delete mode 100644 tests/cli_bash.ipynb delete mode 100644 tests/cli_python2.ipynb create mode 100644 tests/create-csv-encoding.sh create mode 100644 tests/create-csv-guessCellValueTypes.sh create mode 100644 tests/create-csv-headerLines.sh create mode 100644 tests/create-csv-ignoreLines.sh create mode 100644 tests/create-csv-limit.sh create mode 100644 tests/create-csv-processQuotes.sh create mode 100644 tests/create-csv-projectTags.sh create mode 100644 tests/create-csv-separator.sh create mode 100644 tests/create-csv-skipDataLines.sh create mode 100644 tests/create-csv-storeBlankCellsAsNulls.sh create mode 100644 tests/create-csv-storeBlankRows.sh create mode 100644 tests/create-csv-utf8.sh create mode 100644 tests/create-csv.sh create mode 100644 tests/create-json-recordPath.sh 
create mode 100644 tests/create-json-storeEmptyStrings.sh create mode 100644 tests/create-json-trimStrings.sh create mode 100644 tests/create-json-utf8.sh create mode 100644 tests/create-json.sh create mode 100644 tests/create-ods-sheets-utf8.sh create mode 100644 tests/create-ods.sh create mode 100644 tests/create-tsv-utf8.sh create mode 100644 tests/create-tsv.sh create mode 100644 tests/create-txt-fixed-width-headerLines.sh create mode 100644 tests/create-txt-fixed-width-utf8.sh create mode 100644 tests/create-txt-fixed-width.sh create mode 100644 tests/create-txt-linesPerRow.sh create mode 100644 tests/create-txt.sh create mode 100644 tests/create-xls-sheets-utf8.sh create mode 100644 tests/create-xls.sh create mode 100644 tests/create-xlsx-sheets-utf8.sh create mode 100644 tests/create-xlsx.sh create mode 100644 tests/create-xml-recordPath.sh create mode 100644 tests/create-xml-utf8.sh create mode 100644 tests/create-xml.sh create mode 100644 tests/create-zip-includeFileSources.sh create mode 100644 tests/create-zip.sh create mode 100644 tests/data/example.ods create mode 100644 tests/data/example.xls create mode 100644 tests/data/example.xlsx create mode 100644 tests/delete-utf8.sh create mode 100644 tests/delete.sh create mode 100644 tests/download.sh create mode 100644 tests/export-csv-utf8.sh create mode 100644 tests/export-csv.sh create mode 100644 tests/export-html-utf8.sh create mode 100644 tests/export-html.sh create mode 100644 tests/export-ods-utf8.sh create mode 100644 tests/export-ods.sh create mode 100644 tests/export-tsv-utf8.sh create mode 100644 tests/export-tsv.sh create mode 100644 tests/export-utf8.sh create mode 100644 tests/export-xls-utf8.sh create mode 100644 tests/export-xls.sh create mode 100644 tests/export-xlsx-utf8.sh create mode 100644 tests/export-xlsx.sh create mode 100644 tests/export.sh create mode 100644 tests/format-create-separator.sh create mode 100644 tests/format-create.sh create mode 100644 tests/format-export-output.sh 
create mode 100644 tests/format-export.sh create mode 100644 tests/help.sh create mode 100644 tests/info-utf8.sh create mode 100644 tests/info.sh create mode 100644 tests/list-utf8.sh create mode 100644 tests/list.sh create mode 100644 tests/template-facets.sh create mode 100644 tests/template-filterQuery-utf8.sh create mode 100644 tests/template-filterQuery.sh create mode 100644 tests/template-splitToFiles-mode.sh create mode 100644 tests/template-splitToFiles-suffixById-utf8.sh create mode 100644 tests/template-splitToFiles-suffixById.sh create mode 100644 tests/template-splitToFiles-utf8.sh create mode 100644 tests/template-splitToFiles.sh create mode 100644 tests/template-utf8.sh create mode 100644 tests/template.sh create mode 100644 tests/usage.sh diff --git a/.gitignore b/.gitignore index f2e4460..7c12cbe 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,7 @@ dist .* openrefine_client.egg-info refine.spec -README.html +openrefine-2.* +openrefine-3.* +openrefine-client_* +tests-cli.log diff --git a/README.md b/README.md index 378104c..d657672 100644 --- a/README.md +++ b/README.md @@ -679,11 +679,12 @@ There is also a script that uses docker images to run the unit tests with differ Note to myself: When releasing a new version... -1. Run tests +1. Run functional tests ```sh - ./tests.sh -a - jupyter notebook tests/cli_python2.ipynb + for v in 2.7 2.8 3.0 3.1 3.2; do + ./tests-cli.sh $v + done ``` 2. Make final changes in Git @@ -710,11 +711,12 @@ Note to myself: When releasing a new version... python2 -m PyInstaller --onefile refine.py --hidden-import google.refine.__main__ ``` -4. Run test with Linux executable +4. Run functional tests with Linux executable ```sh - ./tests.sh -a - jupyter notebook tests/cli_bash.ipynb + for v in 2.7 2.8 3.0 3.1 3.2; do + ./tests-cli.sh $v openrefine-client_0-3-7_linux + done ``` 5. 
Create release in GitHub
diff --git a/tests-cli.sh b/tests-cli.sh
new file mode 100755
index 0000000..deef082
--- /dev/null
+++ b/tests-cli.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# Script for running functional tests against the CLI
+
+# Copyright (c) 2011 Paul Makepeace, Real Programmers. All rights reserved.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# ================================== CONFIG ================================== #
+
+cd "${BASH_SOURCE%/*}/" || exit 1
+
+port=3334
+
+if [[ ${1} ]]; then # $1: OpenRefine version to test against (default: 3.2)
+  version="${1}"
+else
+  version="3.2"
+fi
+refine="openrefine-${version}/refine"
+
+if [[ ${2} ]]; then # $2: client executable (default: python2 refine.py)
+  client="$(readlink -e "${2}")"
+else
+  client="python2 $(readlink -e refine.py)"
+fi
+cmd="${client} -H localhost -P ${port}"
+
+if [[ ${3} ]]; then # $3: only run tests whose file name contains this string
+  filename="${3%%.*}"
+else
+  filename=""
+fi
+
+# =============================== REQUIREMENTS =============================== #
+
+# check existence of java and cURL
+if [[ -z "$(command -v java 2> /dev/null)" ]] ; then
+  echo 1>&2 "ERROR: OpenRefine requires JAVA runtime environment (jre)" \
+    "https://openjdk.java.net/install/"
+  exit 1
+fi
+if [[ -z "$(command -v curl 2> /dev/null)" ]] ; then
+  echo 1>&2 "ERROR: This shell script requires cURL" \
+    "https://curl.haxx.se/download.html"
+  exit 1
+fi
+# download OpenRefine
+if [[ -z "$(readlink -e "${refine}")" ]]; then
+  echo "Download OpenRefine ${version}..."
+  mkdir -p "$(dirname "${refine}")"
+  curl -L --output openrefine.tar.gz \
+    "https://github.com/OpenRefine/OpenRefine/releases/download/${version}/openrefine-linux-${version}.tar.gz"
+  echo "Install OpenRefine ${version} in subdirectory $(dirname "${refine}")..."
+  tar -xzf openrefine.tar.gz -C "$(dirname "${refine}")" --strip 1 --totals
+  rm -f openrefine.tar.gz
+  # do not try to open OpenRefine in browser
+  sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' \
+    "$(dirname "${refine}")"/refine.ini
+  # set autosave period from 5 minutes to 25 hours
+  sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1500/' \
+    "$(dirname "${refine}")"/refine.ini
+  echo
+fi
+
+# ================================== SETUP =================================== #
+
+dir="$(readlink -f "tests/tmp")"
+mkdir -p "${dir}"
+rm -f tests-cli.log
+
+echo "start OpenRefine ${version}..."
+"${refine}" -v warn -p "${port}" -d "${dir}" &>> tests-cli.log &
+pid_server=${!}
+timeout 30s bash -c "until curl -s 'http://localhost:${port}' \
+      | cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
+      || { echo 1>&2 "ERROR: starting OpenRefine server failed!"
+           kill -9 "${pid_server}" 2> /dev/null; exit 1; } # no orphan server
+echo
+
+# ================================== TESTS =================================== #
+
+echo "running tests, please wait..."
+tests=()
+results=()
+for t in tests/*${filename}*.sh; do
+  tests+=("${t}")
+  echo "======================= ${t} =======================" &>> tests-cli.log
+  bash "${t}" "${cmd}" "${version}" &>> tests-cli.log
+  results+=("${?}")
+done
+echo
+
+# ================================= TEARDOWN ================================= #
+
+echo "cleanup..."
+{ kill -9 "${pid_server}" && wait "${pid_server}"; } 2>/dev/null
+rm -rf "${dir}"
+echo
+
+# ================================= SUMMARY ================================== #
+
+printf "%s\t%s\n" "code" "test"
+printf "%s\t%s\n" "----" "----------------"
+for i in "${!tests[@]}"; do
+  printf "%s\t%s\n" "${results[$i]}" "${tests[$i]}"
+done
+echo
+if [[ " ${results[*]} " =~ [1-9] ]]; then # any non-zero exit code recorded
+  echo "failed tests! check tests-cli.log for debugging"; echo; exit 1
+else
+  echo "all tests passed!"; echo
+fi
diff --git a/tests/apply-utf8.sh b/tests/apply-utf8.sh
new file mode 100644
index 0000000..bf58766
--- /dev/null
+++ b/tests/apply-utf8.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Functional test: --apply with a transform that inserts a non-ASCII character
+# =============================== ENVIRONMENT ================================ #
+
+if [[ ${1} ]]; then # $1: client command line, supplied by tests-cli.sh
+  cmd="${1}"
+else
+  echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1
+fi
+
+t="$(basename "${BASH_SOURCE[0]}" .sh)" # test name = script name without .sh
+cd "${BASH_SOURCE%/*}/" || exit 1 # work inside this script's directory
+mkdir -p "tmp/${t}"
+
+# =================================== DATA =================================== #
+
+cat << "DATA" > "tmp/${t}/${t}.csv" # quoted delimiter: body is kept literal
+a,b,c
+1,2,3
+0,0,0
+$,\,'
+DATA
+
+cat << "DATA" > "tmp/${t}/${t}.transform"
+[
+  {
+    "op": "core/column-addition",
+    "engineConfig": {
+      "mode": "row-based"
+    },
+    "newColumnName": "apply",
+    "columnInsertIndex": 2,
+    "baseColumnName": "b",
+    "expression": "grel:value.replace('2','⛲')",
+    "onError": "set-to-blank"
+  }
+]
+DATA
+
+# ================================= ASSERTION ================================ #
+
+cat << "DATA" > "tmp/${t}/${t}.assert"
+a	b	apply	c
+1	2	⛲	3
+0	0	0	0
+$	\	\	'
+DATA
+
+# ================================== ACTION ================================== #
+
+${cmd} --create "tmp/${t}/${t}.csv" # ${cmd} is unquoted on purpose (word-split)
+${cmd} --apply "tmp/${t}/${t}.transform" "${t}"
+${cmd} --export "${t}" --output "tmp/${t}/${t}.output"
+
+# =================================== TEST =================================== #
+
+diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" # exit status = result
diff --git a/tests/apply.sh b/tests/apply.sh
new
file mode 100644
index 0000000..ffbaa91
--- /dev/null
+++ b/tests/apply.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Functional test: --apply adds a column via a JSON transform; export is diffed
+# =============================== ENVIRONMENT ================================ #
+
+if [[ ${1} ]]; then # $1: client command line, supplied by tests-cli.sh
+  cmd="${1}"
+else
+  echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1
+fi
+
+t="$(basename "${BASH_SOURCE[0]}" .sh)" # test name = script name without .sh
+cd "${BASH_SOURCE%/*}/" || exit 1 # work inside this script's directory
+mkdir -p "tmp/${t}"
+
+# =================================== DATA =================================== #
+
+cat << "DATA" > "tmp/${t}/${t}.csv" # quoted delimiter: body is kept literal
+a,b,c
+1,2,3
+0,0,0
+$,\,'
+DATA
+
+cat << "DATA" > "tmp/${t}/${t}.transform"
+[
+  {
+    "op": "core/column-addition",
+    "engineConfig": {
+      "mode": "row-based"
+    },
+    "newColumnName": "apply",
+    "columnInsertIndex": 2,
+    "baseColumnName": "b",
+    "expression": "grel:value.replace('2','TEST')",
+    "onError": "set-to-blank"
+  }
+]
+DATA
+
+# ================================= ASSERTION ================================ #
+
+cat << "DATA" > "tmp/${t}/${t}.assert"
+a	b	apply	c
+1	2	TEST	3
+0	0	0	0
+$	\	\	'
+DATA
+
+# ================================== ACTION ================================== #
+
+${cmd} --create "tmp/${t}/${t}.csv" # ${cmd} is unquoted on purpose (word-split)
+${cmd} --apply "tmp/${t}/${t}.transform" "${t}"
+${cmd} --export "${t}" --output "tmp/${t}/${t}.output"
+
+# =================================== TEST =================================== #
+
+diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" # exit status = result
diff --git a/tests/cli_bash.ipynb b/tests/cli_bash.ipynb deleted file mode 100644 index 5e4ca86..0000000 --- a/tests/cli_bash.ipynb +++ /dev/null @@ -1,819 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test executable in a Linux Bash environment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Install\n", - "\n", - "This notebook requires a [Bash kernel](https://github.com/takluyver/bash_kernel) environment and an OpenRefine server running at http://127.0.0.1:3333."
- ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/tmp/20190822_013937\n" - ] - } - ], - "source": [ - "workspace=$(date +%Y%m%d_%H%M%S)\n", - "mkdir -p /tmp/$workspace\n", - "cp -r data /tmp/$workspace\n", - "cd /tmp/$workspace && pwd" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.8/openrefine-client_0-3-8_linux:\n", - "2019-08-22 01:39:40 ERROR 404: Not Found.\n" - ] - } - ], - "source": [ - "wget -nv https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.8/openrefine-client_0-3-8_linux -O openrefine-client\n", - "chmod +x openrefine-client" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## README.MD" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Download to file duplicates.csv complete\n" - ] - } - ], - "source": [ - "./openrefine-client --download \"https://git.io/fj5hF\" --output=duplicates.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2334935475634\n", - "rows: 10\n" - ] - } - ], - "source": [ - "./openrefine-client --create duplicates.csv" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ 
- " 2334935475634: duplicates\n" - ] - } - ], - "source": [ - "./openrefine-client --list" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Info" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " id: 2334935475634\n", - " url: http://127.0.0.1:3333/project?project=2334935475634\n", - " name: duplicates\n", - " modified: 2019-08-21T23:40:30Z\n", - " created: 2019-08-21T23:40:30Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n" - ] - } - ], - "source": [ - "./openrefine-client --info \"duplicates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Export" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "email\tname\tstate\tgender\tpurchase\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\tiPhone\n", - "danny.baron@example1.com\tD. 
Baron\tCA\tM\tWinter jacket\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\n", - "danny.baron@example1.com\tDaniel Baron\tCA\tM\tBike\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\tiPad\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\n" - ] - } - ], - "source": [ - "./openrefine-client --export \"duplicates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Apply" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Download to file duplicates-deletion.json complete\n" - ] - } - ], - "source": [ - "./openrefine-client --download \"https://git.io/fj5ju\" --output=duplicates-deletion.json" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File duplicates-deletion.json has been successfully applied to project 2334935475634\n" - ] - } - ], - "source": [ - "./openrefine-client --apply duplicates-deletion.json \"duplicates\"" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "email\tcount\tname\tstate\tgender\tpurchase\n", - "arthur.duff@example4.com\t2\tArthur Duff\tOR\tM\tDining table\n", - "ben.morisson@example6.org\t1\tBen Morisson\tFL\tM\tAmplifier\n", - "ben.tyler@example3.org\t1\tBen Tyler\tNV\tM\tFlashlight\n", - "danny.baron@example1.com\t3\tDanny Baron\tCA\tM\tTV\n", - "jean.griffith@example5.org\t1\tJean Griffith\tWA\tF\tPower drill\n", - "melanie.white@example2.edu\t2\tMelanie White\tNC\tF\tiPhone\n" - ] - } - ], - "source": [ - "./openrefine-client --export \"duplicates\"" 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Export XLS" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to file deduped.xls complete\n" - ] - } - ], - "source": [ - "./openrefine-client --export \"duplicates\" --output deduped.xls" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 2334935475634 has been successfully deleted\n" - ] - } - ], - "source": [ - "./openrefine-client --delete \"duplicates\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Templating" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1633409429491\n", - "rows: 10\n" - ] - } - ], - "source": [ - "./openrefine-client --create duplicates.csv --projectName=advanced" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ \"events\" : [\n", - " { \"name\" : \"Melanie White\", \"purchase\" : \"iPhone\" },\n", - " { \"name\" : \"Jean Griffith\", \"purchase\" : \"Power drill\" },\n", - " { \"name\" : \"Melanie White\", \"purchase\" : \"iPad\" }\n", - "] }" - ] - } - ], - "source": [ - "./openrefine-client --export \"advanced\" \\\n", - "--prefix='{ \"events\" : [\n", - "' \\\n", - "--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n", - "--rowSeparator=',\n", - "' \\\n", - "--suffix='\n", - "] }' \\\n", - "--filterQuery='^F$' \\\n", - "--filterColumn='gender'" - ] - }, - { - 
"cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to files complete. Last file: advanced_3.json\n" - ] - } - ], - "source": [ - "./openrefine-client --export \"advanced\" \\\n", - "--prefix='{ \"events\" : [\n", - "' \\\n", - "--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n", - "--rowSeparator=',\n", - "' \\\n", - "--suffix='\n", - "] }' \\\n", - "--filterQuery='^F$' \\\n", - "--filterColumn='gender' \\\n", - "--output=advanced.json \\\n", - "--splitToFiles=true" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to files complete. Last file: advanced_melanie.white@example2.edu.json\n" - ] - } - ], - "source": [ - "./openrefine-client --export \"advanced\" \\\n", - "--prefix='{ \"events\" : [\n", - "' \\\n", - "--template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }' \\\n", - "--rowSeparator=',\n", - "' \\\n", - "--suffix='\n", - "] }' \\\n", - "--filterQuery='^F$' \\\n", - "--filterColumn='gender' \\\n", - "--output=advanced.json \\\n", - "--splitToFiles=true \\\n", - "--suffixById=true" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "advanced_1.json \u001b[0m\u001b[38;5;33mdata\u001b[0m\n", - "advanced_2.json deduped.xls\n", - "advanced_3.json duplicates.csv\n", - "advanced_jean.griffith@example5.org.json duplicates-deletion.json\n", - "advanced_melanie.white@example2.edu.json \u001b[38;5;40mopenrefine-client\u001b[0m\n" - ] - } - ], - "source": [ - "ls" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - 
"metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 1633409429491 has been successfully deleted\n" - ] - } - ], - "source": [ - "./openrefine-client --delete \"advanced\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Unicode" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### fruits" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2280962953279\n", - "rows: 5\n", - " id: 2280962953279\n", - " url: http://127.0.0.1:3333/project?project=2280962953279\n", - " name: evil-fruits\n", - " modified: 2019-08-21T23:40:43Z\n", - " created: 2019-08-21T23:40:43Z\n", - " rowCount: 5\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/evil-fruits.tsv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'evil-fruits', u'processQuotes': True, u'limit': -1, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: 🔣\n", - " column 002: code\n", - " column 003: meaning\n", - "🔣\tcode\tmeaning\n", - "🍇\t1F347\tGRAPES\n", - "🍉\t1F349\tWATERMELON\n", - "🍒\t1F352\tCHERRIES\n", - "🍓\t1F353\tSTRAWBERRY\n", - "🍍\t1F34D\tPINEAPPLE\n" - ] - } - ], - "source": [ - "./openrefine-client --create data/cli/evil-fruits.tsv\n", - "./openrefine-client --info \"evil-fruits\"\n", - "./openrefine-client --export \"evil-fruits\"" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to file emojis.csv complete\n", - "🔣,code,meaning\n", - "🍇,1F347,GRAPES\n", - "🍉,1F349,WATERMELON\n", - "🍒,1F352,CHERRIES\n", - "🍓,1F353,STRAWBERRY\n", - "🍍,1F34D,PINEAPPLE\n" - ] - } - ], - "source": [ - "./openrefine-client --export 
\"evil-fruits\" --output emojis.csv\n", - "cat emojis.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ \"emojis\" : [\n", - " { \"symbol\" : \"🍇\", \"meaning\" : \"GRAPES\" },\n", - " { \"symbol\" : \"🍉\", \"meaning\" : \"WATERMELON\" },\n", - " { \"symbol\" : \"🍍\", \"meaning\" : \"PINEAPPLE\" }\n", - "] }" - ] - } - ], - "source": [ - "./openrefine-client --export \"evil-fruits\" \\\n", - "--prefix='{ \"emojis\" : [\n", - "' \\\n", - "--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n", - "--rowSeparator=',\n", - "' \\\n", - "--suffix='\n", - "] }' \\\n", - "--filterQuery='^1F34' \\\n", - "--filterColumn='code'" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to files complete. Last file: trái cây_3.json\n" - ] - } - ], - "source": [ - "./openrefine-client --export \"evil-fruits\" \\\n", - "--prefix='{ \"emojis\" : [\n", - "' \\\n", - "--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n", - "--rowSeparator=',\n", - "' \\\n", - "--suffix='\n", - "] }' \\\n", - "--filterQuery='^1F34' \\\n", - "--filterColumn='code' \\\n", - "--output='trái cây.json' \\\n", - "--splitToFiles=true" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to files complete. 
Last file: trái cây_🍍.json\n" - ] - } - ], - "source": [ - "./openrefine-client --export \"evil-fruits\" \\\n", - "--prefix='{ \"emojis\" : [\n", - "' \\\n", - "--template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }' \\\n", - "--rowSeparator=',\n", - "' \\\n", - "--suffix='\n", - "] }' \\\n", - "--filterQuery='^1F34' \\\n", - "--filterColumn='code' \\\n", - "--output='trái cây.json' \\\n", - "--splitToFiles=true \\\n", - "--suffixById=true" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " advanced_1.json emojis.csv\n", - " advanced_2.json \u001b[0m\u001b[38;5;40mopenrefine-client\u001b[0m\n", - " advanced_3.json 'trái cây_1.json'\n", - " advanced_jean.griffith@example5.org.json 'trái cây_2.json'\n", - " advanced_melanie.white@example2.edu.json 'trái cây_3.json'\n", - " \u001b[38;5;33mdata\u001b[0m 'trái cây_🍇.json'\n", - " deduped.xls 'trái cây_🍉.json'\n", - " duplicates.csv 'trái cây_🍍.json'\n", - " duplicates-deletion.json\n" - ] - } - ], - "source": [ - "ls" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 2280962953279 has been successfully deleted\n" - ] - } - ], - "source": [ - "./openrefine-client --delete \"evil-fruits\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### emoji-data" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2019865211741\n", - "rows: 20\n", - " id: 2019865211741\n", - " url: http://127.0.0.1:3333/project?project=2019865211741\n", - " name: dữ liệu biểu tượng cảm xúc\n", - " modified: 2019-08-21T23:41:06Z\n", - " created: 2019-08-21T23:41:06Z\n", - " rowCount: 
20\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/d\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac.txt', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'd\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac', u'processQuotes': True, u'skipDataLines': 34, u'limit': 20, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n", - " column 001: Column 1\n", - " column 002: Column 2\n", - " column 003: Column 3\n", - " column 004: Column 4\n", - " column 005: Column 5\n", - " column 006: Column 6\n", - "Column 1\tColumn 2\tColumn 3\tColumn 4\tColumn 5\tColumn 6\n", - "00A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (©) COPYRIGHT SIGN\n", - "00AE ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (®) REGISTERED SIGN\n", - "203C ;\ttext ;\tL1 ;\tnone ;\ta j\t# V1.1 (‼) DOUBLE EXCLAMATION MARK\n", - "2049 ;\ttext ;\tL1 ;\tnone ;\ta j\t# V3.0 (⁉) EXCLAMATION QUESTION MARK\n", - "2122 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (™) TRADE MARK SIGN\n", - "2139 ;\ttext ;\tL1 ;\tnone ;\tj\t# V3.0 (ℹ) INFORMATION SOURCE\n", - "2194 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↔) LEFT RIGHT ARROW\n", - "2195 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↕) UP DOWN ARROW\n", - "2196 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↖) NORTH WEST ARROW\n", - "2197 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↗) NORTH EAST ARROW\n", - "2198 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↘) SOUTH EAST ARROW\n", - "2199 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↙) SOUTH WEST ARROW\n", - "21A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↩) LEFTWARDS ARROW WITH HOOK\n", - "21AA ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↪) RIGHTWARDS ARROW WITH HOOK\n", - "231A ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌚) WATCH\n", - "231B ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌛) HOURGLASS\n", - "2328 ;\ttext ;\tL2 ;\tnone ;\tx\t# V1.1 (⌨) KEYBOARD\n", - "23CF ;\ttext ;\tL2 ;\tnone ;\tx\t# V4.0 (⏏) EJECT SYMBOL\n", - "23E9 ;\temoji ;\tL1 ;\tnone 
;\tj w\t# V6.0 (⏩) BLACK RIGHT-POINTING DOUBLE TRIANGLE\n", - "23EA ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏪) BLACK LEFT-POINTING DOUBLE TRIANGLE\n" - ] - } - ], - "source": [ - "./openrefine-client --create \"data/cli/dữ liệu biểu tượng cảm xúc.txt\" \\\n", - "--format=tsv \\\n", - "--headerLines=0 \\\n", - "--skipDataLines=34 \\\n", - "--limit=20\n", - "./openrefine-client --info \"dữ liệu biểu tượng cảm xúc\"\n", - "./openrefine-client --export \"dữ liệu biểu tượng cảm xúc\"" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 2019865211741: dữ liệu biểu tượng cảm xúc\n" - ] - } - ], - "source": [ - "./openrefine-client --list" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 2019865211741 has been successfully deleted\n" - ] - } - ], - "source": [ - "./openrefine-client --delete \"dữ liệu biểu tượng cảm xúc\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Bash", - "language": "bash", - "name": "bash" - }, - "language_info": { - "codemirror_mode": "shell", - "file_extension": ".sh", - "mimetype": "text/x-sh", - "name": "bash" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/cli_python2.ipynb b/tests/cli_python2.ipynb deleted file mode 100644 index 933ea3c..0000000 --- a/tests/cli_python2.ipynb +++ /dev/null @@ -1,2824 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test module cli in a Python 2 environment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Install\n", - "\n", - "This notebook requires a Python 2.7 environment and an OpenRefine server running at http://127.0.0.1:3333." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[33mDEPRECATION: Python 2.7 will reach the end of its life on January 1st, 2020. Please upgrade your Python as Python 2.7 won't be maintained after that date. A future version of pip will drop support for Python 2.7. More details about Python 2 support in pip, can be found at https://pip.pypa.io/en/latest/development/release-process/#python-2-support\u001b[0m\n", - "Processing /home/felix/git/openrefine-client\n", - "Requirement already satisfied, skipping upgrade: urllib2_file in /home/felix/.local/lib/python2.7/site-packages (from openrefine-client==0.3.7) (0.2.1)\n", - "Installing collected packages: openrefine-client\n", - " Found existing installation: openrefine-client 0.3.7\n", - " Uninstalling openrefine-client-0.3.7:\n", - " Successfully uninstalled openrefine-client-0.3.7\n", - " Running setup.py install for openrefine-client ... \u001b[?25ldone\n", - "\u001b[?25hSuccessfully installed openrefine-client-0.3.7\n" - ] - } - ], - "source": [ - "import sys\n", - "!{sys.executable} -m pip install .. 
--user --upgrade" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/tmp/tmp24HyYg\n" - ] - } - ], - "source": [ - "import tempfile\n", - "import shutil\n", - "import os\n", - "dirpath = tempfile.mkdtemp()\n", - "shutil.copytree('data',dirpath + '/data')\n", - "print(dirpath)\n", - "os.chdir(dirpath)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from google.refine import cli" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## README.md" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Download to file duplicates.csv complete\n" - ] - } - ], - "source": [ - "cli.download('https://git.io/fj5hF','duplicates.csv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2019539621291\n", - "rows: 10\n" - ] - } - ], - "source": [ - "p1 = cli.create('duplicates.csv')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### List" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 2019539621291: duplicates\n" - ] - } - ], - "source": [ - "cli.ls()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Info" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " id: 2019539621291\n", - " url: 
http://127.0.0.1:3333/project?project=2019539621291\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:03Z\n", - " created: 2019-08-21T23:31:02Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n" - ] - } - ], - "source": [ - "cli.info(p1.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Export" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "email\tname\tstate\tgender\tpurchase\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\tiPhone\n", - "danny.baron@example1.com\tD. 
Baron\tCA\tM\tWinter jacket\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\n", - "danny.baron@example1.com\tDaniel Baron\tCA\tM\tBike\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\tiPad\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\n" - ] - } - ], - "source": [ - "cli.export(p1.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Apply" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Download to file duplicates-deletion.json complete\n" - ] - } - ], - "source": [ - "cli.download('https://git.io/fj5ju','duplicates-deletion.json')" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File duplicates-deletion.json has been successfully applied to project 2019539621291\n" - ] - } - ], - "source": [ - "cli.apply(p1.project_id, 'duplicates-deletion.json')" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "email\tcount\tname\tstate\tgender\tpurchase\n", - "arthur.duff@example4.com\t2\tArthur Duff\tOR\tM\tDining table\n", - "ben.morisson@example6.org\t1\tBen Morisson\tFL\tM\tAmplifier\n", - "ben.tyler@example3.org\t1\tBen Tyler\tNV\tM\tFlashlight\n", - "danny.baron@example1.com\t3\tDanny Baron\tCA\tM\tTV\n", - "jean.griffith@example5.org\t1\tJean Griffith\tWA\tF\tPower drill\n", - "melanie.white@example2.edu\t2\tMelanie White\tNC\tF\tiPhone\n" - ] - } - ], - "source": [ - "cli.export(p1.project_id)" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "### Export XLS" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "email\tcount\tname\tstate\tgender\tpurchase\n", - "arthur.duff@example4.com\t2\tArthur Duff\tOR\tM\tDining table\n", - "ben.morisson@example6.org\t1\tBen Morisson\tFL\tM\tAmplifier\n", - "ben.tyler@example3.org\t1\tBen Tyler\tNV\tM\tFlashlight\n", - "danny.baron@example1.com\t3\tDanny Baron\tCA\tM\tTV\n", - "jean.griffith@example5.org\t1\tJean Griffith\tWA\tF\tPower drill\n", - "melanie.white@example2.edu\t2\tMelanie White\tNC\tF\tiPhone\n" - ] - } - ], - "source": [ - "cli.export(p1.project_id, 'deduped.xls')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 2019539621291 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p1.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Templating" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1716843473792\n", - "rows: 10\n" - ] - } - ], - "source": [ - "p2 = cli.create('duplicates.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ \"events\" : [\n", - " { \"name\" : \"Melanie White\", \"purchase\" : \"iPhone\" },\n", - " { \"name\" : \"Jean Griffith\", \"purchase\" : \"Power drill\" },\n", - " { \"name\" : \"Melanie White\", \"purchase\" : \"iPad\" }\n", - "] }" - ] - } - ], - "source": [ - "cli.templating(p2.project_id,\n", - "prefix='''{ \"events\" : [\n", - "''',\n", - "template=' { \"name\" : 
{{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }',\n", - "rowSeparator=''',\n", - "''',\n", - "suffix='''\n", - "] }''',\n", - "filterQuery='^F$',\n", - "filterColumn='gender')" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to files complete. Last file: advanced_3.json\n" - ] - } - ], - "source": [ - "cli.templating(p2.project_id,\n", - "prefix='''{ \"events\" : [\n", - "''',\n", - "template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }',\n", - "rowSeparator=''',\n", - "''',\n", - "suffix='''\n", - "] }''',\n", - "filterQuery='^F$',\n", - "filterColumn='gender',\n", - "output_file='advanced.json',\n", - "splitToFiles=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to files complete. 
Last file: advanced_melanie.white@example2.edu.json\n" - ] - } - ], - "source": [ - "cli.templating(p2.project_id,\n", - "prefix='''{ \"events\" : [\n", - "''',\n", - "template=' { \"name\" : {{jsonize(cells[\"name\"].value)}}, \"purchase\" : {{jsonize(cells[\"purchase\"].value)}} }',\n", - "rowSeparator=''',\n", - "''',\n", - "suffix='''\n", - "] }''',\n", - "filterQuery='^F$',\n", - "filterColumn='gender',\n", - "output_file='advanced.json',\n", - "splitToFiles=True,\n", - "suffixById=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['advanced_jean.griffith@example5.org.json',\n", - " 'advanced_melanie.white@example2.edu.json',\n", - " 'advanced_3.json',\n", - " 'advanced_2.json',\n", - " 'advanced_1.json',\n", - " 'duplicates-deletion.json',\n", - " 'duplicates.csv',\n", - " 'data']" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.listdir(os.getcwd())" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 1716843473792 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p2.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Unicode" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### fruits" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1929957235590\n", - "rows: 5\n", - " id: 1929957235590\n", - " url: http://127.0.0.1:3333/project?project=1929957235590\n", - " name: evil-fruits\n", - " modified: 2019-08-21T23:35:47Z\n", - " created: 2019-08-21T23:35:47Z\n", - " 
rowCount: 5\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/evil-fruits.tsv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'evil-fruits', u'processQuotes': True, u'limit': -1, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: 🔣\n", - " column 002: code\n", - " column 003: meaning\n", - "🔣\tcode\tmeaning\n", - "🍇\t1F347\tGRAPES\n", - "🍉\t1F349\tWATERMELON\n", - "🍒\t1F352\tCHERRIES\n", - "🍓\t1F353\tSTRAWBERRY\n", - "🍍\t1F34D\tPINEAPPLE\n" - ] - } - ], - "source": [ - "p1 = cli.create('data/cli/evil-fruits.tsv')\n", - "cli.info(p1.project_id)\n", - "cli.export(p1.project_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to file emojis.csv complete\n", - "🔣,code,meaning\n", - "🍇,1F347,GRAPES\n", - "🍉,1F349,WATERMELON\n", - "🍒,1F352,CHERRIES\n", - "🍓,1F353,STRAWBERRY\n", - "🍍,1F34D,PINEAPPLE\n", - "\n" - ] - } - ], - "source": [ - "cli.export(p1.project_id, output_file='emojis.csv')\n", - "with open('emojis.csv', 'r') as f:\n", - " print(f.read())" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{ \"emojis\" : [\n", - " { \"symbol\" : \"🍇\", \"meaning\" : \"GRAPES\" },\n", - " { \"symbol\" : \"🍉\", \"meaning\" : \"WATERMELON\" },\n", - " { \"symbol\" : \"🍍\", \"meaning\" : \"PINEAPPLE\" }\n", - "] }" - ] - } - ], - "source": [ - "cli.templating(p1.project_id,\n", - "prefix='''{ \"emojis\" : [\n", - "''',\n", - "template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }',\n", - "rowSeparator=''',\n", - "''',\n", - "suffix='''\n", - "] }''',\n", - "filterQuery='^1F34',\n", - 
"filterColumn='code')" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to files complete. Last file: trái cây_3.json\n" - ] - } - ], - "source": [ - "cli.templating(p1.project_id,\n", - "prefix='''{ \"emojis\" : [\n", - "''',\n", - "template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }',\n", - "rowSeparator=''',\n", - "''',\n", - "suffix='''\n", - "] }''',\n", - "filterQuery='^1F34',\n", - "filterColumn='code',\n", - "output_file='trái cây.json',\n", - "splitToFiles=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Export to files complete. Last file: trái cây_🍍.json\n" - ] - } - ], - "source": [ - "cli.templating(p1.project_id,\n", - "prefix='''{ \"emojis\" : [\n", - "''',\n", - "template=' { \"symbol\" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, \"meaning\" : {{jsonize(cells[\"meaning\"].value)}} }',\n", - "rowSeparator=''',\n", - "''',\n", - "suffix='''\n", - "] }''',\n", - "filterQuery='^1F34',\n", - "filterColumn='code',\n", - "output_file='trái cây.json',\n", - "splitToFiles=True,\n", - "suffixById=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['tr\\xc3\\xa1i c\\xc3\\xa2y_\\xf0\\x9f\\x8d\\x8d.json',\n", - " 'tr\\xc3\\xa1i c\\xc3\\xa2y_\\xf0\\x9f\\x8d\\x89.json',\n", - " 'tr\\xc3\\xa1i c\\xc3\\xa2y_\\xf0\\x9f\\x8d\\x87.json',\n", - " 'tr\\xc3\\xa1i c\\xc3\\xa2y_3.json',\n", - " 'tr\\xc3\\xa1i c\\xc3\\xa2y_2.json',\n", - " 'tr\\xc3\\xa1i c\\xc3\\xa2y_1.json',\n", - " 'emojis.csv',\n", - " 'advanced_jean.griffith@example5.org.json',\n", - " 'advanced_melanie.white@example2.edu.json',\n", - " 'advanced_3.json',\n", - " 'advanced_2.json',\n", - 
" 'advanced_1.json',\n", - " 'duplicates-deletion.json',\n", - " 'duplicates.csv',\n", - " 'data']" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.listdir(os.getcwd())" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 2401578251107 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p1.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### emoji data" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2314250240290\n", - "rows: 20\n", - " id: 2314250240290\n", - " url: http://127.0.0.1:3333/project?project=2314250240290\n", - " name: dữ liệu biểu tượng cảm xúc\n", - " modified: 2019-08-21T23:36:05Z\n", - " created: 2019-08-21T23:36:05Z\n", - " rowCount: 20\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/d\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac.txt', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'd\\u1eef li\\u1ec7u bi\\u1ec3u t\\u01b0\\u1ee3ng c\\u1ea3m x\\xfac', u'processQuotes': True, u'skipDataLines': 34, u'limit': 20, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n", - " column 001: Column 1\n", - " column 002: Column 2\n", - " column 003: Column 3\n", - " column 004: Column 4\n", - " column 005: Column 5\n", - " column 006: Column 6\n", - "Column 1\tColumn 2\tColumn 3\tColumn 4\tColumn 5\tColumn 6\n", - "00A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (©) COPYRIGHT SIGN\n", - "00AE ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (®) REGISTERED SIGN\n", - "203C ;\ttext ;\tL1 ;\tnone ;\ta j\t# V1.1 (‼) DOUBLE EXCLAMATION MARK\n", - "2049 
;\ttext ;\tL1 ;\tnone ;\ta j\t# V3.0 (⁉) EXCLAMATION QUESTION MARK\n", - "2122 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (™) TRADE MARK SIGN\n", - "2139 ;\ttext ;\tL1 ;\tnone ;\tj\t# V3.0 (ℹ) INFORMATION SOURCE\n", - "2194 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↔) LEFT RIGHT ARROW\n", - "2195 ;\ttext ;\tL1 ;\tnone ;\tz j\t# V1.1 (↕) UP DOWN ARROW\n", - "2196 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↖) NORTH WEST ARROW\n", - "2197 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↗) NORTH EAST ARROW\n", - "2198 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↘) SOUTH EAST ARROW\n", - "2199 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↙) SOUTH WEST ARROW\n", - "21A9 ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↩) LEFTWARDS ARROW WITH HOOK\n", - "21AA ;\ttext ;\tL1 ;\tnone ;\tj\t# V1.1 (↪) RIGHTWARDS ARROW WITH HOOK\n", - "231A ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌚) WATCH\n", - "231B ;\temoji ;\tL1 ;\tnone ;\tj\t# V1.1 (⌛) HOURGLASS\n", - "2328 ;\ttext ;\tL2 ;\tnone ;\tx\t# V1.1 (⌨) KEYBOARD\n", - "23CF ;\ttext ;\tL2 ;\tnone ;\tx\t# V4.0 (⏏) EJECT SYMBOL\n", - "23E9 ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏩) BLACK RIGHT-POINTING DOUBLE TRIANGLE\n", - "23EA ;\temoji ;\tL1 ;\tnone ;\tj w\t# V6.0 (⏪) BLACK LEFT-POINTING DOUBLE TRIANGLE\n" - ] - } - ], - "source": [ - "p1 = cli.create('data/cli/dữ liệu biểu tượng cảm xúc.txt',\n", - " project_format='tsv',\n", - " headerLines=0,\n", - " skipDataLines=34,\n", - " limit=20)\n", - "cli.info(p1.project_id)\n", - "cli.export(p1.project_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " 2314250240290: dữ liệu biểu tượng cảm xúc\n", - " 1929957235590: evil-fruits\n" - ] - } - ], - "source": [ - "cli.ls()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Delete" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "Project 1602939526221 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p1.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## CSV" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### default" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1675776970201\n", - "rows: 10\n", - " id: 1675776970201\n", - " url: http://127.0.0.1:3333/project?project=1675776970201\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:05Z\n", - " created: 2019-08-21T23:31:05Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n", - "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", - "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", - 
"ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "Project 1675776970201 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv')\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### encoding\n", - "\n", - "check TV symbol in line 1" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2268199900543\n", - "rows: 10\n", - "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", - "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "Project 2268199900543 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', encoding='ISO-8859-1')\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1798292162864\n", - "rows: 10\n", - 
"email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", - "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "Project 1798292162864 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', encoding='UTF-8')\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### guessCellValueTypes\n", - "\n", - "check OpenRefine GUI at url below: numbers should be green" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2351526371150\n", - "rows: 10\n", - " id: 2351526371150\n", - " url: http://127.0.0.1:3333/project?project=2351526371150\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:05Z\n", - " created: 2019-08-21T23:31:05Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': True, 
u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', guessCellValueTypes=True)\n", - "cli.info(p.project_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 2351526371150 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### headerLines\n", - "\n", - "check column names, should be Column 1..." - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1753036694840\n", - "rows: 11\n", - "Column 1\tColumn 2\tColumn 3\tColumn 4\tColumn 5\tColumn 6\tColumn 7\n", - "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", - "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "Project 1753036694840 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = 
cli.create('data/cli/duplicates.csv', headerLines=0)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### ignoreLines\n", - "\n", - "check column names, should start with arthur.duff as header" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1567779238383\n", - "rows: 5\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "Project 1567779238383 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', ignoreLines=5)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### limit\n", - "\n", - "should contain 5 rows" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2236287775552\n", - "rows: 5\n", - "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", - "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "Project 
2236287775552 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', limit=5)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### separator and processQuotes\n", - "\n", - "should contain 10 rows and 2 columns (Column 2)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2493837924937\n", - "rows: 10\n", - "email,name,state,gender,purchase,count,date\tColumn 2\n", - "\"danny.baron@example1.com,Danny Baron,CA,M,TV (UTF-8: 📺),1,\"\"Wed, 4 Jul 2001\"\t\n", - "melanie.white@example2.edu,Melanie White,NC,F,,1,2001-07-04T12:08:56\t\n", - "danny.baron@example1.com, D.\t\"(\"\"Tab\"\") Baron,CA,M,Winter jacket,1,2001-07-04\"\n", - "ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight,1,2001/07/04\t\n", - "arthur.duff@example4.com,Arthur Duff,OR,M,Dining table,1,2001-07\t\n", - "danny.baron@example1.com,Daniel Baron,,,Bike,1,2001\t\n", - "jean.griffith@example5.org,Jean Griffith,WA,F,Power drill,1,2000\t\n", - "melanie.white@example2.edu,Melanie White,NC,F,'iPad',1,1999\t\n", - "ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier,1,1998\t\n", - "arthur.duff@example4.com,Arthur Duff,OR,M,Night table,1,1997\t\n", - "Project 2493837924937 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', separator=' ', processQuotes=False)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### projectName" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1568868311685\n", - "rows: 10\n", - " id: 1568868311685\n", - " url: 
http://127.0.0.1:3333/project?project=1568868311685\n", - " name: foo\n", - " modified: 2019-08-21T23:31:06Z\n", - " created: 2019-08-21T23:31:06Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'foo', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n", - "Project 1568868311685 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', projectName='foo')\n", - "cli.info(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### projectTags (introduced in OpenRefine 2.8)\n", - "\n", - "check manually at http://127.0.0.1:3333 > Open Project if tags where stored" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1889306695897\n", - "rows: 10\n", - " id: 1889306695897\n", - " url: http://127.0.0.1:3333/project?project=1889306695897\n", - " name: duplicates\n", - " tags: [u'client1', u'beta']\n", - " modified: 2019-08-21T23:31:06Z\n", - " created: 2019-08-21T23:31:06Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'projectTags': [u'client1', u'beta'], u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 
001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', projectTags=['client1', 'beta'])\n", - "cli.info(p.project_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 1889306695897 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### skipDataLines\n", - "\n", - "should contain 5 rows" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1906416549071\n", - "rows: 5\n", - "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "Project 1906416549071 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', skipDataLines=5)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### storeBlankCellsAsNulls\n", - "\n", - "check OpenRefine GUI at url below:\n", - "* All > View > Show/Hide 'null' values in cells\n", - "* row 6 should contain null values in columns state and gender" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "id: 1641203332364\n", - "rows: 10\n", - " id: 1641203332364\n", - " url: http://127.0.0.1:3333/project?project=1641203332364\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:06Z\n", - " created: 2019-08-21T23:31:06Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': True, u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', guessCellValueTypes=True)\n", - "cli.info(p.project_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 1641203332364 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## TSV" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### default" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2332414205165\n", - "rows: 10\n", - " id: 2332414205165\n", - " url: http://127.0.0.1:3333/project?project=2332414205165\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:06Z\n", - " created: 2019-08-21T23:31:06Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.tsv', u'storeBlankRows': True, u'encoding': u'', u'projectName': 
u'duplicates', u'processQuotes': True, u'limit': -1, u'trimStrings': False, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n", - "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", - "danny.baron@example1.com\t\"D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "Project 2332414205165 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.tsv')\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## JSON" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### default" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1978993820770\n", - "rows: 10\n", - " id: 1978993820770\n", - " url: http://127.0.0.1:3333/project?project=1978993820770\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:06Z\n", - " created: 
2019-08-21T23:31:06Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: _ - name\n", - " column 002: _ - date\n", - " column 003: _ - email\n", - " column 004: _ - state\n", - " column 005: _ - count\n", - " column 006: _ - gender\n", - " column 007: _ - purchase\n", - "_ - name\t_ - date\t_ - email\t_ - state\t_ - count\t_ - gender\t_ - purchase\n", - "Danny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\tCA\t1\tM\tTV (UTF-8: 📺)\n", - "Melanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\tNC\t1\tF\t\n", - "\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\tCA\t1\tM\tWinter jacket\n", - "Ben Tyler\t2001/07/04\tben.tyler@example3.org\tNV\t1\tM\tFlashlight\n", - "Arthur Duff\t2001-07\tarthur.duff@example4.com\tOR\t1\tM\tDining table\n", - "Daniel Baron\t2001\tdanny.baron@example1.com\t\t1\t\tBike\n", - "Jean Griffith\t2000\tjean.griffith@example5.org\tWA\t1\tF\tPower drill\n", - "Melanie White\t1999\tmelanie.white@example2.edu\tNC\t1\tF\t'iPad'\n", - "Ben Morisson\t1998\tben.morisson@example6.org\tFL\t1\tM\tAmplifier\n", - "Arthur Duff\t1997\tarthur.duff@example4.com\tOR\t1\tM\tNight table\n", - "Project 1978993820770 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.json')\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### trimStrings (broken, does not work in the GUI either)\n", - "\n", - "check row 3 if spaces before `D.` are deleted" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - 
"metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1892692171021\n", - "rows: 10\n", - " id: 1892692171021\n", - " url: http://127.0.0.1:3333/project?project=1892692171021\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:07Z\n", - " created: 2019-08-21T23:31:06Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': True, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: _ - name\n", - " column 002: _ - date\n", - " column 003: _ - email\n", - " column 004: _ - state\n", - " column 005: _ - count\n", - " column 006: _ - gender\n", - " column 007: _ - purchase\n", - "_ - name\t_ - date\t_ - email\t_ - state\t_ - count\t_ - gender\t_ - purchase\n", - "Danny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\tCA\t1\tM\tTV (UTF-8: 📺)\n", - "Melanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\tNC\t1\tF\t\n", - "\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\tCA\t1\tM\tWinter jacket\n", - "Ben Tyler\t2001/07/04\tben.tyler@example3.org\tNV\t1\tM\tFlashlight\n", - "Arthur Duff\t2001-07\tarthur.duff@example4.com\tOR\t1\tM\tDining table\n", - "Daniel Baron\t2001\tdanny.baron@example1.com\t\t1\t\tBike\n", - "Jean Griffith\t2000\tjean.griffith@example5.org\tWA\t1\tF\tPower drill\n", - "Melanie White\t1999\tmelanie.white@example2.edu\tNC\t1\tF\t'iPad'\n", - "Ben Morisson\t1998\tben.morisson@example6.org\tFL\t1\tM\tAmplifier\n", - "Arthur Duff\t1997\tarthur.duff@example4.com\tOR\t1\tM\tNight table\n", - "Project 1892692171021 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.json', trimStrings=True)\n", - 
"cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### recordPath" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1945894618034\n", - "rows: 10\n", - " id: 1945894618034\n", - " url: http://127.0.0.1:3333/project?project=1945894618034\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:07Z\n", - " created: 2019-08-21T23:31:07Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_', u'purchase'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: purchase\n", - "purchase\n", - "TV (UTF-8: 📺)\n", - "\n", - "Winter jacket\n", - "Flashlight\n", - "Dining table\n", - "Bike\n", - "Power drill\n", - "'iPad'\n", - "Amplifier\n", - "Night table\n", - "Project 1945894618034 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.json', recordPath=['_', '_', 'purchase'])\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### storeEmptyStrings\n", - "\n", - "default: True; set to False for null values\n", - "\n", - "check OpenRefine GUI at url below:\n", - "* All > View > Show/Hide 'null' values in cells\n", - "* row 6 should contain null values in columns state and gender" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"id: 2551263767214\n", - "rows: 10\n", - " id: 2551263767214\n", - " url: http://127.0.0.1:3333/project?project=2551263767214\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:07Z\n", - " created: 2019-08-21T23:31:07Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': False, u'fileSource': u'data/cli/duplicates.json', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'_', u'_'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: _ - name\n", - " column 002: _ - date\n", - " column 003: _ - email\n", - " column 004: _ - count\n", - " column 005: _ - purchase\n", - " column 006: _ - state\n", - " column 007: _ - gender\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.json', storeEmptyStrings=False)\n", - "cli.info(p.project_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 2551263767214 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## XML" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### default" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1926835461545\n", - "rows: 80\n", - " id: 1926835461545\n", - " url: http://127.0.0.1:3333/project?project=1926835461545\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:07Z\n", - " created: 2019-08-21T23:31:07Z\n", - " rowCount: 80\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, 
u'encoding': u'', u'recordPath': [u'root'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: root\n", - " column 002: root - record\n", - " column 003: root - record - name\n", - " column 004: root - record - date\n", - " column 005: root - record - email\n", - " column 006: root - record - count\n", - " column 007: root - record - purchase\n", - " column 008: root - record - state\n", - " column 009: root - record - gender\n", - "root\troot - record\troot - record - name\troot - record - date\troot - record - email\troot - record - count\troot - record - purchase\troot - record - state\troot - record - gender\n", - "\"\n", - " \"\t\"\n", - " \"\tDanny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\t1\tTV (UTF-8: 📺)\tCA\tM\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\tMelanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\t1\t\tNC\tF\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - "\"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\t1\tWinter jacket\tCA\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " 
\"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tBen Tyler\t2001/07/04\tben.tyler@example3.org\t1\tFlashlight\tNV\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tArthur Duff\t2001-07\tarthur.duff@example4.com\t1\tDining table\tOR\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tDaniel Baron\t2001\tdanny.baron@example1.com\t1\tBike\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tJean Griffith\t2000\tjean.griffith@example5.org\t1\tPower drill\tWA\tF\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tMelanie White\t1999\tmelanie.white@example2.edu\t1\t'iPad'\tNC\tF\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tBen Morisson\t1998\tben.morisson@example6.org\t1\tAmplifier\tFL\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " 
\"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tArthur Duff\t1997\tarthur.duff@example4.com\t1\tNight table\tOR\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "Project 1926835461545 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.xml')\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### trimStrings (broken, does not work in the GUI either)\n", - "\n", - "check if spaces before `D.` are deleted" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1615744471501\n", - "rows: 80\n", - " id: 1615744471501\n", - " url: http://127.0.0.1:3333/project?project=1615744471501\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:07Z\n", - " created: 2019-08-21T23:31:07Z\n", - " rowCount: 80\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'root'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': True, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: root\n", - " column 002: root - record\n", - " column 003: root - record - name\n", - " column 004: root - record - date\n", - " column 005: root - record - email\n", - " column 006: root - record - count\n", - " column 007: root - record 
- purchase\n", - " column 008: root - record - state\n", - " column 009: root - record - gender\n", - "root\troot - record\troot - record - name\troot - record - date\troot - record - email\troot - record - count\troot - record - purchase\troot - record - state\troot - record - gender\n", - "\"\n", - " \"\t\"\n", - " \"\tDanny Baron\tWed, 4 Jul 2001\tdanny.baron@example1.com\t1\tTV (UTF-8: 📺)\tCA\tM\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - " \"\t\"\n", - " \"\tMelanie White\t2001-07-04T12:08:56\tmelanie.white@example2.edu\t1\t\tNC\tF\n", - "\"\n", - " \"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\"\n", - "\"\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\" D.\t(\"\"Tab\"\") Baron\"\t2001-07-04\tdanny.baron@example1.com\t1\tWinter jacket\tCA\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tBen Tyler\t2001/07/04\tben.tyler@example3.org\t1\tFlashlight\tNV\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tArthur Duff\t2001-07\tarthur.duff@example4.com\t1\tDining table\tOR\tM\n", - "\t\"\n", - " 
\"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tDaniel Baron\t2001\tdanny.baron@example1.com\t1\tBike\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tJean Griffith\t2000\tjean.griffith@example5.org\t1\tPower drill\tWA\tF\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tMelanie White\t1999\tmelanie.white@example2.edu\t1\t'iPad'\tNC\tF\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tBen Morisson\t1998\tben.morisson@example6.org\t1\tAmplifier\tFL\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\tArthur Duff\t1997\tarthur.duff@example4.com\t1\tNight table\tOR\tM\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "\t\"\n", - " \"\t\t\t\t\t\t\t\n", - "Project 
1615744471501 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.xml', trimStrings=True)\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### recordPath" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1843370951454\n", - "rows: 10\n", - " id: 1843370951454\n", - " url: http://127.0.0.1:3333/project?project=1843370951454\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:07Z\n", - " created: 2019-08-21T23:31:07Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.xml', u'storeBlankRows': True, u'encoding': u'', u'recordPath': [u'root', u'record', u'purchase'], u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: purchase\n", - "purchase\n", - "TV (UTF-8: 📺)\n", - "\n", - "Winter jacket\n", - "Flashlight\n", - "Dining table\n", - "Bike\n", - "Power drill\n", - "'iPad'\n", - "Amplifier\n", - "Night table\n", - "Project 1843370951454 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.xml', recordPath=['root', 'record', 'purchase'])\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### storeEmptyStrings\n", - "\n", - "default: True; set to False for null values\n", - "\n", - "check OpenRefine GUI at url below:\n", - "* All > View > Show/Hide 'null' values in cells\n", - "* row 6 should contain null values in columns state and gender" - ] - }, - { - 
"cell_type": "code", - "execution_count": 54, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2549624481101\n", - "rows: 10\n", - " id: 2549624481101\n", - " url: http://127.0.0.1:3333/project?project=2549624481101\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:07Z\n", - " created: 2019-08-21T23:31:07Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': False, u'fileSource': u'data/cli/duplicates.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.csv', storeEmptyStrings=False)\n", - "cli.info(p.project_id)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Project 2549624481101 has been successfully deleted\n" - ] - } - ], - "source": [ - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## TXT" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### default (line-based)" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2029778313736\n", - "rows: 11\n", - " id: 2029778313736\n", - " url: http://127.0.0.1:3333/project?project=2029778313736\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:07Z\n", - " created: 2019-08-21T23:31:07Z\n", - " rowCount: 11\n", - "importOptionMetadata: 
[{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'ignoreLines': -1, u'projectName': u'duplicates', u'processQuotes': True, u'skipDataLines': -1, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n", - " column 001: Column 1\n", - "Column 1\n", - "email name state gender purchase count date \n", - "danny.baron@example1.com Danny Baron CA M TV (UTF-8: 📺) 1 Wed, 4 Jul 2001 \n", - "melanie.white@example2.edu Melanie White NC F 1 2001-07-04T12:08:5\n", - "\"danny.baron@example1.com D.\t(\"\"Tab\"\") Baron CA M Winter jacket 1 2001-07-04 \"\n", - "ben.tyler@example3.org Ben Tyler NV M Flashlight 1 2001/07/04 \n", - "arthur.duff@example4.com Arthur Duff OR M Dining table 1 2001-07 \n", - "danny.baron@example1.com Daniel Baron Bike 1 2001 \n", - "jean.griffith@example5.org Jean Griffith WA F Power drill 1 2000 \n", - "melanie.white@example2.edu Melanie White NC F 'iPad' 1 1999 \n", - "ben.morisson@example6.org Ben Morisson FL M Amplifier 1 1998 \n", - "arthur.duff@example4.com Arthur Duff OR M Night table 1 1997 \n", - "Project 2029778313736 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.txt')\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### linesPerRow\n", - "\n", - "should return 6 rows in 2 columns" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1614710460265\n", - "rows: 6\n", - " id: 1614710460265\n", - " url: http://127.0.0.1:3333/project?project=1614710460265\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:08Z\n", - " created: 2019-08-21T23:31:08Z\n", - " rowCount: 6\n", - 
"importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'ignoreLines': -1, u'projectName': u'duplicates', u'processQuotes': True, u'limit': -1, u'skipDataLines': -1, u'separator': u',', u'trimStrings': False, u'linesPerRow': 2, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 0}]\n", - " column 001: Column 1\n", - " column 002: Column 2\n", - "Column 1\tColumn 2\n", - "email name state gender purchase count date \tdanny.baron@example1.com Danny Baron CA M TV (UTF-8: 📺) 1 Wed, 4 Jul 2001 \n", - "melanie.white@example2.edu Melanie White NC F 1 2001-07-04T12:08:5\t\"danny.baron@example1.com D.\t(\"\"Tab\"\") Baron CA M Winter jacket 1 2001-07-04 \"\n", - "ben.tyler@example3.org Ben Tyler NV M Flashlight 1 2001/07/04 \tarthur.duff@example4.com Arthur Duff OR M Dining table 1 2001-07 \n", - "danny.baron@example1.com Daniel Baron Bike 1 2001 \tjean.griffith@example5.org Jean Griffith WA F Power drill 1 2000 \n", - "melanie.white@example2.edu Melanie White NC F 'iPad' 1 1999 \tben.morisson@example6.org Ben Morisson FL M Amplifier 1 1998 \n", - "arthur.duff@example4.com Arthur Duff OR M Night table 1 1997 \t\n", - "Project 1614710460265 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.txt', linesPerRow=2)\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### fixed-width: columnWidths and headerLines" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 1729341878534\n", - "rows: 10\n", - " id: 1729341878534\n", - " url: http://127.0.0.1:3333/project?project=1729341878534\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:08Z\n", - " created: 
2019-08-21T23:31:08Z\n", - " rowCount: 10\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'data/cli/duplicates.txt', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'limit': -1, u'separator': u',', u'trimStrings': False, u'columnWidths': [27, 21, 6, 7, 15, 6, 1000], u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False, u'headerLines': 1}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n", - "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com \tDanny Baron \tCA \tM \tTV (UTF-8: 📺) \t1 \tWed, 4 Jul 2001 \n", - "melanie.white@example2.edu \tMelanie White \tNC \tF \t \t1 \t2001-07-04T12:08:5\n", - "danny.baron@example1.com \t\" D.\t(\"\"Tab\"\") Baron \"\tCA \tM \tWinter jacket \t1 \t2001-07-04 \n", - "ben.tyler@example3.org \tBen Tyler \tNV \tM \tFlashlight \t1 \t2001/07/04 \n", - "arthur.duff@example4.com \tArthur Duff \tOR \tM \tDining table \t1 \t2001-07 \n", - "danny.baron@example1.com \tDaniel Baron \t \t \tBike \t1 \t2001 \n", - "jean.griffith@example5.org \tJean Griffith \tWA \tF \tPower drill \t1 \t2000 \n", - "melanie.white@example2.edu \tMelanie White \tNC \tF \t'iPad' \t1 \t1999 \n", - "ben.morisson@example6.org \tBen Morisson \tFL \tM \tAmplifier \t1 \t1998 \n", - "arthur.duff@example4.com \tArthur Duff \tOR \tM \tNight table \t1 \t1997 \n", - "Project 1729341878534 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.txt', columnWidths=[27, 21, 6, 7, 15, 6, 1000], headerLines=1)\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ZIP" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - 
"### default\n", - "\n", - "should contain 16 rows" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2279718038457\n", - "rows: 16\n", - " id: 2279718038457\n", - " url: http://127.0.0.1:3333/project?project=2279718038457\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:08Z\n", - " created: 2019-08-21T23:31:08Z\n", - " rowCount: 16\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}, {u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': False}]\n", - " column 001: email\n", - " column 002: name\n", - " column 003: state\n", - " column 004: gender\n", - " column 005: purchase\n", - " column 006: count\n", - " column 007: date\n", - "email\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", - "danny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "danny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", 
- "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "danny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺), Winter jacket, bike\t3\tWed, 4 Jul 2001, 2001-07-04, 2001\n", - "melanie.white@example2.edu\tMelanie White\tNC\tF\t, 'iPad'\t2\t2001-07-04T12:08:56, 1999\n", - "ben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "arthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2\t2001-07, 1997\n", - "jean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "ben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "Project 2279718038457 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.zip')\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### includeFileSources\n", - "\n", - "should contain column File" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id: 2100283089198\n", - "rows: 16\n", - " id: 2100283089198\n", - " url: http://127.0.0.1:3333/project?project=2100283089198\n", - " name: duplicates\n", - " modified: 2019-08-21T23:31:08Z\n", - " created: 2019-08-21T23:31:08Z\n", - " rowCount: 16\n", - "importOptionMetadata: [{u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': True}, {u'storeEmptyStrings': True, u'fileSource': u'duplicates2.csv', u'storeBlankRows': True, u'encoding': u'', u'projectName': u'duplicates', u'processQuotes': True, u'separator': u',', 
u'trimStrings': False, u'limit': -1, u'storeBlankCellsAsNulls': True, u'guessCellValueTypes': False, u'includeFileSources': True}]\n", - " column 001: File\n", - " column 002: email\n", - " column 003: name\n", - " column 004: state\n", - " column 005: gender\n", - " column 006: purchase\n", - " column 007: count\n", - " column 008: date\n", - "File\temail\tname\tstate\tgender\tpurchase\tcount\tdate\n", - "duplicates.csv\tdanny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺)\t1\tWed, 4 Jul 2001\n", - "duplicates.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t\t1\t2001-07-04T12:08:56\n", - "duplicates.csv\tdanny.baron@example1.com\t\" D.\t(\"\"Tab\"\") Baron\"\tCA\tM\tWinter jacket\t1\t2001-07-04\n", - "duplicates.csv\tben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "duplicates.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table\t1\t2001-07\n", - "duplicates.csv\tdanny.baron@example1.com\tDaniel Baron\t\t\tBike\t1\t2001\n", - "duplicates.csv\tjean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "duplicates.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t'iPad'\t1\t1999\n", - "duplicates.csv\tben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", - "duplicates.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tNight table\t1\t1997\n", - "duplicates2.csv\tdanny.baron@example1.com\tDanny Baron\tCA\tM\tTV (UTF-8: 📺), Winter jacket, bike\t3\tWed, 4 Jul 2001, 2001-07-04, 2001\n", - "duplicates2.csv\tmelanie.white@example2.edu\tMelanie White\tNC\tF\t, 'iPad'\t2\t2001-07-04T12:08:56, 1999\n", - "duplicates2.csv\tben.tyler@example3.org\tBen Tyler\tNV\tM\tFlashlight\t1\t2001/07/04\n", - "duplicates2.csv\tarthur.duff@example4.com\tArthur Duff\tOR\tM\tDining table, Night table\t2\t2001-07, 1997\n", - "duplicates2.csv\tjean.griffith@example5.org\tJean Griffith\tWA\tF\tPower drill\t1\t2000\n", - "duplicates2.csv\tben.morisson@example6.org\tBen Morisson\tFL\tM\tAmplifier\t1\t1998\n", 
- "Project 2100283089198 has been successfully deleted\n" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.zip', includeFileSources=True)\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ODS (broken in OpenRefine >=2.8)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### default\n", - "\n", - "many blank columns and rows in OpenRefine <=2.7 (also with manual import via GUI)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "ename": "Exception", - "evalue": "Project not created", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mException\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcli\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data/cli/duplicates.ods'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mcli\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mproject_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mcli\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexport\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mproject_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mcli\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mproject_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m/home/felix/.local/lib/python2.7/site-packages/google/refine/cli.pyc\u001b[0m in \u001b[0;36mcreate\u001b[0;34m(project_file, project_format, columnWidths, encoding, guessCellValueTypes, headerLines, ignoreLines, includeFileSources, limit, linesPerRow, processQuotes, projectName, projectTags, recordPath, separator, sheets, skipDataLines, storeBlankCellsAsNulls, storeBlankRows, storeEmptyStrings, trimStrings)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0mstore_blank_cells_as_nulls\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstoreBlankCellsAsNulls\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[0minclude_file_sources\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mincludeFileSources\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 127\u001b[0;31m **kwargs)\n\u001b[0m\u001b[1;32m 128\u001b[0m \u001b[0mrows\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mproject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdo_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'get-rows'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'total'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrows\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/home/felix/.local/lib/python2.7/site-packages/google/refine/refine.pyc\u001b[0m in \u001b[0;36mnew_project\u001b[0;34m(self, project_file, project_url, project_name, project_format, encoding, separator, ignore_lines, header_lines, skip_data_lines, limit, store_blank_rows, guess_cell_value_types, process_quotes, store_blank_cells_as_nulls, include_file_sources, **opts)\u001b[0m\n\u001b[1;32m 279\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mRefineProject\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mserver\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mproject_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 281\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Project not created'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 282\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mException\u001b[0m: Project not created" - ] - } - ], - "source": [ - "p = cli.create('data/cli/duplicates.ods')\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### sheets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "first sheet from file with 2 sheets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = cli.create('data/cli/duplicates2.ods', sheets=[0])\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "both sheets from file with 2 sheets: should contain 16 rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "p = cli.create('data/cli/duplicates2.ods', sheets=[0, 1])\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## XLS (broken in OpenRefine >=2.8)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### default" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = cli.create('data/cli/duplicates.xls')\n", - 
"cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### sheets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "first sheet from file with 2 sheets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = cli.create('data/cli/duplicates2.xls', sheets=[0])\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "both sheets from file with 2 sheets: should contain 16 rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "p = cli.create('data/cli/duplicates2.xls', sheets=[0, 1])\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## XLSX (broken in OpenRefine >=2.8)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### default" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = cli.create('data/cli/duplicates.xlsx')\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### sheets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "first sheet from file with 2 sheets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = cli.create('data/cli/duplicates2.xlsx', sheets=[0])\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "both sheets from file 
with 2 sheets: should contain 16 rows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "p = cli.create('data/cli/duplicates2.xlsx', sheets=[0, 1])\n", - "cli.info(p.project_id)\n", - "cli.export(p.project_id)\n", - "cli.delete(p.project_id)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.16" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/create-csv-encoding.sh b/tests/create-csv-encoding.sh new file mode 100644 index 0000000..fb10341 --- /dev/null +++ b/tests/create-csv-encoding.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}-utf8.csv" +a,b,c +1,2,3 +ä,é,ß +$,\,' +DATA +iconv -f UTF-8 -t ISO-8859-1 "tmp/${t}/${t}-utf8.csv" > "tmp/${t}/${t}.csv" + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +ä é ß +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --encoding "ISO-8859-1" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git 
a/tests/create-csv-guessCellValueTypes.sh b/tests/create-csv-guessCellValueTypes.sh new file mode 100644 index 0000000..09fe3ca --- /dev/null +++ b/tests/create-csv-guessCellValueTypes.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +01,02,03 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +1 2 3 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --guessCellValueTypes "true" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-headerLines.sh b/tests/create-csv-headerLines.sh new file mode 100644 index 0000000..5548345 --- /dev/null +++ b/tests/create-csv-headerLines.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +Column 1 Column 2 Column 3 +a b c +1 2 3 +0 0 
0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --headerLines "0" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-ignoreLines.sh b/tests/create-csv-ignoreLines.sh new file mode 100644 index 0000000..fa04da9 --- /dev/null +++ b/tests/create-csv-ignoreLines.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --ignoreLines "1" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-limit.sh b/tests/create-csv-limit.sh new file mode 100644 index 0000000..3141391 --- /dev/null +++ b/tests/create-csv-limit.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# 
=================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +0 0 0 +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --limit "2" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-processQuotes.sh b/tests/create-csv-processQuotes.sh new file mode 100644 index 0000000..a6d09de --- /dev/null +++ b/tests/create-csv-processQuotes.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,"2,0",3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c Column 4 +1 2 0 3 +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --processQuotes "false" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-projectTags.sh b/tests/create-csv-projectTags.sh new file mode 100644 index 0000000..cf0f450 --- /dev/null +++ b/tests/create-csv-projectTags.sh @@ -0,0 
+1,45 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + version="${2}" +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +if [[ "${version:0:1}" = "2" ]]; then + echo "projectTags were introduced in OpenRefine 3.0" + exit 200 +else + cat << "DATA" > "tmp/${t}/${t}.assert" + tags: [u'beta', u'client1'] +DATA +fi + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --projectTags "beta" --projectTags "client1" +${cmd} --info "${t}" | grep ' tags: ' > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-separator.sh b/tests/create-csv-separator.sh new file mode 100644 index 0000000..175cb45 --- /dev/null +++ b/tests/create-csv-separator.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a;b;c +1;2;3 +0;0;0 +$;\;' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# 
================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --separator ";" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-skipDataLines.sh b/tests/create-csv-skipDataLines.sh new file mode 100644 index 0000000..fb2650a --- /dev/null +++ b/tests/create-csv-skipDataLines.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --skipDataLines "1" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-storeBlankCellsAsNulls.sh b/tests/create-csv-storeBlankCellsAsNulls.sh new file mode 100644 index 0000000..669b040 --- /dev/null +++ b/tests/create-csv-storeBlankCellsAsNulls.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p 
"tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,,0 +$,\,' +DATA + +cat << "DATA" > "tmp/${t}/${t}.transform" +[ + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "b", + "expression": "grel:isNull(value)", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 false 3 +0 false 0 +$ false ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --storeBlankCellsAsNulls "false" +${cmd} --apply "tmp/${t}/${t}.transform" "${t}" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-storeBlankRows.sh b/tests/create-csv-storeBlankRows.sh new file mode 100644 index 0000000..117d020 --- /dev/null +++ b/tests/create-csv-storeBlankRows.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +,, +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --storeBlankRows "false" +${cmd} 
--export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv-utf8.sh b/tests/create-csv-utf8.sh new file mode 100644 index 0000000..055a371 --- /dev/null +++ b/tests/create-csv-utf8.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +⌨ code meaning +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --projectName "${t} biểu tượng cảm xúc ⛲" +${cmd} --export "${t} biểu tượng cảm xúc ⛲" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-csv.sh b/tests/create-csv.sh new file mode 100644 index 0000000..e617a88 --- /dev/null +++ b/tests/create-csv.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + 
+cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-json-recordPath.sh b/tests/create-json-recordPath.sh new file mode 100644 index 0000000..9ae0cd7 --- /dev/null +++ b/tests/create-json-recordPath.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.json" +{ + "rows":[ + { + "a":1, + "b":2, + "c":3 + }, + { + "a":0, + "b":0, + "c":0 + }, + { + "a":"$", + "b":"\\", + "c":"\"" + } + ] +} +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +_ - a _ - b _ - c +1 2 3 +0 0 0 +$ \ """" +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.json" --recordPath "_" --recordPath "rows" --recordPath "_" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-json-storeEmptyStrings.sh b/tests/create-json-storeEmptyStrings.sh new file mode 100644 index 0000000..d2ebd55 --- 
/dev/null +++ b/tests/create-json-storeEmptyStrings.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.json" +[ + { + "a": 1, + "b": 2, + "c": 3 + }, + { + "a": "", + "b": "", + "c": "" + }, + { + "a": "$", + "b": "\\", + "c": "\"" + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +_ - a _ - b _ - c +1 2 3 +$ \ """" +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.json" --storeEmptyStrings "false" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-json-trimStrings.sh b/tests/create-json-trimStrings.sh new file mode 100644 index 0000000..20c27b5 --- /dev/null +++ b/tests/create-json-trimStrings.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + version="${2}" +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.json" +[ + { + "a": 1, + "b": 2, + "c": 3 + }, + { + "a": "0", + "b": " 0", + "c": "0 " + }, + { + "a": "$", + "b": "\\", + "c": "\"" + } +] +DATA + +# ================================= 
ASSERTION ================================ # + +if [[ "${version:0:1}" = "2" || "${version}" = "3.0" || "${version}" = "3.1" || "${version}" = "3.2" || "${version}" = "3.3" ]]; then + echo "trimStrings option does not work in OpenRefine <=3.3" + echo "https://github.com/OpenRefine/OpenRefine/issues/2409" + exit 200 +else + cat << "DATA" > "tmp/${t}/${t}.assert" +_ - a _ - b _ - c +1 2 3 +0 0 0 +$ \ """" +DATA +fi + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.json" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-json-utf8.sh b/tests/create-json-utf8.sh new file mode 100644 index 0000000..c3f1404 --- /dev/null +++ b/tests/create-json-utf8.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.json" +[ + { + "⌨": "⛲", + "code": "1F347", + "meaning": "FOUNTAIN" + }, + { + "⌨": "⛳", + "code": "1F349", + "meaning": "FLAG IN HOLE" + }, + { + "⌨": "⛵", + "code": "1F352", + "meaning": "SAILBOAT" + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +_ - ⌨ _ - code _ - meaning +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.json" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST 
=================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-json.sh b/tests/create-json.sh new file mode 100644 index 0000000..01025da --- /dev/null +++ b/tests/create-json.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.json" +[ + { + "a": 1, + "b": 2, + "c": 3 + }, + { + "a": 0, + "b": 0, + "c": 0 + }, + { + "a": "$", + "b": "\\", + "c": "\"" + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +_ - a _ - b _ - c +1 2 3 +0 0 0 +$ \ """" +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.json" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-ods-sheets-utf8.sh b/tests/create-ods-sheets-utf8.sh new file mode 100644 index 0000000..a9e4252 --- /dev/null +++ b/tests/create-ods-sheets-utf8.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + version="${2}" +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cp "data/example.ods" "tmp/${t}/${t}.ods" + +# 
================================= ASSERTION ================================ # + +if [[ "${version}" = "2.7" ]]; then + cat << "DATA" > "tmp/${t}/${t}.assert" +⌨ code meaning Column Column 5 Column 6 Column 7 Column 8 +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA +else + #TODO + echo "https://github.com/opencultureconsulting/openrefine-client/issues/4" + exit 200 +fi + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.ods" --sheets 1 +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-ods.sh b/tests/create-ods.sh new file mode 100644 index 0000000..e13079e --- /dev/null +++ b/tests/create-ods.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + version="${2}" +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cp "data/example.ods" "tmp/${t}/${t}.ods" +#a b c +#1 2 3 +#0 0 0 +#$ \ ' + +# ================================= ASSERTION ================================ # + +if [[ "${version}" = "2.7" ]]; then + cat << "DATA" > "tmp/${t}/${t}.assert" +a b c Column Column 5 Column 6 Column 7 Column 8 +1.0 2.0 3.0 +0.0 0.0 0.0 +$ \ ' +DATA +else + #TODO + echo "https://github.com/opencultureconsulting/openrefine-client/issues/4" + exit 200 +fi + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.ods" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== 
# + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-tsv-utf8.sh b/tests/create-tsv-utf8.sh new file mode 100644 index 0000000..9e918c9 --- /dev/null +++ b/tests/create-tsv-utf8.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.tsv" +⌨ code meaning +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.tsv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.csv" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.csv" diff --git a/tests/create-tsv.sh b/tests/create-tsv.sh new file mode 100644 index 0000000..de71fb1 --- /dev/null +++ b/tests/create-tsv.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.tsv" +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" 
+a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.tsv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.csv" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.csv" diff --git a/tests/create-txt-fixed-width-headerLines.sh b/tests/create-txt-fixed-width-headerLines.sh new file mode 100644 index 0000000..378dd65 --- /dev/null +++ b/tests/create-txt-fixed-width-headerLines.sh @@ -0,0 +1,80 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.txt" +1 2 3 +mon tue wed +$2 $300 $1 +DATA + +cat << "DATA" > "tmp/${t}/${t}.transform" +[ + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "1", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "2", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "3", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +1 2 3 +mon tue wed +$2 $300 $1 +DATA + +# ================================== ACTION 
================================== # + +${cmd} --create "tmp/${t}/${t}.txt" --columnWidths "6" --columnWidths "6" --columnWidths "6" --headerLines "1" +${cmd} --apply "tmp/${t}/${t}.transform" "${t}" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-txt-fixed-width-utf8.sh b/tests/create-txt-fixed-width-utf8.sh new file mode 100644 index 0000000..c650a4d --- /dev/null +++ b/tests/create-txt-fixed-width-utf8.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.txt" +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA + +cat << "DATA" > "tmp/${t}/${t}.transform" +[ + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "Column 1", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "Column 2", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "Column 3", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +Column 1 
Column 2 Column 3 +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.txt" --columnWidths "6" --columnWidths "6" --columnWidths "60" +${cmd} --apply "tmp/${t}/${t}.transform" "${t}" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-txt-fixed-width.sh b/tests/create-txt-fixed-width.sh new file mode 100644 index 0000000..a1679d0 --- /dev/null +++ b/tests/create-txt-fixed-width.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.txt" +1 2 3 +mon tue wed +$2 $300 $1 +DATA + +cat << "DATA" > "tmp/${t}/${t}.transform" +[ + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "Column 1", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "Column 2", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "Column 3", + "expression": "grel:value.trim()", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + } +] +DATA + +# ================================= ASSERTION 
================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +Column 1 Column 2 Column 3 +1 2 3 +mon tue wed +$2 $300 $1 +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.txt" --columnWidths "6" --columnWidths "6" --columnWidths "6" +${cmd} --apply "tmp/${t}/${t}.transform" "${t}" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-txt-linesPerRow.sh b/tests/create-txt-linesPerRow.sh new file mode 100644 index 0000000..d913328 --- /dev/null +++ b/tests/create-txt-linesPerRow.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.txt" +mon tue wed +$2 $300 $1 +thu fri sat +$70 $20 $50 +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +Column 1 Column 2 +mon tue wed $2 $300 $1 +thu fri sat $70 $20 $50 +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.txt" --linesPerRow "2" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-txt.sh b/tests/create-txt.sh new file mode 100644 index 0000000..b6a8558 --- /dev/null +++ b/tests/create-txt.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# =============================== ENVIRONMENT 
================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.txt" +1 2 3 +mon tue wed +$2 $300 $1 +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +Column 1 +1 2 3 +mon tue wed +$2 $300 $1 +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.txt" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-xls-sheets-utf8.sh b/tests/create-xls-sheets-utf8.sh new file mode 100644 index 0000000..67ede5f --- /dev/null +++ b/tests/create-xls-sheets-utf8.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + version="${2}" +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cp "data/example.xls" "tmp/${t}/${t}.xls" + +# ================================= ASSERTION ================================ # + +if [[ "${version}" = "2.7" ]]; then + cat << "DATA" > "tmp/${t}/${t}.assert" +⌨ code meaning +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA +else + #TODO + echo "https://github.com/opencultureconsulting/openrefine-client/issues/4" + exit 200 +fi + +# ================================== ACTION ================================== # + 
+${cmd} --create "tmp/${t}/${t}.xls" --sheets 1 +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-xls.sh b/tests/create-xls.sh new file mode 100644 index 0000000..711b4cf --- /dev/null +++ b/tests/create-xls.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + version="${2}" +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cp "data/example.xls" "tmp/${t}/${t}.xls" +#a b c +#1 2 3 +#0 0 0 +#$ \ ' + +# ================================= ASSERTION ================================ # + +if [[ "${version}" = "2.7" ]]; then + cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1.0 2.0 3.0 +0.0 0.0 0.0 +$ \ ' +DATA +else + #TODO + echo "https://github.com/opencultureconsulting/openrefine-client/issues/4" + exit 200 +fi + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.xls" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-xlsx-sheets-utf8.sh b/tests/create-xlsx-sheets-utf8.sh new file mode 100644 index 0000000..c90108d --- /dev/null +++ b/tests/create-xlsx-sheets-utf8.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + version="${2}" +fi + +t="$(basename 
"${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cp "data/example.xlsx" "tmp/${t}/${t}.xlsx" + +# ================================= ASSERTION ================================ # + +if [[ "${version}" = "2.7" ]]; then + cat << "DATA" > "tmp/${t}/${t}.assert" +⌨ code meaning +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA +else + #TODO + echo "https://github.com/opencultureconsulting/openrefine-client/issues/4" + exit 200 +fi + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.xlsx" --sheets 1 +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-xlsx.sh b/tests/create-xlsx.sh new file mode 100644 index 0000000..90fefe8 --- /dev/null +++ b/tests/create-xlsx.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + version="${2}" +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cp "data/example.xlsx" "tmp/${t}/${t}.xlsx" +#a b c +#1 2 3 +#0 0 0 +#$ \ ' + +# ================================= ASSERTION ================================ # + +if [[ "${version}" = "2.7" ]]; then + cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1.0 2.0 3.0 +0.0 0.0 0.0 +$ \ ' +DATA +else + #TODO + echo "https://github.com/opencultureconsulting/openrefine-client/issues/4" + exit 200 +fi + +# ================================== ACTION ================================== # + +${cmd} --create 
"tmp/${t}/${t}.xlsx" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-xml-recordPath.sh b/tests/create-xml-recordPath.sh new file mode 100644 index 0000000..d995255 --- /dev/null +++ b/tests/create-xml-recordPath.sh @@ -0,0 +1,91 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.xml" + + + + 1 + 2 + 3 + + + 0 + 0 + 0 + + + $ + \ + ' + + +DATA + +cat << "DATA" > "tmp/${t}/${t}.transform" +[ + { + "op": "core/column-removal", + "columnName": "record" + }, + { + "op": "core/row-removal", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "Blank Rows", + "expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()", + "columnName": "", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": "true", + "l": "true" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "record-based" + } + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +record - a record - b record - c +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.xml" --recordPath "root" --recordPath "record" +${cmd} --apply "tmp/${t}/${t}.transform" "${t}" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST 
=================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-xml-utf8.sh b/tests/create-xml-utf8.sh new file mode 100644 index 0000000..b026ab3 --- /dev/null +++ b/tests/create-xml-utf8.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.xml" + + + + + 1F347 + FOUNTAIN + + + + 1F349 + FLAG IN HOLE + + + + 1F352 + SAILBOAT + + +DATA + +cat << "DATA" > "tmp/${t}/${t}.transform" +[ + { + "op": "core/column-removal", + "columnName": "root" + }, + { + "op": "core/column-removal", + "columnName": "root - record" + }, + { + "op": "core/row-removal", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "Blank Rows", + "expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()", + "columnName": "", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": "true", + "l": "true" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "record-based" + } + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +root - record - icon root - record - code root - record - meaning +⛲ 1F347 FOUNTAIN +⛳ 1F349 FLAG IN HOLE +⛵ 1F352 SAILBOAT +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.xml" +${cmd} --apply "tmp/${t}/${t}.transform" "${t}" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + 
+diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-xml.sh b/tests/create-xml.sh new file mode 100644 index 0000000..c5f1fa9 --- /dev/null +++ b/tests/create-xml.sh @@ -0,0 +1,95 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.xml" + + + + 1 + 2 + 3 + + + 0 + 0 + 0 + + + $ + \ + ' + + +DATA + +cat << "DATA" > "tmp/${t}/${t}.transform" +[ + { + "op": "core/column-removal", + "columnName": "root" + }, + { + "op": "core/column-removal", + "columnName": "root - record" + }, + { + "op": "core/row-removal", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "Blank Rows", + "expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()", + "columnName": "", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": "true", + "l": "true" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "record-based" + } + } +] +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +root - record - a root - record - b root - record - c +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.xml" +${cmd} --apply "tmp/${t}/${t}.transform" "${t}" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-zip-includeFileSources.sh 
b/tests/create-zip-includeFileSources.sh new file mode 100644 index 0000000..a2f1158 --- /dev/null +++ b/tests/create-zip-includeFileSources.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}-1.csv" +a,b,c +1,2,3 +DATA + +cat << "DATA" > "tmp/${t}/${t}-2.csv" +a,b,c +4,5,6 +DATA + +zip "tmp/${t}/${t}.zip" "tmp/${t}/${t}-1.csv" "tmp/${t}/${t}-2.csv" + +# ================================= ASSERTION ================================ # + +cat << DATA > "tmp/${t}/${t}.assert" +File a b c +tmp/${t}/${t}-1.csv 1 2 3 +tmp/${t}/${t}-2.csv 4 5 6 +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.zip" --includeFileSources "true" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/create-zip.sh b/tests/create-zip.sh new file mode 100644 index 0000000..99023d1 --- /dev/null +++ b/tests/create-zip.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}-1.csv" +a,b,c +1,2,3 +DATA + +cat << "DATA" > "tmp/${t}/${t}-2.csv" +a,b,c +4,5,6 +DATA + +zip "tmp/${t}/${t}.zip" "tmp/${t}/${t}-1.csv" 
"tmp/${t}/${t}-2.csv" + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +4 5 6 +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.zip" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/data/example.ods b/tests/data/example.ods new file mode 100644 index 0000000000000000000000000000000000000000..abd5abd136a5663c80bb370930a7434d05e99873 GIT binary patch literal 9427 zcmdUVWmsIxvi4xXgAGn_hd^+b0Kq-D1_qbG-625GK!Akc?(VK3xO;GScmLp=FDHBN zyR+|ee%_jA&GedQ)!VhYSJ&#QdgY~{pn(7Y900JD52fI5!4=E^005qj$G-rUrj~~G zF4l&6*47rLAU%6iD=?!Y*nq)G&(742!OGeYY+wa)urvhQGuT<%8tNI?nHU<{%l`@U z5#~RF^Vkx$0vnkcJJ|jU&5oJT-pa~CU(c5Dztb|Zu+pdSFu{Lpys0TLYv1 zsD7~DE7bP~y#9s*Q-X{zFt*e^wPwjJYyoO8jK^{4^G)GVDhe5S+sR`qMWQSV;T7?F zdIVh|WjU=%tRNbS9&wY(EjsRchT6;-?jKWCbVPA4j?DZrlWFUmz_eVtpZ#BQAXyBS zr%^k{o$$&vkhYF<+K3~4uLy^OCkzuZJzI6h;~H3oH&Ruw-${vZxgh2Xnd8n)DMQe! 
zQfv&YB98PC41<^Md2jDWP$w(Z!SeY;fvYujZ@KebcN^*k;Q8}|%@54g+46^-L z5=9g@$#F@U%z0A%!7^~z3*2&v2ai^Fi?a9_TARbrWT*8+GzP+fFa({Js*z3PU@~9G zp;@@x9pvj;H>AnU)LW(4Y^rV_`1bl|6^QuO%0p5Py=HXpf20_1gd?qtZUGZNbxq;D z6++hnDb&)sM_ONpQg#r}rwo`G<*8Ijt0GtQ*%*bVY+&ui7|u`NB*%<-fgM6lcsuVM zMPYPnK*llI(48soau$|3gOB>~Kv>S!OanY0BbS+UOo;9d;70dvQAe-zmw+)Rj)CgE zZF2OL_GOAB)$&}RUNQM&0*ih-D0B-SE1~o;F^{7XUEb(udGt>X;-6k*7Cdy()(ygS zAJd0tb58~fS)a?sm5>|R5DuU6XJ2ur!z7jZJanyhjGH_cu~MH*-7K(D_gv^$-P!J3 zU7b6t41QQc`0=SJHT$${BBk{@T+^lGu)i3qIE}T3)U`UMj_1IuQ1wO`PYZC9&erV1 zlklmYF;R&$A--V(e}_>n#i6ldLW1zJA^$mo5SV+9{X-g4_v>7WFR~JLINbfFzSioC z`2v6iSIaelr}^fBDk`pfobUX9oNt83=Vqs|aG~;s`mR2+DsP|2}B-AY>5K%NU zrsi%zeua!~=6sMU+Zm&YWSI!$PgYggv%oZR0s6oK{k zEpq!}#Tn*~x}09bp`w+L*V#W@U4DA@Wg4>G)A8uMDZ$P3L2TTj8>T5%aua2UR&y#X0RH_p+sh$aQ9F2@XyS^mc z;+Gr7_m@1ylVYu}n`b1Dt*M@V3OM!&F&<_z0YX6$Yc9zsx-6mQP`u7S@Te z;1xhF9b|s948Fd8l;f6dcK0d?GF$)FNt>$Tg@#v?Hs^ zF*U*Jc#Zv?e zrH%p@Cn4&MN~KSHz2DJ-#4{wN#Z_Xhz?}8jrQ0|%C|dFwPSV%(`IyL+ThRFqa$g6B zcs5xd8r3&PzmR@eVtzH*cdV;*7WMHA;A>H zkx-5yQZG#(lYVrW-d!F1Q6_cq(hr)skDJIEGEeV{CWUFG`pf{b@R-SV!NCe7_+`Ni4%>igigT9M?o}M%|=`Olf zKS|&|amV$U>9Q9dHiKYUHo<}`+j|!No(tdVbPuAz{iS+Kf$ulndIwZ;t9PsK=e?lK z38RN#w_zyhT%*AtrEXQ4;dhq)-5cWDg`ZSWaoPeSJb*Uj=zWct7tv$01rBuec2^ZVYK~ma+XTX~}d` zS*=>bZ&BTZI_>)vT#LPx?S-y!@lAc0a7Ur0O9c-Gjzoh9MUoXd6)#`wRd&lNFEzuX z5oFH9IY@_L=$pBV(_G)5Y`i$xqLDg^14qwhcYz}HshUVK`Zhkw_7qI=5uFKRZ@CD=3Y(IcPNwk&iiu-(|E!R%j>BMnDtRdXtw=~-Sqj3 z4mGA_%oqL&XlE_>r3!4#3zhp$HTWSdK@V|!lL89v?bp?b#oo=mODv@6AMVZ1&qo}ysCa4W z_g-|>0cS_5GZSAwXD&J(U2|T9A>~oLq0sNukz+10i8Xf=jL4hi`+%PqB=FG7Y(w4l#@rIV-qKU~8 z4%*lASSMjk1Q%3I8-u)F+YBDmly_$y=KjwZylfHtR_c|_<#S!@ybpks0n?XPr?(vz zMS*IuR$KO}coTPSb+h!O0eO(?h5fet#R#jGR6T}cd~PZG0$v*Tuut!;Y}#aX?`Hr2 zKgB=oQxG5DTOcd2{ZmNr_~P1AmjSP^1LrS*z8dyoqXO`8IpMHd(%CXQ_(&mf#3r`7(QI2?dPV+3*FT?&k zcu3lTZ<;cAHFRo-_7~DwF8X)NbCDqTSw2b%z^fZXFp@zDyod;|z^2Pu3X7GNNF$X) z@2|v;j4&O%ObS(3>TyLeZ0G|w?VIy4Vej|EPPKE&jK1c^6|ekuA)r?uh%|$vmG&iA zu@r@m@Hv%p5DgP_A(xZ@BLzDh#(oS=Vsi3|MYXTle5&hvucoAhD>^LnZ)BF@tK~gD 
zm82i$e09zX0eWshUskfOJ?=hLRQi zV!9x#nZpSSn4P_1n8{3GBXM25kiuRt^hbWjFFtcGQ+;;L4bou{48#--hE&6l1i@Mq zqs1oQgV1p$wn3D!n8VJjILdp(nYJqAKIU%KBfaQIsr<_xv4GrgTf zz<`Jr1W4csrsHH8?Ct?suan@sR3*&R`{M*D(fT{T5uT~^nVXn!w6(^D22APG7IR`$ zB&(wP*X@0Z0cXwjT*$c@6;fJtrFpp-EY{gazemh+rNW_H^Lq}(G3o~sL;D;pr}fUy zIzBDHE)emYz&J%wsLM;9_>Gb|9v<2ISmjP2XZvKySH7Z+v$6yHVMl*+-J_a|oW9{O z{m*(EHU@5?g?qd?&l1>*d7!^@F3cy|pB_aGI0UOr6d{6~ROXRw+wfEoA!gC!1>FOv zPyCZ)PLA_FC*ICITWU+R%NeH+8h8=pjgtVTU|rRBTk@39&us=T9lx10&8c#(FhB!B zM5oKvP9Tf)g`ueHVsw-rw|$h4q5~&5S;&x7oY?%n*6n z@6S7ynM3J@zS!?^DwRbiOC7BF$jqlqVlWuM*hHZG9btbO;Ekb7uDAO6)7cwfJHV*N zIxA7o6|tjBtm2*ge9BN90~^+1^xK)^mDbDCJ`|RFHtxAuC2S6gQ7k64-1464-R0n; zH3Z5QYinEXXDfm5RB99Jrwh$-sEi+33*)6 z$*j>kY7v&L*+WKyP&G1X@`^*`K<1D@B^zTsAU>i*b$u8?KYEU_fd-#F$HI{S>5b48^Aamzb3vCrNGD5351QDrY!k%!ZmV7dGo|D2LN4YUX zU%I0)e7NLNA4x^8pm0&nEZX;Uilkxij9;$W|59DWnaB2!!ZwR= z)w@oN`U$7H6sq&kbDpazjnCCF;vA3Yf|>u`DOAoCH<^G=@1qk~w=6SC(-|VMuz}6H z)z$oT4R^1brS<#c?e%f;)xmVC)!Q2QuezQaT63HW3j@s_Y!X*HQxaSr_u4x&SO*Nn z>kG3w^UW>rCK#`GbPXy67+9MiZB}?n-OV5D%+5KUJY;TUVDA<*05JC$VE;andkTc@ z4DIbr!N&jE+wW;;+Wp`JdM@ZjE;&$j#InAW7V6Z3l<@XudUi`W1F2-tP1ntIxTDG* zAwe^sSi-Vrag18>(qm(DbF_9T-bxBZmsj{Z8bx(&8x@5UM3Atyh*5QIs*TeX+eP8= zByyr`Bvdbh10)K%O?N7)EnIWNC_E!iv~*+w>MO-8Z!#Uq7QY7i1m-i6X&Jg^LhW#W z%YAw#(vX>1Ex6`{!9cxB8hXz(i_eVFl9}$e_8ifQ*UYcl=1s6rR4~_Qn`=FBU+BKb zpK^>T)t@;imQG)%<>A@=tXn%>WO!l|1bs?6qh(=YrG z#xX;J^t-N|6D~bo6BZFEN}FBvdUi#z$es{v5h2!vc_KVCsoMk`6t#NI8c#IVaHw3% zk99u+bx%Rcpe5v_3q(RWP~a6i_Ad-qzzMk6Px`F&O2S)fT`|z7kw)))re8HRgbJWY(7grs!#vLz zEvBz44WwqYYk-JE%rIIGymiUPI-BvHTy_8p&ea6RaIqXTQJ?9Qfysj9Ote0ncoC@@ zK#rH-TjclmMG#+>C5$lNO1HBoKp4l6ntSpGn>7cTUN|K#^s=LOWXg7QIA9 z=>E;`qw^se%KgO=@s^F&TLQQ66*IEbgz5K@<#!3~)Zf|P1+9T3xav%<7wyoK$sm-~ zMHyX@L>)Ha-FZYqVQ|CFHnxb8;F_i0s0>XfT*6g~G8w{drRiWRpPdZPg>Y1{L8Vs! 
zzQn;24x3K-w)oTIlavxghEqbFy4?feLg5Gsc;of?y>FKIFiDtJG?`sm6d*|wL4G2h zERnLt1Z8gg#4Q{-&uhGIG9-mto~ad?j=J_r`#!HFSJl@h;oAzD2NT`Dv$0{gB`(oE z4Rubtrtu+CxmVh@SgfR2_KWEg!J#lo>>Cl_>7QnIK|xuRCs^oMn1#dnFm+0|cN<_V zymiY*)SfAA7s0r1&=cho9|B`a0P(3Ywu-f#=YtEWGu!Hr$6L{0S!zSZBL*L;w^osQ z%f}2sZvv+EtGJ}(R8)tngwS@KkS$u4&Xh6HT2yKghBdr4FkkTGU3TJK_}|~@Y>lxk z<*Xxw_Enz8-<=^EdDWcbPt^2!8`JJB8H^X2>$%N#ga?*lG$Fk()eu9xW}HNpiAPD3 zQ@zJ`rpVwpi0&5DoDf$oIxE)?JCJf6)q)!D1Q!nNx*r54YUo*p5xJYsx>ST5zYUKM zby!d-mTWgNU8^Eoe>dxukdyhUOm)yiWTEzD>C5y*8PVR#@rw?FTfvOb4ue(fgfs@y zgrw9X*_TK&4wfw0mn9|Nm0`wWlvPiJ#kBP&S!xL`Hd!48&EjZr9F9Ulog3EV#`JR2 zukQ4Oi)YL#!d-;7Ig6LJXBB0jLOpIPOsI#;Dn+Y}W5@GcRLx?ID%0ana+&Da>>Ja! zUb0lgug5UazNGiliBBKwJC}Up=IC~#X=KU_f%cB;gC;z80wKe2^KB80#YAS&`i;#c z$bqMN7fPhGj(}@Xh6SeqH3k0)HHe&>rN5iD5!c(;qz`J460jtY zt6LW4sqGRu+;O?VnK#Pxi}d-{i1;?6v2||4Z_fVylHu_~OS#Bi$pfK34>XFG5wCdvhK_i^ZtTZg{zxGA5sL zgVuDPyu1BaEPe(I0DwyK)4Tu7n&VY}5OjtG0G^Jg47ZYrgQY%L&(y+>(f((X!5VBF zBrhw5jDYu)+Ci2O7g2otZG1c);2twUfT*ujh5!IWyu7s18wv^vIyyQQ78X7}J`oWS zDJdx>B_&NwO%MoVZfLBO+lA^)u?;p45(qlq;cJ$X+`ndrSZ3Yla>v=_CtrZ zHK+DX>+U0u{?j2ZoNarMZ;iNr<=YR{DnX5kspT3~xgHN3pg^C;`J z?DHYs{D8Oe;9}#*V)gDwUVD3cZ*TAD z=;+MMOvlD=Dg+TQHN=E&vN=<4d~+UEAo&d%2H;=%dW+R5d?;nCjN z_3i%p_2%Zu$;t7>_37o|>DA56`N`e&`ThO<(@Z@)bdMjG0{}3w5+bjaoPX?1U0D&Z z0bvg*r%O8>IVE#Zm4?u1seyUkdA_tpI8b(gWd`{k1)KJ%$Bc2SH$p(fh0}@Ai5*q> zoHhNmY+1eu$co?P<~E5h&?Kf~yfRU)8vm$2`#K^2CQ zJK)!6pUFIJQmIQW6+@ez!2BjYPtu^RyCgD5#i-nGxO$HCIxnNCK%9~(qYbAtxoMkK zBD24MF%*3a$SgUMo4)FOI>3~eoV7itMdV5Y<$rhTFzC8RL%on$Q0>bSes<)2`l9`+ z_Y9+48=HwuJ<2Fg#-Xu!UVi&6sF2~)`!iE{1GBGG54Qd4%iFZYDyniZ$gmmBE*{4B zYW?7|sSXUH#0GmkwlmctFW2q;3r0C)&o6I>zO&8)Be`er)5SietfVq%fIz)${Z$l< ztFtC6G&{M)=_GmW%-1{EvU6S=L4l0gU)fVp$8vE%gu%?TZY$t46x>>6qUVsq!=*O5LU2Z()M5h|OX;k>?)S{@`PKTB*a;a6m?^)cn z17H3#j|f*9kB9GcDDy`xksMT>-k$4$Q;{YovnNFz@d_TTa+FTIDs^pYVzbuj$<;u^ zUe~>=$vPD7wmXS(_^H?2KaKE99LTzJ@RebLQ`&8awkGV65GfRfO&HkG&IVlvq` z$%qo7JdOOaU}bfKf`uMn1}eB3|6JZ&QPqHahk_{ms=HzS;>Xuur5{pMopmuw6U6I0 z=E6+5GtpO7;K`axBv>|v+r*p%i$IRAMJ 
z(&U>FOfhZ2G82%3xq%}0Bx3$s(&BF$j?Ui>6!o5o2>+|j|%eds6- zTRl1{!?;0^RlNI$#zP(8EPC4FROdIRxwLDifkcn%zV)xwVCg-wYxny5`-`*Z-Yz2R3(QJb ztMg}|C{L>uUF&tMH=wMsY5Ftt2ILkVXn2x^e=Au-@-V`q4O_ATwfSBfS65dj#}zyj zEB7UP{iwfJo96NHDmwM(8}!mPysIaYOOo#k=&#MQqnMM%=hw*{#t#%dM$o<(Q=7** zi4^Tcjh3uEJxo_H?_tPaaJf6mrJ{=qx|;rAq@>0cZ6PVTEn=R-zAEy@feEO=uSYXU zIaYkcB!*&~SYxqUJnyg@DMqz`hFcf24pxTC&dcnBtk%aJ9RdpPB8Gc#oGNSL z3D~QYR@^URbUr7#H{3>&AowQE#>KGgq8aLWtev7W(Uws)KgzKhy43#_Z1gn}7WL+U z-)$-BMpkwVwlQnN=_vG5*jTRSD2M}w7bQ194RL^g(2Vn(L43fi*F(xls`EbTi(E3l z*Zq}oQISFa)jRW@OY_d^Du!Z<4y|!S*Ui*G!vFJM56xQ-kbGR~kZJ25ksrM^K;n(8 zNRiMx-`~e#n2%S2#u-Y9cY^PN^QzZKASw;UrdlfCo|2wp`Ur z5qqPMIdNpdGi9#8Ep|(_aOy@$b_pdua*4uI2>BcK3vcp=iC(5H_IUV$Y0N#|1S;_E zDl{{Hx{6X>@oVensCy-u38&wuuvSA~QCg~w-ywYG&XICyn1zvcEP5Usj*NlQxGw{+ zRF?SyKz>WYITJ3l{66j@-{MLB36PXJF&yyi8+uHm7^t2=%vB?V3R6*3hr6YGZ7-pO z-d$rwA)d-tYFrauI~F42**L5_?Ew{*Forfk#utzjovCyGmmNrjE6ZYRegeA(zn(gB z6TD;ZIX~2y{O8|FbQRf`7d|fBLq6@0V&~`OnI0=!^p7#Hyc7f^5b%3u!PETwlAk<- zziR!u;@>-~U*}4X&EGQ#{@nL3t-qoK{|(A7X2I`Ko+{|Sq~)>sS6lwbH26Kv({AyX zTs}7cit~$k@OzXeCcrOYc$|U1qWqbO@Oz{`-dJK{x1oC-1t8dZ2nc$|Ety?)9p_Y!!PlC3_^aDhX1Pf$0XEKGX9saKbj5tm74#n z;veIiKi{Al+MhF4KQ(_B|NNm#?B7R2KT&=T(;vOZlVAQNtAxMt((+O;j|B+;0PXQ( M_~>-hh@Z~>A9vzhp8x;= literal 0 HcmV?d00001 diff --git a/tests/data/example.xls b/tests/data/example.xls new file mode 100644 index 0000000000000000000000000000000000000000..cab9cd96cb4c93657356877442cff3827c6a848f GIT binary patch literal 6656 zcmeHLU2IfE6h8NMx7_l#+d@%6@LFJ7+Jcs1fEZ-kLZL}%8!0g{jk;ZK+m&r=YEAIL zxS%AOK%(J6A4m*IUxV3 zd0^7-E9WqPNelv<^Lc;+lmRST;K%pS(B`^Yga@9MVLYSa%1#+VJR+}Rri%~Dz?fV! 
z#njB*FN)E>mCl^~DG%JrS1ixwh7z2v`h+02~2Q4pab3fTchs zunc$rco299SPrZJ9tKtdtAIxU7kCt?0#*ZSfVDsbs0L~P_O;fJnO=|d2H-JY*5Joe z9+*qb#sg&zX0bFVUo85DTfXl7=fQ~6feGcsZ;_`rfAegIZMFEzseeb$(Gv1DN?Wv@);1 z!)mAyw3@I3CL|_F$t#7zohn@wSSVvWVRv~$-bgHYPFK#DT+O(!k`3c7EFNdG5FNw@cfYVbDi-+I$yr7fV{qdytaV6>?Zkf8>=cM8OGsMiN@ur zQjIH80gacW=4re%70kS#+&u#RTfC-Wt?nK0ywL_q-tt$kN;V$FdTqcnVL51TC-rD>Nju6W$}$qs^SBeLm6S&-)L{uLA*Ib`8B=!|1+i4&Dom$?7qJaBL> zIN0x!c=Q}(=NBI!`BQ!1nf zHh5;mok{=+Kk$rVuz9eV4KO@y16&9P?1!_sVU(*g!WjPbQyd zNcU+-Z(o;_J}FgbYd<>4=djdU^;WiiHnwCR+IHkM^@Lb&FW||IVDl6=XTITpV{<+= zjY&eVLP8lc;K(!?`H?Rpk8)*`{(vYKV6I94&A%XrTmkLi$B>QM%YddN)cim1$0$4hX8Koqim7)JxZ>yew@z zflN-YWxm#9^IU#U*%k7cmPd26R^kR!5`bYIR_YLN19`h2OQe5_GSuKX&8qhm4eh&@ zhWxCZq9OOoDH?KOm@_owN3Y3|PMjW-Bb|5=X3o%&+l$Fjlb2(Yqb3Q=oS`U}fyq&m z1T#5m@)Dg(Q8$;O=-R9)iaP&LRR7>aVIdn&BA3Gs z+r)iM-_puE|DHsAWccObzEOAQ!MK-nw{BkF9qaXyNlzDhbE2ZAaY<|shAybq8l zV*n{~8lc4sljd89)M)jRiGyzY@W7y#@J9Y!&bk|cC?GAH66}knnMa5MjnDlx@nhmA zi|OX(@ZhgVBY9IOG=AE!&whLM#D*2&}*T+YH52T?eH$U(tfyqyYQa z8it_kl~*wSL45P_-(MU40`}>gP%?^d;bBM$hZ^5LkDFr0Uk}}jev?7j0Sj^MgQtHW cXV;%Cw=sV1CRzj>23t1OG0u0L6GhNacGe4kS>Xl^bUUC zxpF-Be1F`vpJ(sc&$HKW&tAXxeOIU|0Fa3g(9zKm4$A;)2zLw<{`(cs+|Gra?e+{# z7?KBY;)NafM8a#@0UXgj{=itD0C|hiG2IvO0<>_%vzm$%5+nvxPI3QM2a!kEY>(n)T zx?)s#zah+Q-po7L{^cvcPqWRl@RG9ytA)!L?J!tyN>#~@beQ{^A(FJ<$cj#0{1X_H zG&^@QzRHlKhE^_xa+4jq`?;f63^mKfniaK(*_U_H8Y z+)ZXDkJ<(%Dvmy1}O97`h=J=)84}%5Jb*0y#HN z7}QphNb2m<=Q`_gfQr4lrxyY9WN+$7_@K^Z{;X)keFcgo4-RrS_fIr#k9zC~lOG8>f`v;g7p#$Qiga3OUg$*5^*4UisCZYwWmi1vmBOzTqZ0pos54TN zz?7Sih{N4;oTjxTwxTrOPZRfkPAoDnOa2xHvc!3WC=&r5UnCY$4oDeKVFeh`7JCLX z43Ja~ycAzD>%dONDF1LKkmCon=N#Nr(S!A#rE`>-9ODU7uXD0_8PzB zAdB_NIJJW=wFav`xwJi?S{;vHEf5?>qX6JQuYv-8m`@uuLrfG zUfqJI@#)eOIvhYXfPVrJ_Fo`!0eRY)yWB!(Pgg}{mg9cQfrf5t9Q`Ar)KQwAaqAR} z7}|$QLaE(ytXGWHFRgx@TnZ|W1$Q#Ih7p%A&a$_`uDveE*JoW?m(I5Z+;gfy?4!65AFzw~i3>>r0(rP!=o$FkLrT=r#B(XI zVP2Md_mPGqv=K`Y{J1ZM4ue{;(}@DTlU|vyfyl)e`^&hxe$|M?XZ}SXLJ_C5;lBL3 z(K^dpO~Tk99*m~}Jdcy+HoK<))+X5_bjvgZjX*i%(^Z9m=I7$bt1)Pd-~z_|XSlG? 
z1wJ;S9?u6~kiBV?Y)qbf1>yk4Pf3-q`{Z?P*vBTW3meA^A{ZC}&u814Ve%VQwV}}M zhe_P(%yK{MMY>RyLszIT^0L<`6bY3vF+0=WzO@CH0vJue1jBkRbK`~$2kj^1>sLK{ zV@)QcbR_?qtZ5tzfZX0x4rJee$Dj0 zu#ohc8_sS`7$2U_G+s!CoSNh%kTk(u|>W&3q2mJK>?qP}Ag%+vzNaU7bK z)dmhq``7WZzq|MHM1fq!d!1o{g+I{;=yH=YfZVm(B-W2Rcr;Sc`$Ox)-s{bc2jI3K zZl7T(!Frn056VXfQv0QXU5oQs8Mp~eueco2PV$X1oC8DyQnxkZ_AOgGz27=)-9Z8D>ax9E*l2_*`G zqt_JwpU_M8TLN9I%*{bA?7uFYw?J;uR&tpYz-oc%Sb;Xd$zo#+EM2v|b}$8v%Rl{1FTELK|wgnIjd`YXVd8nHWYA1K7eL z`3o}A*3!j(#dsqi;nwhbo#why^`k=(ht$ht#N*{Ah^p1)d zWRhkMx73`qY3lL7|Ht5Dx-GtEF93Y4^shFbgId3)OC9ij{_Wb(F&Te3d@SxQ*&R$ z5)*cAVFisF|Lj2~J?8L0Vyb(LuUpvXF(>4A8kb}6QpB;3E~%BRcG3r1$REuT=-gYO zU>I6fNxKg%(F;Zt-wDvN(~i2C7KuFRkE@_Vq@Ag2IK2L;6gF`K>4=>?ZP$OjG5(Fr zo=d-r zuoXa^!IW1peOLUjt58a`0(1Zk6Y(9WX z$+kP}p$RZj7aM3(NW41$URrxxmYO@&ZAt z8{|=f51l?xkR$VIVaIpSW_anNlaNi3-tJBn*wN-*)2Aq}HFufJh?~*n_hUG$6Q4xB z`FetevD(iD)ta{DLjc;GE$b=*=IkFNTa2@v?aL{rUqMZJORd)?O}HQIm+{sIr{tyCiK_Qb1J$>e8L=ZwQHxhS|!sZOqkW@U=%tIhGX)%;~7s3q?75 z%jJQ2?0g3In>YC*S1mV#dA{zmK4#i)7b z9#y{VWmYpo$c62?U*&3XjgTyHOQ^s^z)$ZD=UVRA3&q2!1;WIih_~$Dt~fWXhvzNZ zd;bsibN$VJ$5{b9IQ!9`o@hgXyRWs}-ndTK2YkfY24U{TkUWk;hb$)~&96lC^! zq2y`4f=9F1gsipEXE3k=%VGS0#s`!U)Ld4&!m3y#YP7}=2XBsqli0G5Bm#jltZ8`4 z7GwoDm&B_%DqTzDxl;v_MhPoN^`CC`KjDJevfS4&3Sv&o6U5A+YQjZoIXDCvC8j{f4lnwt})U;e@$JTCAMrBwbE#i~0IkIx&i?ZBE z)XQLI?I-;<8oS?z-nM7SDr;JzfS^@zFqMtb?XB!XnXM`pZHQNVK1ZNH5)mrlzwXq> z788<(R4W78*vM38FpvqkdGI_Nn?x1Qe2&<9-)B441;R@mkb$iai#n#Cp&R=sedQWm z?-8L(O~+;+8QiGvoXAhYFG%agi{~C4-V1PLq&c$ov~*-W*iD@_%RN9|vxA?^y!01O zVosIf(gsj3l%Ju(+&z(ioWrE2trCR|A8prrP2)|)dfrT&9=x1v3M0vUsKh@Yery?^ z9R6tV#d{&^{beIhuifxc@spTMs@|ZGgTvJk$U3Cl;c}jUGT+amo~;%o>ajyg!vh*B z9za!#>v(LFx^aq@qT>(lx*3*!waR?U7nC~}XRzw2>Qa{Tk*SkDuW7y>4kzQunH;5M zng>9J$zozCSWy0SD8{q+nSqkP*$J3tYc-~z4d^gyO}Vt1$3LyUd#J!P*956wrXSI! 
zsH3R1tuW3wHGmx1g5=Xd?#V<%?(0!6xk^Fc*9k~pYOoY&!D6|q(ggw*d>3#6x`aL^1_0H=Rc0E)lz|6lFl z-A3qFtJsn-WYfig1#cDaSPLGU{)#RK~`IcCz*3BH_sK&Dk#g`s7m4+jY^n?acmDG3fY)nFHHXGDnOi z#&^x;&~w(=MXoaG2YZH>zckWmy=rR){{7Ye*d!ADUEf<71I^7eKtO8;%U@-F%$SIB z7Y9-MQI>`-iAlaP)ZO_7iX@GC5>^3alpM8E!4!_C1B&|+>I?sda7Rhy_ za8W_)FT4@?Ld38T5fH-vv0H){qX=e>rm8?kCl_{8N1*wyrdu5f4<%mnG4Y#&h~xND zf=4CDg+(j($zgyTlXAsO$eIS~t+UL)B9Xlw>xpP@3g{iYU#=L-R~OD3Pf{ z>;1k$~(L{yD;?il|1r+{Salt44iMY;6g)%c4C0(iH(&WnMf? zRwtzMsr}$)GuZJdl9qL5kFhZJs$JIm2BD)haJ%0=EYNgr+XfMP5e@3b;vcl%K)r2^ zh4uA?9xVSW!VPnM^TX6@}|wzV4~F1_9V;ZtnF~?tO<2kPl}nO?ugn zJb~U8FnW&ucgs;QZ@oeZjhr*;{{3q}F%|YjHzau$wdece=BcmSl(z*!|CiGnC-`>3 z{t^6mcel&f$?0~tvf@>2;i3RJxFqU0Nf*G@V}dwOlL-;mN$|O0W`jRc--LmaPG*N@ zbN4*?MMRDgPo^9`ycZBxoN3A~FH0v{D$tps^7_svoIYXnXH{GryVwnNE`M159t0G3iI^3`GWKimQ!JNP0GT`EIap167$tuDAmagEu@_ z|9zB12!Fyr!qEX_?f`nF>FH$d^71xT5KSQ!IIHVIs!|Y~<=m zPot$svN(&+PBQaQ36T@_V_x~5?67ZRtE8D zL~U`oCRlg3e&N`1zmnGwzBP*|x7&k=M1=5r2zqx4csm6Bce^tZ{nPR86l>-v8*+|6Jv6=KuFqDB!ig z-~ayZD!=OeKUcY%9dE0I-*zA8*DC*|X83cRyLsU@m;N?UcxZ3K@n>fJbA`K7b(=qa zTQpps{v(_G>3)~Mw|e&5gb41q|E+F+df&y@Ey;hI8T|JDpuDOA3Vg2+5bnW`5V*Jk Ih;Q%y2U1mcY5)KL literal 0 HcmV?d00001 diff --git a/tests/delete-utf8.sh b/tests/delete-utf8.sh new file mode 100644 index 0000000..cdcdfeb --- /dev/null +++ b/tests/delete-utf8.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +DATA + +# ================================= ASSERTION ================================ # + +cat << DATA > "tmp/${t}/${t}.assert" +DATA + +# ================================== ACTION ================================== # + +${cmd} 
--create "tmp/${t}/${t}.csv" --projectName "${t} biểu tượng cảm xúc ⛲" +${cmd} --delete "${t} biểu tượng cảm xúc ⛲" +${cmd} --list | grep "${t}" | cut -d ':' -f 2 > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/delete.sh b/tests/delete.sh new file mode 100644 index 0000000..c1c6872 --- /dev/null +++ b/tests/delete.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +DATA + +# ================================= ASSERTION ================================ # + +cat << DATA > "tmp/${t}/${t}.assert" +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --delete "${t}" +${cmd} --list | grep "${t}" | cut -d ':' -f 2 > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/download.sh b/tests/download.sh new file mode 100644 index 0000000..57eae8a --- /dev/null +++ b/tests/download.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# ================================== ACTION ================================== # + +${cmd} --download "https://git.io/fj5ju" --output 
"tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "data/duplicates-deletion.json" "tmp/${t}/${t}.output" diff --git a/tests/export-csv-utf8.sh b/tests/export-csv-utf8.sh new file mode 100644 index 0000000..76c5c54 --- /dev/null +++ b/tests/export-csv-utf8.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.tsv" +🔣 code meaning +🍇 1F347 GRAPES +🍉 1F349 WATERMELON +🍒 1F352 CHERRIES +🍓 1F353 STRAWBERRY +🍍 1F34D PINEAPPLE +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +🔣,code,meaning +🍇,1F347,GRAPES +🍉,1F349,WATERMELON +🍒,1F352,CHERRIES +🍓,1F353,STRAWBERRY +🍍,1F34D,PINEAPPLE +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.tsv" +${cmd} --export "${t}" --output "tmp/${t}/${t} biểu tượng cảm xúc 🍉.csv" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t} biểu tượng cảm xúc 🍉.csv" diff --git a/tests/export-csv.sh b/tests/export-csv.sh new file mode 100644 index 0000000..de71fb1 --- /dev/null +++ b/tests/export-csv.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA 
=================================== # + +cat << "DATA" > "tmp/${t}/${t}.tsv" +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.tsv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.csv" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.csv" diff --git a/tests/export-html-utf8.sh b/tests/export-html-utf8.sh new file mode 100644 index 0000000..a883263 --- /dev/null +++ b/tests/export-html-utf8.sh @@ -0,0 +1,72 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ ${2} ]]; then + majorversion="${2%%.*}" +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# ================================= ASSERTION ================================ # + +if [[ "$majorversion" = 2 ]]; then + cat << "DATA" > "tmp/${t}/${t}.assert" + + +export-html-utf8 + + + + + + + + +
codemeaning
1F347FOUNTAIN
1F349FLAG IN HOLE
1F352SAILBOAT
+ + +DATA +else + cat << "DATA" > "tmp/${t}/${t}.assert" + + +export-html-utf8 + + + + + + + + +
codemeaning
1F347FOUNTAIN
1F349FLAG IN HOLE
1F352SAILBOAT
+ + +DATA +fi + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.html" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.html" diff --git a/tests/export-html.sh b/tests/export-html.sh new file mode 100644 index 0000000..f281f25 --- /dev/null +++ b/tests/export-html.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" + + +export-html + + + + + + + + +
abc
123
000
$\'
+ + +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.html" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.html" diff --git a/tests/export-ods-utf8.sh b/tests/export-ods-utf8.sh new file mode 100644 index 0000000..2ba61e3 --- /dev/null +++ b/tests/export-ods-utf8.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,"FLAG IN HOLE" +⛵,1F352,SAILBOAT +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.ods" +(cd tmp/"${t}" && + ssconvert -S "${t}.ods" "${t}.csv" && + mv "${t}.csv.1" "${t}.output") + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/export-ods.sh b/tests/export-ods.sh new file mode 100644 index 0000000..41aee04 --- /dev/null +++ b/tests/export-ods.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi +if [[ -z "$(command 
-v ssconvert 2> /dev/null)" ]] ; then + echo 1>&2 "ERROR: This test requires ssconvert (gnumeric)" + exit 127 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.ods" +(cd tmp/"${t}" && + ssconvert -S "${t}.ods" "${t}.csv" && + mv "${t}.csv.1" "${t}.output") + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/export-tsv-utf8.sh b/tests/export-tsv-utf8.sh new file mode 100644 index 0000000..bbd80c6 --- /dev/null +++ b/tests/export-tsv-utf8.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +🔣,code,meaning +🍇,1F347,GRAPES +🍉,1F349,WATERMELON +🍒,1F352,CHERRIES +🍓,1F353,STRAWBERRY +🍍,1F34D,PINEAPPLE +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +🔣 code meaning +🍇 1F347 GRAPES +🍉 1F349 WATERMELON +🍒 1F352 CHERRIES +🍓 1F353 STRAWBERRY +🍍 1F34D PINEAPPLE +DATA + +# ================================== ACTION ================================== # + +${cmd} --create 
"tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.tsv" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.tsv" diff --git a/tests/export-tsv.sh b/tests/export-tsv.sh new file mode 100644 index 0000000..b783d42 --- /dev/null +++ b/tests/export-tsv.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.tsv" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.tsv" diff --git a/tests/export-utf8.sh b/tests/export-utf8.sh new file mode 100644 index 0000000..16a429c --- /dev/null +++ b/tests/export-utf8.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +🔣,code,meaning +🍇,1F347,GRAPES +🍉,1F349,WATERMELON +🍒,1F352,CHERRIES +🍓,1F353,STRAWBERRY +🍍,1F34D,PINEAPPLE +DATA 
+ +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +🔣 code meaning +🍇 1F347 GRAPES +🍉 1F349 WATERMELON +🍒 1F352 CHERRIES +🍓 1F353 STRAWBERRY +🍍 1F34D PINEAPPLE +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/export-xls-utf8.sh b/tests/export-xls-utf8.sh new file mode 100644 index 0000000..bf5532a --- /dev/null +++ b/tests/export-xls-utf8.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.xls" +(cd tmp/"${t}" && + ssconvert -S "${t}.xls" "${t}.csv" && + mv "${t}.csv" "${t}.output") + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/export-xls.sh b/tests/export-xls.sh new file mode 100644 index 0000000..3a9894d --- /dev/null +++ b/tests/export-xls.sh @@ -0,0 +1,43 @@ 
+#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.xls" +(cd tmp/"${t}" && + ssconvert -S "${t}.xls" "${t}.csv" && + mv "${t}.csv" "${t}.output") + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/export-xlsx-utf8.sh b/tests/export-xlsx-utf8.sh new file mode 100644 index 0000000..17b8a95 --- /dev/null +++ b/tests/export-xlsx-utf8.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +⌨,code,meaning +⛲,1F347,FOUNTAIN +⛳,1F349,FLAG IN HOLE +⛵,1F352,SAILBOAT +DATA + +# ================================== ACTION ================================== # + 
+${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.xlsx" +(cd tmp/"${t}" && + ssconvert -S "${t}.xlsx" "${t}.csv" && + mv "${t}.csv" "${t}.output") + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" + + diff --git a/tests/export-xlsx.sh b/tests/export-xlsx.sh new file mode 100644 index 0000000..dbe77ec --- /dev/null +++ b/tests/export-xlsx.sh @@ -0,0 +1,43 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --output "tmp/${t}/${t}.xlsx" +(cd tmp/"${t}" && + ssconvert -S "${t}.xlsx" "${t}.csv" && + mv "${t}.csv" "${t}.output") + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/export.sh b/tests/export.sh new file mode 100644 index 0000000..a12194c --- /dev/null +++ b/tests/export.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# 
=================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/format-create-separator.sh b/tests/format-create-separator.sh new file mode 100644 index 0000000..8b5b55d --- /dev/null +++ b/tests/format-create-separator.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.txt" +a;b;c +1;2;3 +0;0;0 +$;\;' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a b c +1 2 3 +0 0 0 +$ \ ' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.txt" --format "csv" --separator ";" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/format-create.sh b/tests/format-create.sh new file mode 100644 index 0000000..5a51546 --- /dev/null +++ b/tests/format-create.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +# 
=============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +Column 1 +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --format "line-based" +${cmd} --export "${t}" --output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/format-export-output.sh b/tests/format-export-output.sh new file mode 100644 index 0000000..e859b74 --- /dev/null +++ b/tests/format-export-output.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --format "csv" --output "tmp/${t}/${t}.output" + +# =================================== TEST 
=================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/format-export.sh b/tests/format-export.sh new file mode 100644 index 0000000..eb7fc57 --- /dev/null +++ b/tests/format-export.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +a,b,c +1,2,3 +0,0,0 +$,\,' +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" --format "csv" > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/help.sh b/tests/help.sh new file mode 100644 index 0000000..e7dda25 --- /dev/null +++ b/tests/help.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +Script to provide a command line interface to an OpenRefine server. 
+DATA + +# ================================== ACTION ================================== # + +${cmd} --help | sed '3q;d' > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/info-utf8.sh b/tests/info-utf8.sh new file mode 100644 index 0000000..033bafb --- /dev/null +++ b/tests/info-utf8.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.tsv" +🔣 code meaning +🍇 1F347 GRAPES +🍉 1F349 WATERMELON +🍒 1F352 CHERRIES +🍓 1F353 STRAWBERRY +🍍 1F34D PINEAPPLE +DATA + +# ================================= ASSERTION ================================ # + +cat << DATA > "tmp/${t}/${t}.assert" + column 001: 🔣 +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.tsv" +${cmd} --info "${t}" | grep 'column 001' > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/info.sh b/tests/info.sh new file mode 100644 index 0000000..5403260 --- /dev/null +++ b/tests/info.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > 
"tmp/${t}/${t}.csv" +a,b,c +1,2,3 +DATA + +# ================================= ASSERTION ================================ # + +cat << DATA > "tmp/${t}/${t}.assert" + column 002: b +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --info "${t}" | grep 'column 002' > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/list-utf8.sh b/tests/list-utf8.sh new file mode 100644 index 0000000..e3d0359 --- /dev/null +++ b/tests/list-utf8.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +DATA + +# ================================= ASSERTION ================================ # + +cat << DATA > "tmp/${t}/${t}.assert" + ${t} biểu tượng cảm xúc ⛲ +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" --projectName "${t} biểu tượng cảm xúc ⛲" +${cmd} --list | grep "${t}" | cut -d ':' -f 2 > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/list.sh b/tests/list.sh new file mode 100644 index 0000000..f592ad1 --- /dev/null +++ b/tests/list.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + 
+t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +a,b,c +1,2,3 +DATA + +# ================================= ASSERTION ================================ # + +cat << DATA > "tmp/${t}/${t}.assert" + ${t} +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --list | grep "${t}" | cut -d ':' -f 2 > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/template-facets.sh b/tests/template-facets.sh new file mode 100644 index 0000000..e6e4c80 --- /dev/null +++ b/tests/template-facets.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +email,name,state,gender,purchase +danny.baron@example1.com,Danny Baron,CA,M,TV +melanie.white@example2.edu,Melanie White,NC,F,iPhone +danny.baron@example1.com,D. 
Baron,CA,M,Winter jacket +ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight +arthur.duff@example4.com,Arthur Duff,OR,M,Dining table +danny.baron@example1.com,Daniel Baron,CA,M,Bike +jean.griffith@example5.org,Jean Griffith,WA,F,Power drill +melanie.white@example2.edu,Melanie White,NC,F,iPad +ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier +arthur.duff@example4.com,Arthur Duff,OR,M,Night table +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "events" : [ + { "name" : "Melanie White", "purchase" : "iPhone" }, + { "name" : "Jean Griffith", "purchase" : "Power drill" }, + { "name" : "Melanie White", "purchase" : "iPad" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "events" : [ +' \ +--template ' { "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--facets '{"type":"list","name":"gender","columnName":"gender","expression":"value","omitBlank":false,"omitError":false,"selection":[{"v":{"v":"F","l":"F"}}],"selectBlank":false,"selectError":false,"invert":false}' \ +--output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/template-filterQuery-utf8.sh b/tests/template-filterQuery-utf8.sh new file mode 100644 index 0000000..e8dfef9 --- /dev/null +++ b/tests/template-filterQuery-utf8.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# 
=================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +🔣,code,meaning +🍇,1F347,GRAPES +🍉,1F349,WATERMELON +🍒,1F352,CHERRIES +🍓,1F353,STRAWBERRY +🍍,1F34D,PINEAPPLE +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "emojis" : [ + { "symbol" : "🍇", "meaning" : "GRAPES" }, + { "symbol" : "🍉", "meaning" : "WATERMELON" }, + { "symbol" : "🍍", "meaning" : "PINEAPPLE" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "emojis" : [ +' \ +--template ' { "symbol" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, "meaning" : {{jsonize(cells["meaning"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--filterQuery '^1F34' \ +--filterColumn 'code' \ +--output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/template-filterQuery.sh b/tests/template-filterQuery.sh new file mode 100644 index 0000000..a2bda73 --- /dev/null +++ b/tests/template-filterQuery.sh @@ -0,0 +1,59 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +email,name,state,gender,purchase +danny.baron@example1.com,Danny Baron,CA,M,TV +melanie.white@example2.edu,Melanie White,NC,F,iPhone +danny.baron@example1.com,D. 
Baron,CA,M,Winter jacket +ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight +arthur.duff@example4.com,Arthur Duff,OR,M,Dining table +danny.baron@example1.com,Daniel Baron,CA,M,Bike +jean.griffith@example5.org,Jean Griffith,WA,F,Power drill +melanie.white@example2.edu,Melanie White,NC,F,iPad +ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier +arthur.duff@example4.com,Arthur Duff,OR,M,Night table +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "events" : [ + { "name" : "Melanie White", "purchase" : "iPhone" }, + { "name" : "Jean Griffith", "purchase" : "Power drill" }, + { "name" : "Melanie White", "purchase" : "iPad" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "events" : [ +' \ +--template ' { "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--filterQuery '^F$' \ +--filterColumn 'gender' \ +--output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/template-splitToFiles-mode.sh b/tests/template-splitToFiles-mode.sh new file mode 100644 index 0000000..2f8c02e --- /dev/null +++ b/tests/template-splitToFiles-mode.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +email,name,state,gender,purchase +arthur.duff@example4.com,Arthur 
Duff,OR,M,Dining table +,Arthur Duff,OR,M,Night table +ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier +ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight +danny.baron@example1.com,Daniel Baron,CA,M,Bike +,Danny Baron,CA,M,TV +,D. Baron,CA,M,Winter jacket +jean.griffith@example5.org,Jean Griffith,WA,F,Power drill +melanie.white@example2.edu,Melanie White,NC,F,iPad +,Melanie White,NC,F,iPhone +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "events" : [ + { "name" : "Melanie White", "purchase" : "iPad" } { "name" : "Melanie White", "purchase" : "iPhone" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "events" : [ +' \ +--template ' { "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--mode "record-based" \ +--splitToFiles true \ +--output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +ls "tmp/${t}" +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}_6.output" diff --git a/tests/template-splitToFiles-suffixById-utf8.sh b/tests/template-splitToFiles-suffixById-utf8.sh new file mode 100644 index 0000000..94a2d03 --- /dev/null +++ b/tests/template-splitToFiles-suffixById-utf8.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +🔣,code,meaning +🍇,1F347,GRAPES +🍉,1F349,WATERMELON +🍒,1F352,CHERRIES +🍓,1F353,STRAWBERRY 
+🍍,1F34D,PINEAPPLE +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "emojis" : [ + { "symbol" : "🍍", "meaning" : "PINEAPPLE" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "emojis" : [ +' \ +--template ' { "symbol" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, "meaning" : {{jsonize(cells["meaning"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--splitToFiles true \ +--suffixById true \ +--output "tmp/${t}/trái cây.json" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/trái cây_🍍.json" diff --git a/tests/template-splitToFiles-suffixById.sh b/tests/template-splitToFiles-suffixById.sh new file mode 100644 index 0000000..4009205 --- /dev/null +++ b/tests/template-splitToFiles-suffixById.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +email,name,state,gender,purchase +danny.baron@example1.com,Danny Baron,CA,M,TV +melanie.white@example2.edu,Melanie White,NC,F,iPhone +danny.baron@example1.com,D. 
Baron,CA,M,Winter jacket +ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight +arthur.duff@example4.com,Arthur Duff,OR,M,Dining table +danny.baron@example1.com,Daniel Baron,CA,M,Bike +jean.griffith@example5.org,Jean Griffith,WA,F,Power drill +melanie.white@example2.edu,Melanie White,NC,F,iPad +ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier +arthur.duff@example4.com,Arthur Duff,OR,M,Night table +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "events" : [ + { "name" : "Arthur Duff", "purchase" : "Night table" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "events" : [ +' \ +--template ' { "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--splitToFiles true \ +--suffixById true \ +--output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +ls "tmp/${t}" +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}_arthur.duff@example4.com.output" diff --git a/tests/template-splitToFiles-utf8.sh b/tests/template-splitToFiles-utf8.sh new file mode 100644 index 0000000..fa36abf --- /dev/null +++ b/tests/template-splitToFiles-utf8.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +🔣,code,meaning +🍇,1F347,GRAPES +🍉,1F349,WATERMELON +🍒,1F352,CHERRIES +🍓,1F353,STRAWBERRY +🍍,1F34D,PINEAPPLE +DATA + +# ================================= 
ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "emojis" : [ + { "symbol" : "🍍", "meaning" : "PINEAPPLE" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "emojis" : [ +' \ +--template ' { "symbol" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, "meaning" : {{jsonize(cells["meaning"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--splitToFiles true \ +--output "tmp/${t}/trái cây.json" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/trái cây_5.json" diff --git a/tests/template-splitToFiles.sh b/tests/template-splitToFiles.sh new file mode 100644 index 0000000..4cb2976 --- /dev/null +++ b/tests/template-splitToFiles.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +email,name,state,gender,purchase +danny.baron@example1.com,Danny Baron,CA,M,TV +melanie.white@example2.edu,Melanie White,NC,F,iPhone +danny.baron@example1.com,D. 
Baron,CA,M,Winter jacket +ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight +arthur.duff@example4.com,Arthur Duff,OR,M,Dining table +danny.baron@example1.com,Daniel Baron,CA,M,Bike +jean.griffith@example5.org,Jean Griffith,WA,F,Power drill +melanie.white@example2.edu,Melanie White,NC,F,iPad +ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier +arthur.duff@example4.com,Arthur Duff,OR,M,Night table +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "events" : [ + { "name" : "Arthur Duff", "purchase" : "Night table" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "events" : [ +' \ +--template ' { "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--splitToFiles true \ +--output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +ls "tmp/${t}" +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}_10.output" diff --git a/tests/template-utf8.sh b/tests/template-utf8.sh new file mode 100644 index 0000000..851f8e2 --- /dev/null +++ b/tests/template-utf8.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +🔣,code,meaning +🍇,1F347,GRAPES +🍉,1F349,WATERMELON +🍒,1F352,CHERRIES +🍓,1F353,STRAWBERRY +🍍,1F34D,PINEAPPLE +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > 
"tmp/${t}/${t}.assert" +{ "emojis" : [ + { "symbol" : "🍇", "meaning" : "GRAPES" }, + { "symbol" : "🍉", "meaning" : "WATERMELON" }, + { "symbol" : "🍒", "meaning" : "CHERRIES" }, + { "symbol" : "🍓", "meaning" : "STRAWBERRY" }, + { "symbol" : "🍍", "meaning" : "PINEAPPLE" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "emojis" : [ +' \ +--template ' { "symbol" : {{jsonize(with(row.columnNames[0],cn,cells[cn].value))}}, "meaning" : {{jsonize(cells["meaning"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/template.sh b/tests/template.sh new file mode 100644 index 0000000..ef31b97 --- /dev/null +++ b/tests/template.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename "${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# =================================== DATA =================================== # + +cat << "DATA" > "tmp/${t}/${t}.csv" +email,name,state,gender,purchase +danny.baron@example1.com,Danny Baron,CA,M,TV +melanie.white@example2.edu,Melanie White,NC,F,iPhone +danny.baron@example1.com,D. 
Baron,CA,M,Winter jacket +ben.tyler@example3.org,Ben Tyler,NV,M,Flashlight +arthur.duff@example4.com,Arthur Duff,OR,M,Dining table +danny.baron@example1.com,Daniel Baron,CA,M,Bike +jean.griffith@example5.org,Jean Griffith,WA,F,Power drill +melanie.white@example2.edu,Melanie White,NC,F,iPad +ben.morisson@example6.org,Ben Morisson,FL,M,Amplifier +arthur.duff@example4.com,Arthur Duff,OR,M,Night table +DATA + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +{ "events" : [ + { "name" : "Danny Baron", "purchase" : "TV" }, + { "name" : "Melanie White", "purchase" : "iPhone" }, + { "name" : "D. Baron", "purchase" : "Winter jacket" }, + { "name" : "Ben Tyler", "purchase" : "Flashlight" }, + { "name" : "Arthur Duff", "purchase" : "Dining table" }, + { "name" : "Daniel Baron", "purchase" : "Bike" }, + { "name" : "Jean Griffith", "purchase" : "Power drill" }, + { "name" : "Melanie White", "purchase" : "iPad" }, + { "name" : "Ben Morisson", "purchase" : "Amplifier" }, + { "name" : "Arthur Duff", "purchase" : "Night table" } +] } +DATA + +# ================================== ACTION ================================== # + +${cmd} --create "tmp/${t}/${t}.csv" +${cmd} --export "${t}" \ +--prefix '{ "events" : [ +' \ +--template ' { "name" : {{jsonize(cells["name"].value)}}, "purchase" : {{jsonize(cells["purchase"].value)}} }' \ +--rowSeparator ', +' \ +--suffix ' +] } +' \ +--output "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output" diff --git a/tests/usage.sh b/tests/usage.sh new file mode 100644 index 0000000..1a7e0d4 --- /dev/null +++ b/tests/usage.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# =============================== ENVIRONMENT ================================ # + +if [[ ${1} ]]; then + cmd="${1}" +else + echo 1>&2 "execute tests-cli.sh to run all tests"; exit 1 +fi + +t="$(basename 
"${BASH_SOURCE[0]}" .sh)" +cd "${BASH_SOURCE%/*}/" || exit 1 +mkdir -p "tmp/${t}" + +# ================================= ASSERTION ================================ # + +cat << "DATA" > "tmp/${t}/${t}.assert" +Usage: +DATA + +# ================================== ACTION ================================== # + +${cmd} | head -n 1 | cut -c 1-6 > "tmp/${t}/${t}.output" + +# =================================== TEST =================================== # + +diff -u "tmp/${t}/${t}.assert" "tmp/${t}/${t}.output"