#!/bin/bash
# openrefine-bash-curl.sh, Felix Lohmeier, v0.4, 2020-07-04
# How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
# tested on Linux (Fedora 33), needs to be adapted to work on macOS
# TODO: example for engine config (facets)

# make script executable from another directory:
# all relative paths below (jq, openrefine/, workspace) resolve against the
# directory that contains this script
cd "$(dirname "${0}")" || exit 1

# ================================== CONFIG ================================== #

# config
# port the OpenRefine server is started on (passed to refine via -p)
port="3333"
# base URL used by every cURL request in this script
endpoint="http://localhost:${port}"
# memory passed to refine via -m
memory="1400M"
# timestamp of this run; also used as log file name
date="$(date +%Y%m%d_%H%M%S)"
# OpenRefine workspace directory for this run (passed to refine via -d)
workspace="${date}"

# =============================== REQUIREMENTS =============================== #

# check requirement java
java="$(command -v java 2> /dev/null)"
if [[ -z "${java}" ]] ; then
  echo 1>&2 "ERROR: OpenRefine requires JAVA runtime environment (jre)" \
    "https://openjdk.java.net/install/"
  exit 1
fi

# check requirement cURL
curl="$(command -v curl 2> /dev/null)"
if [[ -z "${curl}" ]] ; then
  echo 1>&2 "ERROR: This shell script requires cURL" \
    "https://curl.haxx.se/download.html"
  exit 1
fi

# install jq 1.4 (faster startup time than 1.5 and 1.6) in this directory
if [[ ! -f "jq" ]]; then
  echo "Download jq..."
  # -f makes curl fail on HTTP errors instead of saving the error page as
  # "jq"; a partial file is removed so the next run retries the download
  if ! curl -fL --output "jq" \
    "https://github.com/stedolan/jq/releases/download/jq-1.4/jq-linux-x86_64"
  then
    echo 1>&2 "ERROR: download of jq failed!"
    rm -f "jq"
    exit 1
  fi
  chmod +x "jq"
  echo
fi
# absolute path, so jq can still be called after a later change of directory
jq="$(readlink -f jq)"

# install OpenRefine 3.3 in subdirectory openrefine
openrefine_url="https://github.com/OpenRefine/OpenRefine/releases/download/3.3/openrefine-linux-3.3.tar.gz"
if [[ ! -d "openrefine" ]]; then
  openrefine_archive="$(basename "${openrefine_url}")"
  echo "Download OpenRefine..."
  mkdir -p "openrefine"
  # on failure remove the (empty) target directory again; otherwise the
  # directory test above would skip the installation on the next run
  if ! curl -fL --output "${openrefine_archive}" "${openrefine_url}"; then
    echo 1>&2 "ERROR: download of OpenRefine failed!"
    rm -rf "openrefine" "${openrefine_archive}"
    exit 1
  fi
  echo "Install OpenRefine in subdirectory openrefine..."
  if ! tar -xzf "${openrefine_archive}" -C openrefine --strip 1 --totals; then
    echo 1>&2 "ERROR: extracting OpenRefine failed!"
    rm -rf "openrefine" "${openrefine_archive}"
    exit 1
  fi
  rm -f "${openrefine_archive}"
  # do not try to open OpenRefine in browser
  sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' \
    openrefine/refine.ini
  # set autosave period from 5 minutes to 25 hours
  sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1500/' \
    openrefine/refine.ini
  # set min java heap space to allocated memory
  sed -i 's/-Xms$REFINE_MIN_MEMORY/-Xms$REFINE_MEMORY/' \
    openrefine/refine
  echo
fi
# absolute path to the refine launcher script
openrefine="$(readlink -f openrefine/refine)"

# =============================== ENVIRONMENT ================================ #

# start OpenRefine
# Launches the server in the background, stores its process id in the global
# pid_server and polls the endpoint (for at most 30 seconds) until the
# response contains "OpenRefine". Calls stop and exits 1 on timeout.
# Globals: openrefine, memory, port, workspace, endpoint (read),
#          pid_server (written)
function start() {
  "${openrefine}" -v warn -m "${memory}" -p "${port}" -d "${workspace}" &
  pid_server=${!}
  timeout 30s bash -c "until curl -s \"${endpoint}\" \
    | cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
    || { echo 1>&2 "ERROR: starting OpenRefine server failed!"; stop; exit 1; }
}

# stop OpenRefine
# Prints resource usage of the server process, kills it and scans the log
# file of this run. Exits the script with status 1 if the log contains
# "exception" or "error" (case-insensitive); the matching lines are printed.
# Globals: pid_server, workspace, date (read)
function stop() {
  echo
  # print system resources
  ps -o start,etime,%mem,%cpu,rss -p "${pid_server}"
  echo
  # SIGKILL (kill -9) prevents saving OpenRefine projects
  { kill -9 "${pid_server}" && wait "${pid_server}"; } 2>/dev/null
  # grep log for server exceptions
  echo "check log for any warnings..."
  if grep -i 'exception\|error' "${workspace}/${date}.log"; then
    exit 1
  else
    log "no warnings, all good!"
  fi
}

# cleanup handler: shut the server down and exit non-zero when the script
# is interrupted or terminated
trap 'stop;exit 1' HUP INT QUIT TERM

# get csrf token (introduced in OpenRefine 3.3)
# Requests a token from the server and prints it to stdout.
# Globals: endpoint (read)
# Returns: 0 on success; 1 (with a message on stderr) if the response does
#          not start with {"token":"
function csrf() {
  # local, so the raw response does not leak into the caller's scope
  local response
  response=$(curl -fsS "${endpoint}/command/core/get-csrf-token")
  if [[ "${response}" != '{"token":"'* ]]; then
    echo 1>&2 "ERROR: getting CSRF token failed!"; return 1
  else
    # the token is the 4th field when splitting the response on double quotes
    printf '%s\n' "${response}" | cut -d '"' -f 4
  fi
}

# check and store project ids from import in associative array ids
declare -A ids
# Arguments: $1 - key under which the project id is stored in ids
#            $2 - file holding the redirect URL written by cURL; the id is
#                 the text after the first "="
# Returns:   0 if the id is 13 characters long (the file is then removed),
#            1 on wrong argument count or invalid id (the file is kept)
function store() {
  if [[ $# -ne 2 ]]; then
    echo 1>&2 "ERROR: invalid arguments supplied to store function"; return 1
  fi
  ids[$1]=$(cut -d '=' -f 2 "$2")
  if [[ "${#ids[$1]}" != 13 ]]; then
    echo 1>&2 "ERROR: returned project id is not valid"; return 1
  fi
  rm "$2"
}

# create directories
mkdir -p "${workspace}"

# logging
# from here on stdout and stderr of this script are shown on the terminal
# and appended to the log file of this run
exec &> >(tee -a "${workspace}/${date}.log")
# Prints a client message prefixed with the current time (milliseconds
# precision; %3N requires GNU date).
# Arguments: $1 - message text
function log() {
  printf '%s [ client] %s\n' "$(date +%H:%M:%S.%3N)" "$1"
}

# ======================= TEMPLATES FOR YOUR WORKFLOW ======================== #

# ------------------------------- START SERVER ------------------------------- #

echo "start OpenRefine server..."
start
echo

# ----------------------------- IMPORT OPTION 1 ------------------------------ #

# create project from heredoc
# project id will be accessible as ${ids[example1]}
p="example1"
input="example1.csv"
# file name without directory part; names the temporary .id file
filename="${input##*/}"
echo "import ${p}..."
# cURL prints the redirect URL (which carries the new project id) into the
# .id file; the quoted heredoc delimiter keeps the data literal
if curl -fsS --write-out "%{redirect_url}\n" \
  --form project-file="@-;filename=${input}" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{"separator": " "}' \
  "${endpoint}/command/core/create-project-from-upload?csrf_token=$(csrf)" \
  > "${workspace}/${filename}.id" \
  << "DATA"
a b c
1 2 3
0 0 0
$ \ '
DATA
then
  if store "${p}" "${workspace}/${filename}.id"; then
    log "imported ${input} as ${p} (${ids[$p]})"
  else
    echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1
  fi
  echo
else
  echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1
fi

# ----------------------------- IMPORT OPTION 2 ------------------------------ #

# mockup test data
cat << DATA > "${workspace}/test.csv"
z,x,y
3,2,1
0,0,0
DATA

# create project from file
# project id will be accessible as ${ids[example2]}
p="example2"
input="${workspace}/test.csv"
# file name without directory part; names the temporary .id file
filename="${input##*/}"
echo "import ${p}..."
# the separator must match the comma-separated test data above, otherwise
# the project gets a single column and the columns z and y (used by the
# templating exports) do not exist
if curl -fsS --write-out "%{redirect_url}\n" \
  --form project-file="@${input}" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{"separator": ","}' \
  "${endpoint}/command/core/create-project-from-upload?csrf_token=$(csrf)" \
  > "${workspace}/${filename}.id"
then
  if store "${p}" "${workspace}/${filename}.id"; then
    log "imported ${input} as ${p} (${ids[$p]})"
  else
    echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1
  fi
  echo
else
  echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1
fi

# ----------------------------- IMPORT OPTION 3 ------------------------------ #

# mockup test data
cat << DATA > "${workspace}/test2.csv"
r,s,t
1,1,1
2,2,2
DATA

# create projects from files (in parallel)
# project ids will be accessible as ${ids[test]} and ${ids[test2]}
inputs=( "${workspace}/test.csv" "${workspace}/test2.csv" )
echo "import files" "${inputs[@]}" "..."
pid=()
# start one background upload per file and remember its process id
for i in "${!inputs[@]}"; do
  filename="${inputs[$i]##*/}"
  # project name = file name up to the first dot
  p="${filename%%.*}"
  curl -fsS --write-out "%{redirect_url}\n" \
    --form project-file="@${inputs[$i]}" \
    --form project-name="${p}" \
    --form format="text/line-based/*sv" \
    --form options='{"separator": ","}' \
    "${endpoint}/command/core/create-project-from-upload?csrf_token=$(csrf)" \
    > "${workspace}/${filename}.id" &
  pid+=("$!")
done
# collect the uploads in the same order
for i in "${!inputs[@]}"; do
  filename="${inputs[$i]##*/}"
  p="${filename%%.*}"
  # test the exit status of wait itself; wait prints nothing, so comparing
  # its (empty) output with 0 would always succeed
  if wait "${pid[$i]}"; then
    if store "${p}" "${workspace}/${filename}.id"; then
      log "imported ${inputs[$i]} as ${p} (${ids[$p]})"
    else
      echo 1>&2 "ERROR: import of ${inputs[$i]} failed!"; stop; exit 1
    fi
  else
    echo 1>&2 "ERROR: import of ${inputs[$i]} failed!"; stop; exit 1
  fi
done
echo

# ---------------------------- TRANSFORM OPTION 1 ---------------------------- #

# mockup test data
cat << DATA > "${workspace}/test.json"
[
{
"op": "core/column-addition",
"engineConfig": {
"mode": "row-based"
},
"newColumnName": "test",
"columnInsertIndex": 2,
"baseColumnName": "b",
"expression": "grel:value.replace('2','FILE')",
"onError": "set-to-blank"
}
]
DATA

# apply operation from file
p="example1"
input="${workspace}/test.json"
echo "add column test to ${p}..."
# operations@FILE lets cURL read and URL-encode the JSON from the file
if curl -fsS \
  --data project="${ids[$p]}" \
  --data-urlencode operations@"${input}" \
  "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null
then
  log "transformed ${p} (${ids[$p]}) with ${input}"
  echo
else
  echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) with ${input} failed!"
  stop; exit 1
fi

# ---------------------------- TRANSFORM OPTION 2 ---------------------------- #

# apply operation from quoted heredoc
# (quoted delimiter: the JSON is passed literally, without shell expansion)
p="example1"
echo "add column test2 to ${p}..."
if curl -fsS \
  --data project="${ids[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null \
  << "JSON"
[
{
"op": "core/column-addition",
"engineConfig": {
"mode": "row-based"
},
"newColumnName": "test2",
"columnInsertIndex": 2,
"baseColumnName": "b",
"expression": "grel:value.replace('2','FOO')",
"onError": "set-to-blank"
}
]
JSON
then
  log "transformed ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ---------------------------- TRANSFORM OPTION 3 ---------------------------- #

# apply operation from unquoted heredoc (allows using bash variables)
p="example1"
new_column="test3"
base_column="b"
replace_value="BAR"
echo "add column ${new_column} to ${p}..."
# the variables above are expanded inside the JSON before it is sent
if curl -fsS \
  --data project="${ids[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null \
  << JSON
[
{
"op": "core/column-addition",
"engineConfig": {
"mode": "row-based"
},
"newColumnName": "${new_column}",
"columnInsertIndex": 3,
"baseColumnName": "${base_column}",
"expression": "grel:value.replace('2','${replace_value}')",
"onError": "set-to-blank"
}
]
JSON
then
  log "transformed ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ---------------------------- TRANSFORM OPTION 4 ---------------------------- #

# apply operation from unquoted heredoc with multi-line expression (requires jq)
p="example1"
replace_value="!"
echo "add column test4 to ${p}..."
# read the whole heredoc into one variable; read returns non-zero at end of
# input here, which is expected and ignored
read -r -d '' expression << EXPRESSION
grel:value.replace(
'2',
'${replace_value}'
)
EXPRESSION
# jq -s -R turns the raw multi-line text into a single JSON string literal
if curl -fsS \
  --data project="${ids[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null \
  << JSON
[
{
"op": "core/column-addition",
"engineConfig": {
"mode": "row-based"
},
"newColumnName": "test4",
"columnInsertIndex": 4,
"baseColumnName": "b",
"expression": $(echo "${expression}" | "${jq}" -s -R '.'),
"onError": "set-to-blank"
}
]
JSON
then
  log "transformed ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ---------------------------- TRANSFORM OPTION 5 ---------------------------- #

# apply multiple operations generated on-the-fly (requires jq)
p="example1"
columns=( "test" "test2" "test3" )
echo "delete columns" "${columns[@]}" "in ${p}..."
payload=()
# build one single-element JSON array per column
for column in "${columns[@]}"; do
  payload+=( "$(cat << JSON
[
{
"op": "core/column-removal",
"columnName": "${column}"
}
]
JSON
  )" )
done
# jq -s add concatenates the separate arrays into one array of operations
if echo "${payload[@]}" | "${jq}" -s add | curl -fsS \
  --data project="${ids[$p]}" \
  --data-urlencode operations@- \
  "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null
then
  log "transformed ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ----------------------------- EXPORT OPTION 1 ------------------------------ #

# export to stdout
p="example1"
echo "export ${p}..."
if curl -fsS \
  --data project="${ids[$p]}" \
  --data format="tsv" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows"
then
  #log "printed export of ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ----------------------------- EXPORT OPTION 2 ------------------------------ #

# export to file
p="example1"
output="${workspace}/${p}.csv"
echo "export ${p} to file..."
if curl -fsS \
  --data project="${ids[$p]}" \
  --data format="csv" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
  > "${output}"
then
  log "${p} (${ids[$p]}) saved to file ${output}"
  echo
else
  echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ----------------------------- EXPORT OPTION 3 ------------------------------ #

# templating export to stdout
p="example2"
echo "export ${p} using template..."
# IFS= keeps the whitespace of the template; read returns non-zero at end of
# input here, which is expected and ignored
IFS= read -r -d '' template << TEMPLATE
{
"z": {{cells['z'].value.jsonize()}},
"y": {{cells['y'].value.jsonize()}}
}
TEMPLATE
# head -c -2 drops the two trailing newlines (one from the heredoc, one
# added by echo); negative byte counts require GNU head
if echo "${template}" | head -c -2 | curl -fsS \
  --data project="${ids[$p]}" \
  --data format="template" \
  --data prefix="[
" \
  --data suffix="
]" \
  --data separator=",
" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  --data-urlencode template@- \
  "${endpoint}/command/core/export-rows"
then
  echo
  #log "printed export of ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ----------------------------- EXPORT OPTION 4 ------------------------------ #

# templating export to file
p="example2"
output="${workspace}/${p}.json"
echo "export ${p} to file using template..."
# IFS= keeps the whitespace of the template; read returns non-zero at end of
# input here, which is expected and ignored
IFS= read -r -d '' template << TEMPLATE
{
"z": {{cells['z'].value.jsonize()}},
"y": {{cells['y'].value.jsonize()}}
}
TEMPLATE
# head -c -2 drops the two trailing newlines (one from the heredoc, one
# added by echo); negative byte counts require GNU head
if echo "${template}" | head -c -2 | curl -fsS \
  --data project="${ids[$p]}" \
  --data format="template" \
  --data prefix="[
" \
  --data suffix="
]" \
  --data separator=",
" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  --data-urlencode template@- \
  "${endpoint}/command/core/export-rows" \
  > "${output}"
then
  log "${p} (${ids[$p]}) saved to ${output}"
  echo
else
  echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ----------------------------- EXPORT OPTION 5 ------------------------------ #

# export projects to files (in parallel)
ps=( "example1" "example2" )
format="tsv"
echo "export" "${ps[@]}" "to files..."
pid=()
# start one background export per project and remember its process id
for p in "${ps[@]}"; do
  curl -fs \
    --data project="${ids[$p]}" \
    --data format="${format}" \
    --data engine='{"facets":[],"mode":"row-based"}' \
    "${endpoint}/command/core/export-rows" \
    > "${workspace}/${p}.${format}" &
  pid+=("$!")
done
# collect the exports in the same order
for i in "${!ps[@]}"; do
  p="${ps[$i]}"
  # test the exit status of wait itself; wait prints nothing, so comparing
  # its (empty) output with 0 would always succeed
  if wait "${pid[$i]}"; then
    log "${p} (${ids[$p]}) saved to ${workspace}/${p}.${format}"
  else
    echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1
  fi
done
echo

# ------------------------------ LIST PROJECTS ------------------------------- #

# print id and name for each project (requires jq)
echo "list projects..."
# pipefail (limited to this subshell) makes a failing curl fail the whole
# pipeline; otherwise only the exit status of jq would be tested
if (
  set -o pipefail
  curl -fsS --get \
    "${endpoint}/command/core/get-all-project-metadata" \
    | "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"'
)
then
  #log "printed list of projects"
  echo
else
  echo 1>&2 "ERROR: list projects failed!"; stop; exit 1
fi

# ------------------------------- GET METADATA ------------------------------- #

# print metadata (requires jq)
p="example1"
echo "metadata for ${p}..."
# pipefail (limited to this subshell) makes a failing curl fail the whole
# pipeline; otherwise only the exit status of jq would be tested
if (
  set -o pipefail
  # the jq filter prepends the project id to the metadata object
  curl -fsS --get \
    --data project="${ids[$p]}" \
    "${endpoint}/command/core/get-project-metadata" \
    | "${jq}" "{ id: ${ids[$p]} } + ."
)
then
  #log "printed metadata of ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: getting metadata of ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ------------------------------ GET ROW COUNT ------------------------------- #

# print total number of rows (requires jq)
p="example1"
echo "total number of rows in ${p}..."
# pipefail (limited to this subshell) makes a failing curl fail the whole
# pipeline; otherwise only the exit status of jq would be tested
if (
  set -o pipefail
  curl -fsS --get \
    --data project="${ids[$p]}" \
    "${endpoint}/command/core/get-rows" \
    | "${jq}" -r '.total'
)
then
  #log "printed row count of ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: getting rowcount of ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ------------------------------- GET COLUMNS -------------------------------- #

# print columns (requires jq)
p="example1"
echo "column names of ${p}..."
# pipefail (limited to this subshell) makes a failing curl fail the whole
# pipeline; otherwise only the exit status of jq would be tested
if (
  set -o pipefail
  curl -fsS --get \
    --data project="${ids[$p]}" \
    "${endpoint}/command/core/get-models" \
    | "${jq}" -r '.columnModel | .columns[] | .name'
)
then
  #log "printed column names of ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: getting columns of ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# -------------------------- GET OPERATIONS HISTORY -------------------------- #

# save operations history to file (requires jq)
p="example1"
output="${workspace}/${p}_history.json"
echo "history of operations for ${p}..."
# pipefail (limited to this subshell) makes a failing curl fail the whole
# pipeline; otherwise only the exit status of jq would be tested
if (
  set -o pipefail
  # keep only the operation objects of the history entries
  curl -fsS --get \
    --data project="${ids[$p]}" \
    "${endpoint}/command/core/get-operations" \
    | "${jq}" '[ .entries[] | .operation ]' \
    > "${output}"
)
then
  log "ops history of ${p} (${ids[$p]}) saved to ${output}"
  echo
else
  echo 1>&2 "ERROR: getting ops history of ${p} (${ids[$p]}) failed!"
  stop; exit 1
fi

# ---------------------------- GET IMPORT HISTORY ---------------------------- #

# print import options history (requires jq)
p="example2"
echo "history of import for ${p}..."
# pipefail (limited to this subshell) makes a failing curl fail the whole
# pipeline; otherwise only the exit status of jq would be tested
if (
  set -o pipefail
  curl -fsS --get \
    --data project="${ids[$p]}" \
    "${endpoint}/command/core/get-project-metadata" \
    | "${jq}" ".importOptionMetadata[0]"
)
then
  #log "printed import history of ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: getting imp history of ${p} (${ids[$p]}) failed!"
  stop; exit 1
fi

# ---------------------------------- DELETE ---------------------------------- #

# delete project
p="example1"
echo "delete project ${p}..."
if curl -fsS \
  --data project="${ids[$p]}" \
  "${endpoint}/command/core/delete-project?csrf_token=$(csrf)" > /dev/null
then
  log "deleted ${p} (${ids[$p]})"
  echo
else
  echo 1>&2 "ERROR: deletion of ${p} (${ids[$p]}) failed!"; stop; exit 1
fi

# ------------------------------- STOP SERVER -------------------------------- #

echo "stop OpenRefine server..."
stop