This commit is contained in:
parent
1a5efc0c3c
commit
bf14449df9
|
@ -1,8 +1,9 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# openrefine-bash-curl.sh, Felix Lohmeier, v0.1, 2020-06-29
|
# openrefine-bash-curl.sh, Felix Lohmeier, v0.2, 2020-07-03
|
||||||
# How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts
|
# How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts
|
||||||
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
|
# https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d
|
||||||
# tested on Linux (Fedora 33), needs to be adapted to work on macOS
|
# tested on Linux (Fedora 33), needs to be adapted to work on macOS
|
||||||
|
# TODO: example for engine config (facets)
|
||||||
|
|
||||||
# make script executable from another directory
|
# make script executable from another directory
|
||||||
cd "$(dirname "${0}")" || exit 1
|
cd "$(dirname "${0}")" || exit 1
|
||||||
|
@ -16,19 +17,19 @@ memory="1400M"
|
||||||
date="$(date +%Y%m%d_%H%M%S)"
|
date="$(date +%Y%m%d_%H%M%S)"
|
||||||
workspace="${date}"
|
workspace="${date}"
|
||||||
|
|
||||||
# ============================= INSTALL ====================================== #
|
# ========================== REQUIREMENTS ==================================== #
|
||||||
|
|
||||||
# check requirement java
|
# check requirement java
|
||||||
JAVA="$(command -v java 2> /dev/null)"
|
java="$(command -v java 2> /dev/null)"
|
||||||
if [[ -z "${JAVA}" ]] ; then
|
if [[ -z "${java}" ]] ; then
|
||||||
echo 1>&2 "ERROR: OpenRefine requires JAVA runtime environment (jre)" \
|
echo 1>&2 "ERROR: OpenRefine requires JAVA runtime environment (jre)" \
|
||||||
"https://openjdk.java.net/install/"
|
"https://openjdk.java.net/install/"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# check requirement cURL
|
# check requirement cURL
|
||||||
CURL="$(command -v curl 2> /dev/null)"
|
curl="$(command -v curl 2> /dev/null)"
|
||||||
if [[ -z "${CURL}" ]] ; then
|
if [[ -z "${curl}" ]] ; then
|
||||||
echo 1>&2 "ERROR: This shell script requires cURL" \
|
echo 1>&2 "ERROR: This shell script requires cURL" \
|
||||||
"https://curl.haxx.se/download.html"
|
"https://curl.haxx.se/download.html"
|
||||||
exit 1
|
exit 1
|
||||||
|
@ -68,18 +69,49 @@ openrefine="$(readlink -f openrefine/refine)"
|
||||||
|
|
||||||
# ============================ ENVIRONMENT =================================== #
|
# ============================ ENVIRONMENT =================================== #
|
||||||
|
|
||||||
# wait for user input after each step
|
# log MESSAGE
# Print one client-side log line to stdout: the current time formatted by
# `date +%H:%M:%S.%3N`, the literal tag "[ client]" and the message passed
# as $1. Only the first argument is printed.
# NOTE(review): %3N (fractional seconds) is a GNU date extension - confirm
# before running on platforms without GNU coreutils.
function log() {
|
||||||
function pause(){
|
echo "$(date +%H:%M:%S.%3N) [ client] $1"
|
||||||
read -r -s -n 1 -p "Press any key to continue..."
|
|
||||||
echo; echo
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# safe cleanup handler
|
# start
# Launch the OpenRefine server in the background and wait until it answers.
# Globals read:    openrefine, memory, port, workspace, endpoint
# Globals written: pid_server (PID of the background server process)
# On failure (no response containing 'OpenRefine' within 30 seconds) an error
# is printed to stderr, stop is called and the script exits with status 1.
function start() {
|
||||||
function cleanup(){
|
# run the server as a background job with the configured memory, port and
# workspace directory
${openrefine} -v warn -m "${memory}" -p "${port}" -d "${workspace}" &
|
||||||
|
# remember the PID of the background job so that stop can inspect and kill it
pid_server=${!}
|
||||||
|
# poll the endpoint once per second until the response contains 'OpenRefine';
# timeout aborts the polling loop after 30 seconds
timeout 30s bash -c "until curl -s \"${endpoint}\" \
|
||||||
|
| cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
|
||||||
|
|| { echo 1>&2 "ERROR: starting OpenRefine server failed!"; stop; exit 1; }
|
||||||
|
}
|
||||||
|
|
||||||
|
# stop
# Report resource usage of the server process, kill it and check the log.
# Globals read: pid_server, workspace, date
# Exits the script with status 1 if the log file contains a line matching
# "exception" or "error" (case-insensitive); the matching lines are printed
# by grep. Otherwise logs "no warnings, all good!" and returns to the caller.
function stop() {
|
||||||
|
echo
|
||||||
|
# print system resources
|
||||||
|
ps -o start,etime,%mem,%cpu,rss -p "${pid_server}"
|
||||||
|
echo
|
||||||
# SIGKILL (kill -9) prevents saving OpenRefine projects
|
# SIGKILL (kill -9) prevents saving OpenRefine projects
|
||||||
{ kill -9 "${pid_server}" && wait "${pid_server}"; } 2>/dev/null
|
# stderr of kill/wait is discarded (2>/dev/null)
{ kill -9 "${pid_server}" && wait "${pid_server}"; } 2>/dev/null
|
||||||
|
# grep log for server exceptions
|
||||||
|
grep -i 'exception\|error' "${workspace}/${date}.log" \
|
||||||
|
# a match means something went wrong: exit 1; log runs only when grep
# finds nothing, because exit never returns
&& exit 1 || log "no warnings, all good!"
|
||||||
|
}
|
||||||
|
trap "stop;exit 1" SIGHUP SIGINT SIGQUIT SIGTERM
|
||||||
|
|
||||||
|
# csrf
# Request a CSRF token from "${endpoint}/command/core/get-csrf-token" and
# print the token value to stdout.
# Globals read:    endpoint
# Globals written: response (raw body returned by the server)
# A response that does not start with {"token":" is treated as a failure:
# an error is printed to stderr, stop is called and exit 1 is executed.
# NOTE(review): the call sites in this file use $(csrf); inside a command
# substitution `exit 1` only ends the subshell, so the calling script is
# presumably not aborted by this branch - confirm the intended behaviour.
function csrf() {
|
||||||
|
response=$(curl -fsS "${endpoint}/command/core/get-csrf-token")
|
||||||
|
if [[ "${response}" != '{"token":"'* ]]; then
|
||||||
|
echo 1>&2 "ERROR: getting CSRF token failed!"; stop; exit 1
|
||||||
|
else
|
||||||
|
# the token is the 4th field when the response is split on double quotes
echo "$response" | cut -d \" -f 4
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# import RESPONSE
# Store the id of a newly created project and verify that the import worked.
# Arguments: $1 - output of the create-project curl call; the call sites in
#                 this file pass the value printed by
#                 --write-out "%{redirect_url}\n"
# Globals read:    project (name used as key)
# Globals written: p (associative array; p[$project] receives the project id)
# The id is taken as the second '='-separated field of $1. If it is not
# exactly 13 characters long, $1 is printed to stderr, stop is called and the
# script exits with status 1; otherwise the id is logged.
function import() {
|
||||||
|
p[$project]=$(echo "$1" | cut -d '=' -f 2)
|
||||||
|
# error handling: exit if import failed
|
||||||
|
if [[ "${#p[$project]}" != 13 ]]; then
|
||||||
|
echo 1>&2 "$1"; stop; exit 1
|
||||||
|
else
|
||||||
|
log "loaded as project id ${p[$project]}"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
trap "cleanup;exit 1" SIGHUP SIGINT SIGQUIT SIGTERM
|
|
||||||
|
|
||||||
# create workspace
|
# create workspace
|
||||||
mkdir -p "${workspace}"
|
mkdir -p "${workspace}"
|
||||||
|
@ -87,94 +119,61 @@ mkdir -p "${workspace}"
|
||||||
# simple logging
|
# simple logging
|
||||||
exec &> >(tee -a "${workspace}/${date}.log")
|
exec &> >(tee -a "${workspace}/${date}.log")
|
||||||
|
|
||||||
# =========================== START SERVER =================================== #
|
# declare associative array for projects
|
||||||
|
declare -A p
|
||||||
|
|
||||||
|
# =================== TEMPLATES FOR YOUR WORKFLOW ============================ #
|
||||||
|
|
||||||
|
# -------------------------- START SERVER ------------------------------------ #
|
||||||
|
|
||||||
# start OpenRefine server
|
|
||||||
echo "start OpenRefine server..."
|
echo "start OpenRefine server..."
|
||||||
${openrefine} -v warn -m "${memory}" -p "${port}" -d "${workspace}" &
|
start
|
||||||
pid_server=${!}
|
|
||||||
timeout 30s bash -c "until curl -s \"${endpoint}\" \
|
|
||||||
| cat | grep -q -o 'OpenRefine' ; do sleep 1; done" \
|
|
||||||
|| { echo 1>&2 "ERROR: starting OpenRefine server failed!"; cleanup; exit 1; }
|
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# ------------------------- IMPORT OPTION 1 ---------------------------------- #
|
||||||
|
|
||||||
# =========================== CSRF TOKEN ===================================== #
|
# create project from heredoc
|
||||||
|
project="example1" # project id will be accessible as ${p[example1]}
|
||||||
# get CSRF token (introduced in OpenRefine 3.3)
|
echo "import ${project}..."
|
||||||
function csrf(){
|
import "$(curl -fsS --write-out "%{redirect_url}\n" \
|
||||||
response=$(curl -fsS "${endpoint}/command/core/get-csrf-token")
|
|
||||||
if [[ "${response}" != '{"token":"'* ]]; then
|
|
||||||
echo 1>&2 "ERROR: getting CSRF token failed!"; cleanup; exit 1
|
|
||||||
else
|
|
||||||
csrf=$(echo "$response" | cut -d \" -f 4)
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
# ============================= IMPORT ======================================= #
|
|
||||||
|
|
||||||
# create example data from heredoc and store project id from response
|
|
||||||
echo "import example data..."
|
|
||||||
response=$(csrf; curl -fsS --write-out "%{redirect_url}\n" \
|
|
||||||
--form project-file="@-;filename=example1.csv" \
|
--form project-file="@-;filename=example1.csv" \
|
||||||
--form project-name="example1" \
|
--form project-name="${project}" \
|
||||||
--form format="text/line-based/*sv" \
|
--form format="text/line-based/*sv" \
|
||||||
"${endpoint}/command/core/create-project-from-upload?csrf_token=${csrf}" \
|
--form options='{"separator": " "}' \
|
||||||
|
"${endpoint}/command/core/create-project-from-upload?csrf_token=$(csrf)" \
|
||||||
<< "DATA"
|
<< "DATA"
|
||||||
a,b,c
|
a b c
|
||||||
1,2,3
|
1 2 3
|
||||||
0,0,0
|
0 0 0
|
||||||
$,\,'
|
$ \ '
|
||||||
DATA
|
DATA
|
||||||
) && p1=$(echo "$response" | cut -d '=' -f 2)
|
)"
|
||||||
# error handling: exit if import failed
|
|
||||||
if [[ "${#p1}" != 13 ]]; then
|
|
||||||
echo 1>&2 "$response"; cleanup; exit 1
|
|
||||||
fi
|
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# -------------------------- IMPORT OPTION 2 --------------------------------- #
|
||||||
|
|
||||||
# create another project from file
|
# mockup test data
|
||||||
echo "import example data from file..."
|
|
||||||
cat << DATA > "${workspace}/test.csv"
|
cat << DATA > "${workspace}/test.csv"
|
||||||
z,x,y
|
z,x,y
|
||||||
3,2,1
|
3,2,1
|
||||||
0,0,0
|
0,0,0
|
||||||
DATA
|
DATA
|
||||||
response=$(csrf; curl -fsS --write-out "%{redirect_url}\n" \
|
|
||||||
|
# create project from file
|
||||||
|
project="example2" # project id will be accessible as ${p[example2]}
|
||||||
|
echo "import ${project} from file..."
|
||||||
|
import "$(curl -fsS --write-out "%{redirect_url}\n" \
|
||||||
--form project-file="@${workspace}/test.csv" \
|
--form project-file="@${workspace}/test.csv" \
|
||||||
--form project-name="example2" \
|
--form project-name="${project}" \
|
||||||
--form format="text/line-based/*sv" \
|
--form format="text/line-based/*sv" \
|
||||||
"${endpoint}/command/core/create-project-from-upload?csrf_token=${csrf}") \
|
--form options='{"separator": ","}' \
|
||||||
&& p2=$(echo "$response" | cut -d '=' -f 2)
|
"${endpoint}/command/core/create-project-from-upload?csrf_token=$(csrf)")"
|
||||||
if [[ "${#p2}" != 13 ]]; then
|
|
||||||
echo 1>&2 "$response"; cleanup; exit 1
|
|
||||||
fi
|
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# ------------------------ TRANSFORM OPTION 1 -------------------------------- #
|
||||||
|
|
||||||
# ============================ TRANSFORM ===================================== #
|
# mockup test data
|
||||||
|
cat << DATA > "${workspace}/test.json"
|
||||||
# export to stdout
|
|
||||||
echo "export data..."
|
|
||||||
curl -fsS \
|
|
||||||
--data project="${p1}" \
|
|
||||||
--data format="tsv" \
|
|
||||||
"${endpoint}/command/core/export-rows" \
|
|
||||||
|| { cleanup; exit 1; }
|
|
||||||
echo
|
|
||||||
|
|
||||||
pause
|
|
||||||
|
|
||||||
# apply operation from quoted heredoc
|
|
||||||
echo "add column test..."
|
|
||||||
csrf; curl -fsS \
|
|
||||||
--data-urlencode "operations@-" \
|
|
||||||
"${endpoint}/command/core/apply-operations?project=${p1}&csrf_token=${csrf}" \
|
|
||||||
<< "JSON" || { cleanup; exit 1; }
|
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"op": "core/column-addition",
|
"op": "core/column-addition",
|
||||||
|
@ -184,6 +183,39 @@ csrf; curl -fsS \
|
||||||
"newColumnName": "test",
|
"newColumnName": "test",
|
||||||
"columnInsertIndex": 2,
|
"columnInsertIndex": 2,
|
||||||
"baseColumnName": "b",
|
"baseColumnName": "b",
|
||||||
|
"expression": "grel:value.replace('2','FILE')",
|
||||||
|
"onError": "set-to-blank"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
DATA
|
||||||
|
|
||||||
|
# apply operation from file
|
||||||
|
echo "add column test..."
|
||||||
|
curl -fsS \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
|
--data-urlencode operations@"${workspace}/test.json" \
|
||||||
|
"${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" \
|
||||||
|
|| { stop; exit 1; }
|
||||||
|
echo; echo
|
||||||
|
|
||||||
|
# ------------------------ TRANSFORM OPTION 2 -------------------------------- #
|
||||||
|
|
||||||
|
# apply operation from quoted heredoc
|
||||||
|
echo "add column test2..."
|
||||||
|
curl -fsS \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
|
--data-urlencode "operations@-" \
|
||||||
|
"${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" \
|
||||||
|
<< "JSON" || { stop; exit 1; }
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"op": "core/column-addition",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based"
|
||||||
|
},
|
||||||
|
"newColumnName": "test2",
|
||||||
|
"columnInsertIndex": 2,
|
||||||
|
"baseColumnName": "b",
|
||||||
"expression": "grel:value.replace('2','FOO')",
|
"expression": "grel:value.replace('2','FOO')",
|
||||||
"onError": "set-to-blank"
|
"onError": "set-to-blank"
|
||||||
}
|
}
|
||||||
|
@ -191,28 +223,18 @@ csrf; curl -fsS \
|
||||||
JSON
|
JSON
|
||||||
echo; echo
|
echo; echo
|
||||||
|
|
||||||
pause
|
# ------------------------ TRANSFORM OPTION 3 -------------------------------- #
|
||||||
|
|
||||||
# export to stdout
|
|
||||||
echo "export data (again)..."
|
|
||||||
curl -fsS \
|
|
||||||
--data project="${p1}" \
|
|
||||||
--data format="tsv" \
|
|
||||||
"${endpoint}/command/core/export-rows" \
|
|
||||||
|| { cleanup; exit 1; }
|
|
||||||
echo
|
|
||||||
|
|
||||||
pause
|
|
||||||
|
|
||||||
# apply operation from unquoted heredoc (allows using bash variables)
|
# apply operation from unquoted heredoc (allows using bash variables)
|
||||||
echo "add column test2..."
|
echo "add column test3..."
|
||||||
new_column="test2"
|
new_column="test3"
|
||||||
base_column="b"
|
base_column="b"
|
||||||
replace_value="BAR"
|
replace_value="BAR"
|
||||||
csrf; curl -fsS \
|
curl -fsS \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
--data-urlencode "operations@-" \
|
--data-urlencode "operations@-" \
|
||||||
"${endpoint}/command/core/apply-operations?project=${p1}&csrf_token=${csrf}" \
|
"${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" \
|
||||||
<< JSON || { cleanup; exit 1; }
|
<< JSON || { stop; exit 1; }
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"op": "core/column-addition",
|
"op": "core/column-addition",
|
||||||
|
@ -229,159 +251,245 @@ csrf; curl -fsS \
|
||||||
JSON
|
JSON
|
||||||
echo; echo
|
echo; echo
|
||||||
|
|
||||||
pause
|
# ------------------------ TRANSFORM OPTION 4 -------------------------------- #
|
||||||
|
|
||||||
# apply operation from unquoted heredoc with multi-line expression (requires jq)
|
# apply operation from unquoted heredoc with multi-line expression (requires jq)
|
||||||
echo "add column test3..."
|
echo "add column test4..."
|
||||||
replace_value="!"
|
replace_value="!"
|
||||||
read -r -d '' expression <<- EXPR
|
read -r -d '' expression << EXPRESSION
|
||||||
grel:value.replace(
|
grel:value.replace(
|
||||||
'2',
|
'2',
|
||||||
'${replace_value}'
|
'${replace_value}'
|
||||||
)
|
)
|
||||||
EXPR
|
EXPRESSION
|
||||||
csrf; curl -fsS \
|
curl -fsS \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
--data-urlencode "operations@-" \
|
--data-urlencode "operations@-" \
|
||||||
"${endpoint}/command/core/apply-operations?project=${p1}&csrf_token=${csrf}" \
|
"${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" \
|
||||||
<<- JSON || { cleanup; exit 1; }
|
<< JSON || { stop; exit 1; }
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"op": "core/column-addition",
|
"op": "core/column-addition",
|
||||||
"engineConfig": {
|
"engineConfig": {
|
||||||
"mode": "row-based"
|
"mode": "row-based"
|
||||||
},
|
},
|
||||||
"newColumnName": "test3",
|
"newColumnName": "test4",
|
||||||
"columnInsertIndex": 4,
|
"columnInsertIndex": 4,
|
||||||
"baseColumnName": "b",
|
"baseColumnName": "b",
|
||||||
"expression": $(echo "${expression}" | ${jq} -s -R '.'),
|
"expression": $(echo "${expression}" | ${jq} -s -R '.'),
|
||||||
"onError": "set-to-blank"
|
"onError": "set-to-blank"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
JSON
|
JSON
|
||||||
echo; echo
|
echo; echo
|
||||||
|
|
||||||
pause
|
# ------------------------ TRANSFORM OPTION 5 -------------------------------- #
|
||||||
|
|
||||||
# export to stdout
|
|
||||||
echo "export data (again)..."
|
|
||||||
curl -fsS \
|
|
||||||
--data project="${p1}" \
|
|
||||||
--data format="tsv" \
|
|
||||||
"${endpoint}/command/core/export-rows" \
|
|
||||||
|| { cleanup; exit 1; }
|
|
||||||
echo
|
|
||||||
|
|
||||||
pause
|
|
||||||
|
|
||||||
# apply multiple operations generated on-the-fly (requires jq)
|
# apply multiple operations generated on-the-fly (requires jq)
|
||||||
echo "delete columns..."
|
echo "delete columns..."
|
||||||
columns=( "test" "test2" )
|
columns=( "test" "test2" "test3" )
|
||||||
payload=()
|
payload=()
|
||||||
for column in "${columns[@]}"; do
|
for column in "${columns[@]}"; do
|
||||||
payload+=( "$(cat <<- JSON
|
payload+=( "$(cat << JSON
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"op": "core/column-removal",
|
"op": "core/column-removal",
|
||||||
"columnName": "${column}"
|
"columnName": "${column}"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
JSON
|
JSON
|
||||||
)" )
|
)" )
|
||||||
done
|
done
|
||||||
csrf; echo "${payload[@]}" | "${jq}" -s add | curl -fsS \
|
echo "${payload[@]}" | "${jq}" -s add | curl -fsS \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
--data-urlencode operations@- \
|
--data-urlencode operations@- \
|
||||||
"${endpoint}/command/core/apply-operations?project=${p1}&csrf_token=${csrf}" \
|
"${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" \
|
||||||
|| { cleanup; exit 1; }
|
|| { stop; exit 1; }
|
||||||
echo; echo
|
echo; echo
|
||||||
|
|
||||||
pause
|
# -------------------------- EXPORT OPTION 1 --------------------------------- #
|
||||||
|
|
||||||
# ============================== EXPORT ====================================== #
|
|
||||||
|
|
||||||
# export to stdout
|
# export to stdout
|
||||||
echo "export data..."
|
echo "export example1..."
|
||||||
curl -fsS \
|
curl -fsS \
|
||||||
--data project="${p1}" \
|
--data project="${p[example1]}" \
|
||||||
--data format="tsv" \
|
--data format="tsv" \
|
||||||
|
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||||
"${endpoint}/command/core/export-rows" \
|
"${endpoint}/command/core/export-rows" \
|
||||||
|| { cleanup; exit 1; }
|
|| { stop; exit 1; }
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# -------------------------- EXPORT OPTION 2 --------------------------------- #
|
||||||
|
|
||||||
# export to stdout
|
# export to file
|
||||||
echo "export data..."
|
output="${workspace}/example1.csv"
|
||||||
|
echo "export example1..."
|
||||||
curl -fsS \
|
curl -fsS \
|
||||||
--data project="${p2}" \
|
--data project="${p[example1]}" \
|
||||||
--data format="tsv" \
|
--data format="csv" \
|
||||||
|
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||||
"${endpoint}/command/core/export-rows" \
|
"${endpoint}/command/core/export-rows" \
|
||||||
|| { cleanup; exit 1; }
|
> "${output}" \
|
||||||
|
|| { stop; exit 1; } \
|
||||||
|
&& log "saved to file ${output}"
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# -------------------------- EXPORT OPTION 3 --------------------------------- #
|
||||||
|
|
||||||
|
# templating export to stdout
|
||||||
|
echo "export example2 using template..."
|
||||||
|
IFS= read -r -d '' template << TEMPLATE
|
||||||
|
{
|
||||||
|
"z": {{cells['z'].value.jsonize()}},
|
||||||
|
"y": {{cells['y'].value.jsonize()}}
|
||||||
|
}
|
||||||
|
TEMPLATE
|
||||||
|
echo "${template}" | head -c -2 | curl -fsS \
|
||||||
|
--data project="${p[example2]}" \
|
||||||
|
--data format="template" \
|
||||||
|
--data prefix="[
|
||||||
|
" \
|
||||||
|
--data suffix="
|
||||||
|
]" \
|
||||||
|
--data separator=",
|
||||||
|
" \
|
||||||
|
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||||
|
--data-urlencode template@- \
|
||||||
|
"${endpoint}/command/core/export-rows" \
|
||||||
|
|| { stop; exit 1; }
|
||||||
|
echo; echo
|
||||||
|
|
||||||
|
# -------------------------- EXPORT OPTION 4 --------------------------------- #
|
||||||
|
|
||||||
|
# templating export to file
|
||||||
|
output="${workspace}/example2.json"
|
||||||
|
echo "export example2 using template..."
|
||||||
|
IFS= read -r -d '' template << TEMPLATE
|
||||||
|
{
|
||||||
|
"z": {{cells['z'].value.jsonize()}},
|
||||||
|
"y": {{cells['y'].value.jsonize()}}
|
||||||
|
}
|
||||||
|
TEMPLATE
|
||||||
|
echo "${template}" | head -c -2 | curl -fsS \
|
||||||
|
--data project="${p[example2]}" \
|
||||||
|
--data format="template" \
|
||||||
|
--data prefix="[
|
||||||
|
" \
|
||||||
|
--data suffix="
|
||||||
|
]" \
|
||||||
|
--data separator=",
|
||||||
|
" \
|
||||||
|
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||||
|
--data-urlencode template@- \
|
||||||
|
"${endpoint}/command/core/export-rows" \
|
||||||
|
> "${output}" \
|
||||||
|
|| { stop; exit 1; } \
|
||||||
|
&& log "saved to file ${output}"
|
||||||
|
echo; echo
|
||||||
|
|
||||||
|
# -------------------------- EXPORT OPTION 5 --------------------------------- #
|
||||||
|
|
||||||
# export projects to files (example for parallel execution)
|
# export projects to files (example for parallel execution)
|
||||||
echo "export to files..."
|
projects=( "example1" "example2" )
|
||||||
projects=( "${p1}" "${p2}" )
|
format="tsv"
|
||||||
|
echo "export ${projects[*]} to files..."
|
||||||
pid=()
|
pid=()
|
||||||
for project in "${projects[@]}"; do
|
for project in "${projects[@]}"; do
|
||||||
echo "export project ${project} to file ${workspace}/${project}.tsv"
|
|
||||||
curl -fs \
|
curl -fs \
|
||||||
--data project="${project}" \
|
--data project="${p[$project]}" \
|
||||||
--data format="tsv" \
|
--data format="${format}" \
|
||||||
|
--data engine='{"facets":[],"mode":"row-based"}' \
|
||||||
"${endpoint}/command/core/export-rows" \
|
"${endpoint}/command/core/export-rows" \
|
||||||
> "${workspace}/${project}.tsv" &
|
> "${workspace}/${project}.${format}" &
|
||||||
pid+=("$!")
|
pid+=("$!")
|
||||||
done
|
done
|
||||||
for i in "${!projects[@]}"; do
|
for i in "${!projects[@]}"; do
|
||||||
wait "${pid[$i]}" \
|
wait "${pid[$i]}" \
|
||||||
|| { echo 1>&2 "ERROR: export of ${projects[$i]} failed!"; cleanup; exit 1; }
|
|| { echo 1>&2 "ERROR: export of ${projects[$i]} failed!"; stop; exit 1; } \
|
||||||
|
&& log "${projects[$i]} saved to file ${workspace}/${projects[$i]}.${format}"
|
||||||
done
|
done
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# -------------------------- LIST PROJECTS ----------------------------------- #
|
||||||
|
|
||||||
# ============================= METADATA ===================================== #
|
# print id and name for each project (requires jq)
|
||||||
|
echo "list projects..."
|
||||||
# get metadata (requires jq)
|
curl -fsS --get \
|
||||||
echo "show metadata for project ${p2}"
|
"${endpoint}/command/core/get-all-project-metadata" \
|
||||||
curl -fsS \
|
| "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"' \
|
||||||
"${endpoint}/command/core/get-project-metadata?project=${p2}" \
|
|| { stop; exit 1; }
|
||||||
| "${jq}" "{ id: ${p1} } + ." \
|
|
||||||
|| { cleanup; exit 1; }
|
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# -------------------------- GET METADATA ------------------------------------ #
|
||||||
|
|
||||||
# get history (requires jq)
|
# print metadata (requires jq)
|
||||||
echo "save operations history for project ${p1}" \
|
echo "metadata for project example1..."
|
||||||
"to file ${workspace}/${p1}_history.json"
|
curl -fsS --get \
|
||||||
curl -fsS \
|
--data project="${p[example1]}" \
|
||||||
"${endpoint}/command/core/get-operations?project=${p1}" \
|
"${endpoint}/command/core/get-project-metadata" \
|
||||||
|
| "${jq}" "{ id: ${p[example1]} } + ." \
|
||||||
|
|| { stop; exit 1; }
|
||||||
|
echo
|
||||||
|
|
||||||
|
# ---------------------------- GET ROWS -------------------------------------- #
|
||||||
|
|
||||||
|
# print total number of rows (requires jq)
|
||||||
|
echo "total number of rows in project example1..."
|
||||||
|
curl -fsS --get \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
|
"${endpoint}/command/core/get-rows" \
|
||||||
|
| "${jq}" -r '.total' \
|
||||||
|
|| { stop; exit 1; }
|
||||||
|
echo
|
||||||
|
|
||||||
|
# -------------------------- GET COLUMNS ------------------------------------- #
|
||||||
|
|
||||||
|
# print columns (requires jq)
|
||||||
|
echo "column names of project example1..."
|
||||||
|
curl -fsS --get \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
|
"${endpoint}/command/core/get-models" \
|
||||||
|
| "${jq}" -r '.columnModel | .columns[] | .name' \
|
||||||
|
|| { stop; exit 1; }
|
||||||
|
echo
|
||||||
|
|
||||||
|
# ---------------------- GET OPERATIONS HISTORY ------------------------------ #
|
||||||
|
|
||||||
|
# save operations history to file (requires jq)
|
||||||
|
output="${workspace}/example1_history.json"
|
||||||
|
echo "operations history for project example1..."
|
||||||
|
curl -fsS --get \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
|
"${endpoint}/command/core/get-operations" \
|
||||||
| "${jq}" '[ .entries[] | .operation ]' \
|
| "${jq}" '[ .entries[] | .operation ]' \
|
||||||
> "${workspace}/${p1}_history.json" \
|
> "${output}" \
|
||||||
|| { cleanup; exit 1; }
|
|| { stop; exit 1; } \
|
||||||
|
&& log "saved to file ${output}"
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# ------------------------ GET IMPORT HISTORY -------------------------------- #
|
||||||
|
|
||||||
# =========================== STOP SERVER ==================================== #
|
# print import options history (requires jq)
|
||||||
|
echo "print import options history for project example2..."
|
||||||
# show allocated system resources
|
curl -fsS --get \
|
||||||
echo "show system resources..."
|
--data project="${p[example2]}" \
|
||||||
ps -o start,etime,%mem,%cpu,rss -p "${pid_server}"
|
"${endpoint}/command/core/get-project-metadata" \
|
||||||
|
| "${jq}" ".importOptionMetadata[0]" \
|
||||||
|
|| { stop; exit 1; }
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
# ------------------------- DELETE PROJECT ----------------------------------- #
|
||||||
|
|
||||||
|
# delete project
|
||||||
|
echo "delete project example1..."
|
||||||
|
curl -fsS \
|
||||||
|
--data project="${p[example1]}" \
|
||||||
|
"${endpoint}/command/core/delete-project?csrf_token=$(csrf)" \
|
||||||
|
|| { stop; exit 1; }
|
||||||
|
echo; echo
|
||||||
|
|
||||||
|
# --------------------------- STOP SERVER ------------------------------------ #
|
||||||
|
|
||||||
# stop OpenRefine server without saving projects to workspace
|
|
||||||
echo "stop OpenRefine server..."
|
echo "stop OpenRefine server..."
|
||||||
cleanup
|
stop
|
||||||
echo
|
echo
|
||||||
|
|
||||||
pause
|
|
||||||
|
|
||||||
# grep log for server exceptions
|
|
||||||
echo "check log for any warnings..."
|
|
||||||
grep -i 'exception\|error' "${workspace}/${date}.log" \
|
|
||||||
&& exit 1 || echo "no warnings, all good!" && exit 0
|
|
Loading…
Reference in New Issue