From dd8d28d7e0c7c8d30fa7c163e19dc9cddb13e3c6 Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Sat, 4 Jul 2020 00:20:08 +0200 Subject: [PATCH] --- openrefine-bash-curl.sh | 285 +++++++++++++++++++++------------------- 1 file changed, 147 insertions(+), 138 deletions(-) diff --git a/openrefine-bash-curl.sh b/openrefine-bash-curl.sh index 92e0c9d..fa7ddfe 100644 --- a/openrefine-bash-curl.sh +++ b/openrefine-bash-curl.sh @@ -1,5 +1,5 @@ #!/bin/bash -# openrefine-bash-curl.sh, Felix Lohmeier, v0.3, 2020-07-03 +# openrefine-bash-curl.sh, Felix Lohmeier, v0.4, 2020-07-04 # How to control OpenRefine 3.3+ with cURL (and jq) in Bash scripts # https://gist.github.com/felixlohmeier/d76bd27fbc4b8ab6d683822cdf61f81d # tested on Linux (Fedora 33), needs to be adapted to work on macOS @@ -8,7 +8,7 @@ # make script executable from another directory cd "$(dirname "${0}")" || exit 1 -# ============================= CONFIG ======================================= # +# ================================== CONFIG ================================== # # config port="3333" @@ -17,7 +17,7 @@ memory="1400M" date="$(date +%Y%m%d_%H%M%S)" workspace="${date}" -# ========================== REQUIREMENTS ==================================== # +# =============================== REQUIREMENTS =============================== # # check requirement java java="$(command -v java 2> /dev/null)" @@ -67,7 +67,7 @@ if [[ ! -d "openrefine" ]]; then fi openrefine="$(readlink -f openrefine/refine)" -# ============================ ENVIRONMENT =================================== # +# =============================== ENVIRONMENT ================================ # # start OpenRefine function start() { @@ -96,7 +96,7 @@ function stop() { } # cleanup handler -trap "stop;exit 1" SIGHUP SIGINT SIGQUIT SIGTERM +trap "stop;exit 1" HUP INT QUIT TERM # get csrf token (introduced in OpenRefine 3.3) function csrf() { @@ -109,14 +109,14 @@ function csrf() { } # check and store project ids from import in associative array p -declare -A p +declare -A ids function store() { if [[ $# -eq 2 ]]; then - p[$1]=$(cut -d '=' -f 2 "$2") + ids[$1]=$(cut -d '=' -f 2 "$2") else echo 1>&2 "ERROR: invalid arguments supplied to import function"; return 1 fi - if [[ "${#p[$1]}" != 13 ]]; then + if [[ "${#ids[$1]}" != 13 ]]; then echo 1>&2 "ERROR: returned project id is not valid"; return 1 else rm "$2" @@ -132,25 +132,25 @@ function log() { echo "$(date +%H:%M:%S.%3N) [ client] $1" } -# =================== TEMPLATES FOR YOUR WORKFLOW ============================ # +# ======================= TEMPLATES FOR YOUR WORKFLOW ======================== # -# -------------------------- START SERVER ------------------------------------ # +# ------------------------------- START SERVER ------------------------------- # echo "start OpenRefine server..." start echo -# ------------------------- IMPORT OPTION 1 ---------------------------------- # +# ----------------------------- IMPORT OPTION 1 ------------------------------ # # create project from heredoc -# project id will be accessible as ${p[example1]} -project="example1" +# project id will be accessible as ${ids[example1]} +p="example1" input="example1.csv" filename="${input##*/})" -echo "import ${project}..." +echo "import ${p}..." if curl -fsS --write-out "%{redirect_url}\n" \ --form project-file="@-;filename=${input}" \ - --form project-name="${project}" \ + --form project-name="${p}" \ --form format="text/line-based/*sv" \ --form options='{"separator": " "}' \ "${endpoint}/command/core/create-project-from-upload?csrf_token=$(csrf)" \ @@ -162,14 +162,14 @@ a b c $ \ ' DATA then - store "${project}" "${workspace}/${filename}.id" \ + store "${p}" "${workspace}/${filename}.id" \ || { echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1; } \ - && log "imported ${input} as ${p[$project]}"; echo + && log "imported ${input} as ${p} (${ids[$p]})"; echo else echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1 fi -# -------------------------- IMPORT OPTION 2 --------------------------------- # +# ----------------------------- IMPORT OPTION 2 ------------------------------ # # mockup test data cat << DATA > "${workspace}/test.csv" @@ -179,27 +179,27 @@ z,x,y DATA # create project from file -# project id will be accessible as ${p[example2]} -project="example2" +# project id will be accessible as ${ids[example2]} +p="example2" input="${workspace}/test.csv" filename="${input##*/})" -echo "import ${project}..." +echo "import ${p}..." if curl -fsS --write-out "%{redirect_url}\n" \ --form project-file="@${input}" \ - --form project-name="${project}" \ + --form project-name="${p}" \ --form format="text/line-based/*sv" \ - --form options='{"separator": ","}' \ + --form options='{"separator": "\t"}' \ "${endpoint}/command/core/create-project-from-upload?csrf_token=$(csrf)" \ > "${workspace}/${filename}.id" then - store "${project}" "${workspace}/${filename}.id" \ + store "${p}" "${workspace}/${filename}.id" \ || { echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1; } \ - && log "imported ${input} as ${p[$project]}"; echo + && log "imported ${input} as ${p} (${ids[$p]})"; echo else echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1 fi -# -------------------------- IMPORT OPTION 3 --------------------------------- # +# ----------------------------- IMPORT OPTION 3 ------------------------------ # # mockup test data cat << DATA > "${workspace}/test2.csv" @@ -209,16 +209,16 @@ r,s,t DATA # create projects from files (in parallel) -# project ids will be accessible as ${p[test]} and ${p[test2]} +# project ids will be accessible as ${ids[test]} and ${ids[test2]} inputs=( "${workspace}/test.csv" "${workspace}/test2.csv" ) -echo "import files" "${input[@]}" "..." +echo "import files" "${inputs[@]}" "..." pid=() for i in "${!inputs[@]}"; do filename="${inputs[$i]##*/}" - project="${filename%%.*}" + p="${filename%%.*}" curl -fsS --write-out "%{redirect_url}\n" \ --form project-file="@${inputs[$i]}" \ - --form project-name="${project}" \ + --form project-name="${p}" \ --form format="text/line-based/*sv" \ --form options='{"separator": ","}' \ "${endpoint}/command/core/create-project-from-upload?csrf_token=$(csrf)" \ @@ -227,19 +227,19 @@ for i in "${!inputs[@]}"; do done for i in "${!inputs[@]}"; do filename="${inputs[$i]##*/}" - project="${filename%%.*}" + p="${filename%%.*}" wait "${pid[$i]}" if [[ $(wait "${pid[$i]}") -eq 0 ]]; then - store "${project}" "${workspace}/${filename}.id" \ + store "${p}" "${workspace}/${filename}.id" \ || { echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1; } \ - && log "imported ${input} as ${p[$project]}" + && log "imported ${inputs[$i]} as ${p} (${ids[$p]})" else - echo 1>&2 "ERROR: import of ${input} failed!"; stop; exit 1 + echo 1>&2 "ERROR: import of ${inputs[$i]} failed!"; stop; exit 1 fi done echo -# ------------------------ TRANSFORM OPTION 1 -------------------------------- # +# ---------------------------- TRANSFORM OPTION 1 ---------------------------- # # mockup test data cat << DATA > "${workspace}/test.json" @@ -259,28 +259,28 @@ cat << DATA > "${workspace}/test.json" DATA # apply operation from file -project="example1" +p="example1" input="${workspace}/test.json" -echo "add column test..." +echo "add column test to ${p}..." if curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data-urlencode operations@"${input}" \ "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null then - log "transformed ${p[$project]} with ${input}" + log "transformed ${p} (${ids[$p]}) with ${input}" echo else - echo 1>&2 "ERROR: transform ${p[$project]} with ${input} failed!" + echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) with ${input} failed!" stop; exit 1 fi -# ------------------------ TRANSFORM OPTION 2 -------------------------------- # +# ---------------------------- TRANSFORM OPTION 2 ---------------------------- # # apply operation from quoted heredoc -project="example1" -echo "add column test2..." +p="example1" +echo "add column test2 to ${p}..." if curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data-urlencode "operations@-" \ "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null \ << "JSON" @@ -299,22 +299,22 @@ if curl -fsS \ ] JSON then - log "transformed ${p[$project]}" + log "transformed ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: transform ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# ------------------------ TRANSFORM OPTION 3 -------------------------------- # +# ---------------------------- TRANSFORM OPTION 3 ---------------------------- # # apply operation from unquoted heredoc (allows using bash variables) -project="example1" +p="example1" new_column="test3" base_column="b" replace_value="BAR" -echo "add column test3..." +echo "add column ${new_column} to ${p}..." if curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data-urlencode "operations@-" \ "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null \ << JSON @@ -333,18 +333,18 @@ if curl -fsS \ ] JSON then - log "transformed ${p[$project]}" + log "transformed ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: transform ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# ------------------------ TRANSFORM OPTION 4 -------------------------------- # +# ---------------------------- TRANSFORM OPTION 4 ---------------------------- # # apply operation from unquoted heredoc with multi-line expression (requires jq) -project="example1" +p="example1" replace_value="!" -echo "add column test4..." +echo "add column test4 to ${p}..." read -r -d '' expression << EXPRESSION grel:value.replace( '2', @@ -352,7 +352,7 @@ grel:value.replace( ) EXPRESSION if curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data-urlencode "operations@-" \ "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null \ << JSON @@ -371,18 +371,18 @@ if curl -fsS \ ] JSON then - log "transformed ${p[$project]}" + log "transformed ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: transform ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# ------------------------ TRANSFORM OPTION 5 -------------------------------- # +# ---------------------------- TRANSFORM OPTION 5 ---------------------------- # # apply multiple operations generated on-the-fly (requires jq) -project="example1" +p="example1" columns=( "test" "test2" "test3" ) -echo "delete columns..." +echo "delete columns" "${columns[@]}" "in ${p}..." payload=() for column in "${columns[@]}"; do payload+=( "$(cat << JSON @@ -396,56 +396,57 @@ JSON )" ) done if echo "${payload[@]}" | "${jq}" -s add | curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data-urlencode operations@- \ "${endpoint}/command/core/apply-operations?csrf_token=$(csrf)" > /dev/null then - log "transformed ${p[$project]}" + log "transformed ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: transform ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: transform ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# -------------------------- EXPORT OPTION 1 --------------------------------- # +# ----------------------------- EXPORT OPTION 1 ------------------------------ # # export to stdout -project="example1" -echo "export example1..." +p="example1" +echo "export ${p}..." if curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data format="tsv" \ --data engine='{"facets":[],"mode":"row-based"}' \ "${endpoint}/command/core/export-rows" then + #log "printed export of ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: export of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# -------------------------- EXPORT OPTION 2 --------------------------------- # +# ----------------------------- EXPORT OPTION 2 ------------------------------ # # export to file -project="example1" -output="${workspace}/example1.csv" -echo "export example1..." +p="example1" +output="${workspace}/${p}.csv" +echo "export ${p} to file..." if curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data format="csv" \ --data engine='{"facets":[],"mode":"row-based"}' \ "${endpoint}/command/core/export-rows" \ > "${output}" then - log "${p[$project]} saved to file ${output}" + log "${p} (${ids[$p]}) saved to file ${output}" echo else - echo 1>&2 "ERROR: export of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# -------------------------- EXPORT OPTION 3 --------------------------------- # +# ----------------------------- EXPORT OPTION 3 ------------------------------ # # templating export to stdout -project="example2" -echo "export example2 using template..." +p="example2" +echo "export ${p} using template..." IFS= read -r -d '' template << TEMPLATE { "z": {{cells['z'].value.jsonize()}}, @@ -453,7 +454,7 @@ IFS= read -r -d '' template << TEMPLATE } TEMPLATE if echo "${template}" | head -c -2 | curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data format="template" \ --data prefix="[ " \ @@ -465,17 +466,19 @@ if echo "${template}" | head -c -2 | curl -fsS \ --data-urlencode template@- \ "${endpoint}/command/core/export-rows" then - echo; echo + echo + #log "printed export of ${p} (${ids[$p]})" + echo else - echo 1>&2 "ERROR: export of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# -------------------------- EXPORT OPTION 4 --------------------------------- # +# ----------------------------- EXPORT OPTION 4 ------------------------------ # # templating export to file -project="example2" -output="${workspace}/example2.json" -echo "export example2 using template..." +p="example2" +output="${workspace}/${p}.json" +echo "export ${p} to file using template..." IFS= read -r -d '' template << TEMPLATE { "z": {{cells['z'].value.jsonize()}}, @@ -483,7 +486,7 @@ IFS= read -r -d '' template << TEMPLATE } TEMPLATE if echo "${template}" | head -c -2 | curl -fsS \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data format="template" \ --data prefix="[ " \ @@ -496,39 +499,39 @@ if echo "${template}" | head -c -2 | curl -fsS \ "${endpoint}/command/core/export-rows" \ > "${output}" then - log "${p[$project]} saved to ${output}" + log "${p} (${ids[$p]}) saved to ${output}" echo else - echo 1>&2 "ERROR: export of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# -------------------------- EXPORT OPTION 5 --------------------------------- # +# ----------------------------- EXPORT OPTION 5 ------------------------------ # # export projects to files (in parallel) -projects=( "example1" "example2" ) +ps=( "example1" "example2" ) format="tsv" -echo "export ${projects[*]} to files..." +echo "export" "${ps[@]}" "to files..." pid=() -for project in "${projects[@]}"; do +for p in "${ps[@]}"; do curl -fs \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ --data format="${format}" \ --data engine='{"facets":[],"mode":"row-based"}' \ "${endpoint}/command/core/export-rows" \ - > "${workspace}/${project}.${format}" & + > "${workspace}/${p}.${format}" & pid+=("$!") done -for i in "${!projects[@]}"; do - project="${projects[$i]}" +for i in "${!ps[@]}"; do + p="${ps[$i]}" if [[ $(wait "${pid[$i]}") -eq 0 ]]; then - log "${p[$project]} saved to ${workspace}/${project}.${format}" + log "${p} (${ids[$p]}) saved to ${workspace}/${p}.${format}" else - echo 1>&2 "ERROR: export of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: export of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi done echo -# -------------------------- LIST PROJECTS ----------------------------------- # +# ------------------------------ LIST PROJECTS ------------------------------- # # print id and name for each project (requires jq) echo "list projects..." @@ -536,106 +539,112 @@ if curl -fsS --get \ "${endpoint}/command/core/get-all-project-metadata" \ | "${jq}" -r '.projects | keys[] as $k | "\($k): \(.[$k] | .name)"' then + #log "printed list of projects" echo else echo 1>&2 "ERROR: list projects failed!"; stop; exit 1 fi -# -------------------------- GET METADATA ------------------------------------ # +# ------------------------------- GET METADATA ------------------------------- # # print metadata (requires jq) -project="example1" -echo "metadata for project example1..." +p="example1" +echo "metadata for ${p}..." if curl -fsS --get \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ "${endpoint}/command/core/get-project-metadata" \ - | "${jq}" "{ id: ${p[$project]} } + ." + | "${jq}" "{ id: ${ids[$p]} } + ." then + #log "printed metadata of ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: getting metadata of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: getting metadata of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# -------------------------- GET ROWCOUNT ------------------------------------ # +# ------------------------------ GET ROW COUNT ------------------------------- # # print total number of rows (requires jq) -project="example1" -echo "total number of rows in project example1..." +p="example1" +echo "total number of rows in ${p}..." if curl -fsS --get \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ "${endpoint}/command/core/get-rows" \ | "${jq}" -r '.total' then + #log "printed row count of ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: getting rowcount of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: getting rowcount of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# -------------------------- GET COLUMNS ------------------------------------- # +# ------------------------------- GET COLUMNS -------------------------------- # # print columns (requires jq) -project="example1" -echo "column names of project example1..." +p="example1" +echo "column names of ${p}..." if curl -fsS --get \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ "${endpoint}/command/core/get-models" \ | "${jq}" -r '.columnModel | .columns[] | .name' then + #log "printed column names of ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: getting columns of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: getting columns of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# ---------------------- GET OPERATIONS HISTORY ------------------------------ # +# -------------------------- GET OPERATIONS HISTORY -------------------------- # # save operations history to file (requires jq) -project="example1" -output="${workspace}/example1_history.json" -echo "history of operations for project example1..." +p="example1" +output="${workspace}/${p}_history.json" +echo "history of operations for ${p}..." if curl -fsS --get \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ "${endpoint}/command/core/get-operations" \ | "${jq}" '[ .entries[] | .operation ]' \ > "${output}" then - log "ops history of ${p[$project]} saved to ${output}" + log "ops history of ${p} (${ids[$p]}) saved to ${output}" echo else - echo 1>&2 "ERROR: getting ops history of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: getting ops history of ${p} (${ids[$p]}) failed!" + stop; exit 1 fi -# ------------------------ GET IMPORT History -------------------------------- # +# ---------------------------- GET IMPORT HISTORY ---------------------------- # # print import options history (requires jq) -project="example2" -echo "history of import for project example2..." +p="example2" +echo "history of import for ${p}..." if curl -fsS --get \ - --data project="${p[$project]}" \ + --data project="${ids[$p]}" \ "${endpoint}/command/core/get-project-metadata" \ | "${jq}" ".importOptionMetadata[0]" then + #log "printed import history of ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: getting imp history of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: getting imp history of ${p} (${ids[$p]}) failed!" + stop; exit 1 fi -# ------------------------- DELETE project ----------------------------------- # +# ---------------------------------- DELETE ---------------------------------- # # delete project -project="example1" -echo "delete project example1..." +p="example1" +echo "delete project ${p}..." if curl -fsS \ - --data project="${p[$project]}" \ - "${endpoint}/command/core/delete-project?csrf_token=$(csrf)" + --data project="${ids[$p]}" \ + "${endpoint}/command/core/delete-project?csrf_token=$(csrf)" > /dev/null then - log "${p[$project]} deleted" + log "deleted ${p} (${ids[$p]})" echo else - echo 1>&2 "ERROR: deletion of ${p[$project]} failed!"; stop; exit 1 + echo 1>&2 "ERROR: deletion of ${p} (${ids[$p]}) failed!"; stop; exit 1 fi -# --------------------------- STOP SERVER ------------------------------------ # +# ------------------------------- STOP SERVER -------------------------------- # echo "stop OpenRefine server..." -stop -echo \ No newline at end of file +stop \ No newline at end of file