Release v1.4: replaced the Python client with a single-file executable and updated OpenRefine to the development snapshot of 2017-08-02
This commit is contained in:
parent
f86135fe53
commit
b1d90b8eec
|
@ -0,0 +1,6 @@
|
||||||
|
# downloaded program libraries
|
||||||
|
openrefine
|
||||||
|
openrefine-client
|
||||||
|
|
||||||
|
# examples output directories
|
||||||
|
examples/powerhouse-museum/output
|
|
@ -1,22 +1,12 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# openrefine-batch.sh, Felix Lohmeier, v1.3, 2017-06-22
|
# openrefine-batch.sh, Felix Lohmeier, v1.4, 2017-08-02
|
||||||
# https://github.com/felixlohmeier/openrefine-batch
|
# https://github.com/felixlohmeier/openrefine-batch
|
||||||
|
|
||||||
# declare download URLs for OpenRefine and OpenRefine client
|
# declare download URLs for OpenRefine and OpenRefine client
|
||||||
openrefine_URL="https://github.com/felixlohmeier/OpenRefine/releases/download/2.7%2Boptions/openrefine-2.7.options.tar.gz"
|
openrefine_URL="https://github.com/felixlohmeier/OpenRefine/releases/download/2017-08-02/openrefine-linux-2017-08-02.tar.gz"
|
||||||
client_URL="https://github.com/felixlohmeier/openrefine-client/archive/v0.3.1.tar.gz"
|
client_URL="https://github.com/felixlohmeier/openrefine-client/releases/download/v0.3.1/openrefine-client_0-3-1_linux-64bit"
|
||||||
|
|
||||||
# check system requirements
|
# check system requirements
|
||||||
PYTHON="$(which python 2> /dev/null)"
|
|
||||||
if [ -z "$PYTHON" ] ; then
|
|
||||||
echo 1>&2 "This action requires you to have 'python' installed and present in your PATH. You can download it for free at http://www.python.org/"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
PYTHON_VERSION="$($PYTHON --version 2>&1 | cut -f 2 -d ' ' | cut -f 1,2 -d .)"
|
|
||||||
if [ "$PYTHON_VERSION" != "2.6" ] && [ "$PYTHON_VERSION" != "2.7" ]; then
|
|
||||||
echo 1>&2 "This action requires Python version 2.6.x. or 2.7.x. You can download it for free at http://www.python.org/"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
JAVA="$(which java 2> /dev/null)"
|
JAVA="$(which java 2> /dev/null)"
|
||||||
if [ -z "$JAVA" ] ; then
|
if [ -z "$JAVA" ] ; then
|
||||||
echo 1>&2 "This action requires you to have 'Java JRE' installed. You can download it for free at https://java.com"
|
echo 1>&2 "This action requires you to have 'Java JRE' installed. You can download it for free at https://java.com"
|
||||||
|
@ -34,7 +24,7 @@ if [ ! -d "openrefine" ]; then
|
||||||
tar -xzf "$(basename $openrefine_URL)" -C openrefine --strip 1 --totals
|
tar -xzf "$(basename $openrefine_URL)" -C openrefine --strip 1 --totals
|
||||||
rm -f "$(basename $openrefine_URL)"
|
rm -f "$(basename $openrefine_URL)"
|
||||||
sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' openrefine/refine.ini
|
sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' openrefine/refine.ini
|
||||||
sed -i 's/#REFINE_AUTOSAVE_PERIOD=1/REFINE_AUTOSAVE_PERIOD=180/' openrefine/refine.ini
|
sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1440/' openrefine/refine.ini
|
||||||
sed -i 's/-Xms$REFINE_MIN_MEMORY/-Xms$REFINE_MEMORY/' openrefine/refine
|
sed -i 's/-Xms$REFINE_MIN_MEMORY/-Xms$REFINE_MEMORY/' openrefine/refine
|
||||||
echo ""
|
echo ""
|
||||||
fi
|
fi
|
||||||
|
@ -43,10 +33,8 @@ fi
|
||||||
if [ ! -d "openrefine-client" ]; then
|
if [ ! -d "openrefine-client" ]; then
|
||||||
echo "Download OpenRefine client..."
|
echo "Download OpenRefine client..."
|
||||||
mkdir -p openrefine-client
|
mkdir -p openrefine-client
|
||||||
wget -q $wget_opt $client_URL
|
wget -q -P openrefine-client $wget_opt $client_URL
|
||||||
echo "Install OpenRefine client in subdirectory openrefine-client..."
|
chmod +x openrefine-client/openrefine-client_0-3-1_linux-64bit
|
||||||
tar -xzf "$(basename $client_URL)" -C openrefine-client --strip 1 --totals
|
|
||||||
rm -f "$(basename $client_URL)"
|
|
||||||
echo ""
|
echo ""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -223,7 +211,7 @@ if [ -n "$inputfiles" ]; then
|
||||||
for inputfile in "${inputfiles[@]}" ; do
|
for inputfile in "${inputfiles[@]}" ; do
|
||||||
echo "import ${inputfile}..."
|
echo "import ${inputfile}..."
|
||||||
# run client with input command
|
# run client with input command
|
||||||
openrefine-client/refine.py -P ${port} -c ${inputdir}/${inputfile} $inputformat "${inputoptions[@]}"
|
openrefine-client/openrefine-client_0-3-1_linux-64bit -P ${port} -c ${inputdir}/${inputfile} $inputformat "${inputoptions[@]}"
|
||||||
# show allocated system resources
|
# show allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
||||||
|
@ -254,7 +242,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
|
|
||||||
# get project ids
|
# get project ids
|
||||||
echo "get project ids..."
|
echo "get project ids..."
|
||||||
openrefine-client/refine.py -P ${port} -l > "${outputdir}/projects.tmp"
|
openrefine-client/openrefine-client_0-3-1_linux-64bit -P ${port} -l > "${outputdir}/projects.tmp"
|
||||||
projectids=($(cat "${outputdir}/projects.tmp" | cut -c 2-14))
|
projectids=($(cat "${outputdir}/projects.tmp" | cut -c 2-14))
|
||||||
projectnames=($(cat "${outputdir}/projects.tmp" | cut -c 17-))
|
projectnames=($(cat "${outputdir}/projects.tmp" | cut -c 17-))
|
||||||
cat "${outputdir}/projects.tmp" && rm "${outputdir:?}/projects.tmp"
|
cat "${outputdir}/projects.tmp" && rm "${outputdir:?}/projects.tmp"
|
||||||
|
@ -291,7 +279,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
for jsonfile in "${jsonfiles[@]}" ; do
|
for jsonfile in "${jsonfiles[@]}" ; do
|
||||||
echo "transform ${jsonfile}..."
|
echo "transform ${jsonfile}..."
|
||||||
# run client with apply command
|
# run client with apply command
|
||||||
openrefine-client/refine.py -P ${port} -f ${configdir}/${jsonfile} ${projectids[i]}
|
openrefine-client/openrefine-client_0-3-1_linux-64bit -P ${port} -f ${configdir}/${jsonfile} ${projectids[i]}
|
||||||
# allocated system resources
|
# allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
||||||
|
@ -323,7 +311,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
filename=${projectnames[i]%.*}
|
filename=${projectnames[i]%.*}
|
||||||
echo "export to file ${filename}.tsv..."
|
echo "export to file ${filename}.tsv..."
|
||||||
# run client with export command
|
# run client with export command
|
||||||
openrefine-client/refine.py -P ${port} -E --output="${outputdir}/${filename}.tsv" ${projectids[i]}
|
openrefine-client/openrefine-client_0-3-1_linux-64bit -P ${port} -E --output="${outputdir}/${filename}.tsv" ${projectids[i]}
|
||||||
# show allocated system resources
|
# show allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
||||||
|
|
Loading…
Reference in New Issue