release v0.5

This commit is contained in:
Felix Lohmeier 2017-02-27 23:31:10 +01:00
parent b80fe929bc
commit 2f0d8fb080
2 changed files with 43 additions and 43 deletions

View File

@@ -1,5 +1,5 @@
#!/bin/bash #!/bin/bash
# openrefine-batch.sh, Felix Lohmeier, v0.4, 27.02.2017 # openrefine-batch.sh, Felix Lohmeier, v0.5, 27.02.2017
# https://github.com/felixlohmeier/openrefine-batch # https://github.com/felixlohmeier/openrefine-batch
# user input # user input
@@ -9,7 +9,7 @@ if [ -z "$1" ]
exit 2 exit 2
else else
inputdir=$(readlink -f $1) inputdir=$(readlink -f $1)
inputfiles=($(find -L ${inputdir}/* -type f -printf "%f\n")) inputfiles=($(find -L ${inputdir}/* -type f -printf "%f\n" 2>/dev/null))
fi fi
if [ -z "$2" ] if [ -z "$2" ]
then then
@@ -17,7 +17,7 @@ if [ -z "$2" ]
exit 2 exit 2
else else
configdir=$(readlink -f $2) configdir=$(readlink -f $2)
jsonfiles=($(find -L ${configdir}/* -type f -printf "%f\n")) jsonfiles=($(find -L ${configdir}/* -type f -printf "%f\n" 2>/dev/null))
fi fi
if [ -z "$3" ] if [ -z "$3" ]
then then
@@ -33,7 +33,7 @@ if [ -z "$4" ]
exit 2 exit 2
else else
crossdir=$(readlink -f $4) crossdir=$(readlink -f $4)
crossprojects=($(find -L ${crossdir}/* -maxdepth 0 -type d -printf "%f\n")) crossprojects=($(find -L ${crossdir}/* -maxdepth 0 -type d -printf "%f\n" 2>/dev/null))
fi fi
if [ -z "$5" ] if [ -z "$5" ]
then then
@@ -102,7 +102,7 @@ if [ -n "$inputfiles" ]; then
# show server logs # show server logs
sudo docker attach ${uuid} & sudo docker attach ${uuid} &
# statistics # statistics
ps -o start,etime,%mem,%cpu,rss -C java ps -o start,etime,%mem,%cpu,rss -C java --sort=start
# restart server to clear memory # restart server to clear memory
echo "save project and restart OpenRefine server..." echo "save project and restart OpenRefine server..."
sudo docker stop -t=5000 ${uuid} sudo docker stop -t=5000 ${uuid}
@@ -113,27 +113,27 @@ if [ -n "$inputfiles" ]; then
done done
fi fi
if [ -n "$jsonfiles" ]; then
# get project ids # get project ids
projects=($(sudo docker run --rm --link ${uuid} felixlohmeier/openrefine-client -H ${uuid} -l | cut -c 2-14)) projects=($(sudo docker run --rm --link ${uuid} felixlohmeier/openrefine-client -H ${uuid} -l | cut -c 2-14))
# copy existing projects for use with OpenRefine cross function # copy existing projects for use with OpenRefine cross function
if [ -n "$crossprojects" ]; then if [ -n "$crossprojects" ]; then
cp -r $crossdir/*.project $outputdir/ rsync -a --exclude='*.project/history' $crossdir/*.project $outputdir
fi fi
# loop for all projects # loop for all projects
for projectid in "${projects[@]}" ; do for projectid in "${projects[@]}" ; do
echo "begin project $projectid @ $(date)" echo "begin project $projectid @ $(date)"
# show server logs
sudo docker attach ${uuid} &
if [ -n "$jsonfiles" ]; then
# apply transformation rules # apply transformation rules
for jsonfile in "${jsonfiles[@]}" ; do for jsonfile in "${jsonfiles[@]}" ; do
echo "transform ${jsonfile}..." echo "transform ${jsonfile}..."
# show server logs
sudo docker attach ${uuid} &
# apply # apply
sudo docker run --rm --link ${uuid} -v ${configdir}:/data felixlohmeier/openrefine-client -H ${uuid} -f ${jsonfile} ${projectid} sudo docker run --rm --link ${uuid} -v ${configdir}:/data felixlohmeier/openrefine-client -H ${uuid} -f ${jsonfile} ${projectid}
# statistics # statistics
ps -o start,etime,%mem,%cpu,rss -C java ps -o start,etime,%mem,%cpu,rss -C java --sort=start
if [ "$restart" = "restart-true" ]; then if [ "$restart" = "restart-true" ]; then
# restart server to clear memory # restart server to clear memory
echo "save project and restart OpenRefine server..." echo "save project and restart OpenRefine server..."
@@ -141,16 +141,16 @@ if [ -n "$jsonfiles" ]; then
sudo docker rm ${uuid} sudo docker rm ${uuid}
sudo docker run -d --name=${uuid} -v ${outputdir}:/data felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data sudo docker run -d --name=${uuid} -v ${outputdir}:/data felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
until sudo docker run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done until sudo docker run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
sudo docker attach ${uuid} &
fi fi
done done
fi
# export files # export files
echo "export to file ${projectid}.tsv..." echo "export to file ${projectid}.tsv..."
# show server logs
sudo docker attach ${uuid} &
# export # export
sudo docker run --rm --link ${uuid} -v ${outputdir}:/data felixlohmeier/openrefine-client -H ${uuid} -E --output=${projectid}.tsv ${projectid} sudo docker run --rm --link ${uuid} -v ${outputdir}:/data felixlohmeier/openrefine-client -H ${uuid} -E --output=${projectid}.tsv ${projectid}
# statistics # statistics
ps -o start,etime,%mem,%cpu,rss -C java ps -o start,etime,%mem,%cpu,rss -C java --sort=start
# restart server to clear memory # restart server to clear memory
echo "restart OpenRefine server..." echo "restart OpenRefine server..."
sudo docker stop -t=5000 ${uuid} sudo docker stop -t=5000 ${uuid}
@@ -161,17 +161,17 @@ if [ -n "$jsonfiles" ]; then
echo "finished project $projectid @ $(date)" echo "finished project $projectid @ $(date)"
echo "" echo ""
done done
# list output files # list output files
echo "output (number of lines / size in bytes):" echo "output (number of lines / size in bytes):"
wc -c -l ${outputdir}/*.tsv wc -c -l ${outputdir}/*.tsv
echo "" echo ""
fi
# cleanup # cleanup
echo "cleanup..." echo "cleanup..."
sudo docker stop -t=5000 ${uuid} sudo docker stop -t=5000 ${uuid}
sudo docker rm ${uuid} sudo docker rm ${uuid}
sudo rm -r -f ${outputdir}/workspace*.json rm -r -f ${outputdir}/workspace*.json
echo "" echo ""
# time # time