release v0.4
This commit is contained in:
parent
7d11206f42
commit
b80fe929bc
10
README.md
10
README.md
|
@ -48,7 +48,7 @@ clone or [download GitHub repository](https://github.com/felixlohmeier/openrefin
|
||||||
#### Options
|
#### Options
|
||||||
|
|
||||||
```
|
```
|
||||||
./openrefine-batch.sh $inputdir $configdir $outputdir $crossdir $ram $version $inputformat $inputoptions
|
./openrefine-batch.sh $inputdir $configdir $outputdir $crossdir $ram $version $restart $inputformat $inputoptions
|
||||||
```
|
```
|
||||||
|
|
||||||
1. inputdir: path to directory with source files (multiple files may be imported into a single project [by providing a zip or tar.gz archive](https://github.com/OpenRefine/OpenRefine/wiki/Importers))
|
1. inputdir: path to directory with source files (multiple files may be imported into a single project [by providing a zip or tar.gz archive](https://github.com/OpenRefine/OpenRefine/wiki/Importers))
|
||||||
|
@ -57,8 +57,9 @@ clone or [download GitHub repository](https://github.com/felixlohmeier/openrefin
|
||||||
4. crossdir: path to directory with additional OpenRefine projects (will be copied to workspace before transformation step to support the [cross function](https://github.com/OpenRefine/OpenRefine/wiki/GREL-Other-Functions#crosscell-c-string-projectname-string-columnname))
|
4. crossdir: path to directory with additional OpenRefine projects (will be copied to workspace before transformation step to support the [cross function](https://github.com/OpenRefine/OpenRefine/wiki/GREL-Other-Functions#crosscell-c-string-projectname-string-columnname))
|
||||||
5. ram: maximum RAM for OpenRefine java heap space (default: 4G)
|
5. ram: maximum RAM for OpenRefine java heap space (default: 4G)
|
||||||
6. version: OpenRefine version (2.7rc1, 2.6rc2, 2.6rc1, dev)
|
6. version: OpenRefine version (2.7rc1, 2.6rc2, 2.6rc1, dev)
|
||||||
7. inputformat: csv, tsv, xml, json, line-based, fixed-width, xlsx or ods
|
7. restart: restart docker container after each transformation to clear memory (restart-true/restart-false, default: restart-true)
|
||||||
8. inputoptions: several options provided by [openrefine-client](https://hub.docker.com/r/felixlohmeier/openrefine-client/)
|
8. inputformat: (csv, tsv, xml, json, line-based, fixed-width, xlsx, ods)
|
||||||
|
9. inputoptions: several options provided by [openrefine-client](https://hub.docker.com/r/felixlohmeier/openrefine-client/)
|
||||||
|
|
||||||
inputoptions (mandatory for xml, json, fixed-width, xlsx, ods):
|
inputoptions (mandatory for xml, json, fixed-width, xlsx, ods):
|
||||||
* `--recordPath=RECORDPATH` (xml, json): please provide path in multiple arguments without slashes, e.g. /collection/record/ should be entered like this: `--recordPath=collection --recordPath=record`
|
* `--recordPath=RECORDPATH` (xml, json): please provide path in multiple arguments without slashes, e.g. /collection/record/ should be entered like this: `--recordPath=collection --recordPath=record`
|
||||||
|
@ -85,7 +86,7 @@ more inputoptions (optional, only together with inputformat):
|
||||||
The script uses `docker attach` to print log messages from OpenRefine server and `ps` to show statistics for each step. Here is a sample log:
|
The script uses `docker attach` to print log messages from OpenRefine server and `ps` to show statistics for each step. Here is a sample log:
|
||||||
|
|
||||||
```
|
```
|
||||||
[03:27 felix ~/openrefine-batch (master *)]$ ./openrefine-batch.sh examples/powerhouse-museum/input/ examples/powerhouse-museum/config/ examples/powerhouse-museum/output/ examples/powerhouse-museum/cross/ 4G 2.7rc1 tsv --processQuotes=false --guessCellValueTypes=true
|
[03:27 felix ~/openrefine-batch (master *)]$ ./openrefine-batch.sh examples/powerhouse-museum/input/ examples/powerhouse-museum/config/ examples/powerhouse-museum/output/ examples/powerhouse-museum/cross/ 4G 2.7rc1 restart-true tsv --processQuotes=false --guessCellValueTypes=true
|
||||||
Input directory: /home/felix/openrefine-batch/examples/powerhouse-museum/input
|
Input directory: /home/felix/openrefine-batch/examples/powerhouse-museum/input
|
||||||
Input files: phm-collection.tsv
|
Input files: phm-collection.tsv
|
||||||
Input format: --format=tsv
|
Input format: --format=tsv
|
||||||
|
@ -96,6 +97,7 @@ Cross directory: /home/felix/openrefine-batch/examples/powerhouse-museum/
|
||||||
Cross projects:
|
Cross projects:
|
||||||
OpenRefine heap space: 4G
|
OpenRefine heap space: 4G
|
||||||
OpenRefine version: 2.7rc1
|
OpenRefine version: 2.7rc1
|
||||||
|
Docker restart: restart-true
|
||||||
Docker container: 6b7eb36f-fc72-4040-b135-acee36948c13
|
Docker container: 6b7eb36f-fc72-4040-b135-acee36948c13
|
||||||
Output directory: /home/felix/openrefine-batch/examples/powerhouse-museum/output
|
Output directory: /home/felix/openrefine-batch/examples/powerhouse-museum/output
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# openrefine-batch.sh, Felix Lohmeier, v0.3, 27.02.2017
|
# openrefine-batch.sh, Felix Lohmeier, v0.4, 27.02.2017
|
||||||
# https://github.com/felixlohmeier/openrefine-batch
|
# https://github.com/felixlohmeier/openrefine-batch
|
||||||
|
|
||||||
# user input
|
# user input
|
||||||
|
@ -49,15 +49,21 @@ if [ -z "$6" ]
|
||||||
fi
|
fi
|
||||||
if [ -z "$7" ]
|
if [ -z "$7" ]
|
||||||
then
|
then
|
||||||
inputformat=""
|
restart="restart-true"
|
||||||
else
|
else
|
||||||
inputformat="--format=${7}"
|
restart="$7"
|
||||||
fi
|
fi
|
||||||
if [ -z "$8" ]
|
if [ -z "$8" ]
|
||||||
|
then
|
||||||
|
inputformat=""
|
||||||
|
else
|
||||||
|
inputformat="--format=${8}"
|
||||||
|
fi
|
||||||
|
if [ -z "$9" ]
|
||||||
then
|
then
|
||||||
inputoptions=""
|
inputoptions=""
|
||||||
else
|
else
|
||||||
inputoptions=( "$8" "$9" "${10}" "${11}" "${12}" "${13}" "${14}" "${15}" "${16}" "${17}" "${18}" "${19}" "${20}" )
|
inputoptions=( "$9" "${10}" "${11}" "${12}" "${13}" "${14}" "${15}" "${16}" "${17}" "${18}" "${19}" "${20}" )
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# variables
|
# variables
|
||||||
|
@ -73,6 +79,7 @@ echo "Cross projects: ${crossprojects[@]}"
|
||||||
echo "OpenRefine heap space: $ram"
|
echo "OpenRefine heap space: $ram"
|
||||||
echo "OpenRefine version: $version"
|
echo "OpenRefine version: $version"
|
||||||
echo "Docker container: $uuid"
|
echo "Docker container: $uuid"
|
||||||
|
echo "Docker restart: $restart"
|
||||||
echo "Output directory: $outputdir"
|
echo "Output directory: $outputdir"
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
|
@ -127,12 +134,14 @@ if [ -n "$jsonfiles" ]; then
|
||||||
sudo docker run --rm --link ${uuid} -v ${configdir}:/data felixlohmeier/openrefine-client -H ${uuid} -f ${jsonfile} ${projectid}
|
sudo docker run --rm --link ${uuid} -v ${configdir}:/data felixlohmeier/openrefine-client -H ${uuid} -f ${jsonfile} ${projectid}
|
||||||
# statistics
|
# statistics
|
||||||
ps -o start,etime,%mem,%cpu,rss -C java
|
ps -o start,etime,%mem,%cpu,rss -C java
|
||||||
# restart server to clear memory
|
if [ "$restart" = "restart-true" ]; then
|
||||||
echo "save project and restart OpenRefine server..."
|
# restart server to clear memory
|
||||||
sudo docker stop -t=5000 ${uuid}
|
echo "save project and restart OpenRefine server..."
|
||||||
sudo docker rm ${uuid}
|
sudo docker stop -t=5000 ${uuid}
|
||||||
sudo docker run -d --name=${uuid} -v ${outputdir}:/data felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
sudo docker rm ${uuid}
|
||||||
until sudo docker run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
sudo docker run -d --name=${uuid} -v ${outputdir}:/data felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
||||||
|
until sudo docker run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
# export files
|
# export files
|
||||||
echo "export to file ${projectid}.tsv..."
|
echo "export to file ${projectid}.tsv..."
|
||||||
|
|
Loading…
Reference in New Issue