diff --git a/README.md b/README.md index 1504ad6..0cb4cfe 100644 --- a/README.md +++ b/README.md @@ -42,13 +42,13 @@ Windows: clone or [download GitHub repository](https://github.com/felixlohmeier/openrefine-batch/archive/master.zip) to get example data ``` -./openrefine-batch.sh examples/powerhouse-museum/input/ examples/powerhouse-museum/config/ examples/powerhouse-museum/output/ examples/powerhouse-museum/cross/ 4G tsv --processQuotes=false --guessCellValueTypes=true +./openrefine-batch.sh examples/powerhouse-museum/input/ examples/powerhouse-museum/config/ examples/powerhouse-museum/output/ examples/powerhouse-museum/cross/ 4G 2.7rc1 tsv --processQuotes=false --guessCellValueTypes=true ``` #### Options ``` -./openrefine-batch.sh $inputdir $configdir $outputdir $crossdir $ram $inputformat $inputoptions +./openrefine-batch.sh $inputdir $configdir $outputdir $crossdir $ram $version $inputformat $inputoptions ``` 1. inputdir: path to directory with source files (multiple files may be imported into a single project [by providing a zip or tar.gz archive](https://github.com/OpenRefine/OpenRefine/wiki/Importers)) @@ -56,8 +56,9 @@ clone or [download GitHub repository](https://github.com/felixlohmeier/openrefin 3. outputdir: path to directory for exported files (and OpenRefine workspace) 4. crossdir: path to directory with additional OpenRefine projects (will be copied to workspace before transformation step to support the [cross function](https://github.com/OpenRefine/OpenRefine/wiki/GREL-Other-Functions#crosscell-c-string-projectname-string-columnname)) 5. ram: maximum RAM for OpenRefine java heap space (default: 4G) -6. inputformat: csv, tsv, xml, json, line-based, fixed-width, xlsx or ods -7. inputoptions: several options provided by [openrefine-client](https://hub.docker.com/r/felixlohmeier/openrefine-client/) +6. version: OpenRefine version (2.7rc1, 2.6rc2, 2.6rc1, dev) +7. inputformat: csv, tsv, xml, json, line-based, fixed-width, xlsx or ods +8. inputoptions: several options provided by [openrefine-client](https://hub.docker.com/r/felixlohmeier/openrefine-client/) inputoptions (mandatory for xml, json, fixed-width, xslx, ods): * `--recordPath=RECORDPATH` (xml, json): please provide path in multiple arguments without slashes, e.g. /collection/record/ should be entered like this: `--recordPath=collection --recordPath=record` @@ -84,7 +85,7 @@ more inputoptions (optional, only together with inputformat): The script uses `docker attach` to print log messages from OpenRefine server and `ps` to show statistics for each step. Here is a sample log: ``` -[03:27 felix ~/openrefine-batch (master *)]$ ./openrefine-batch.sh examples/powerhouse-museum/input/ examples/powerhouse-museum/config/ examples/powerhouse-museum/output/ examples/powerhouse-museum/cross/ 4G tsv --processQuotes=false --guessCellValueTypes=true +[03:27 felix ~/openrefine-batch (master *)]$ ./openrefine-batch.sh examples/powerhouse-museum/input/ examples/powerhouse-museum/config/ examples/powerhouse-museum/output/ examples/powerhouse-museum/cross/ 4G 2.7rc1 tsv --processQuotes=false --guessCellValueTypes=true Input directory: /home/felix/openrefine-batch/examples/powerhouse-museum/input Input files: phm-collection.tsv Input format: --format=tsv diff --git a/openrefine-batch.sh b/openrefine-batch.sh index 47e270d..7462e13 100755 --- a/openrefine-batch.sh +++ b/openrefine-batch.sh @@ -1,5 +1,5 @@ #!/bin/bash -# openrefine-batch.sh, Felix Lohmeier, v0.2, 27.02.2017 +# openrefine-batch.sh, Felix Lohmeier, v0.3, 27.02.2017 # https://github.com/felixlohmeier/openrefine-batch # user input @@ -9,7 +9,7 @@ if [ -z "$1" ] exit 2 else inputdir=$(readlink -f $1) - inputfiles=($(find ${inputdir}/* -type f -printf "%f\n")) + inputfiles=($(find -L ${inputdir}/* -type f -printf "%f\n")) fi if [ -z "$2" ] then @@ -17,14 +17,14 @@ if [ -z "$2" ] exit 2 else configdir=$(readlink -f $2) - jsonfiles=($(find ${configdir}/* -type f -printf "%f\n")) + jsonfiles=($(find -L ${configdir}/* -type f -printf "%f\n")) fi if [ -z "$3" ] then echo 1>&2 "please provide path to output directory" exit 2 else - outputdir=$(readlink -f $3) + outputdir=$(readlink -m $3) mkdir -p ${outputdir} fi if [ -z "$4" ] @@ -33,7 +33,7 @@ if [ -z "$4" ] exit 2 else crossdir=$(readlink -f $4) - crossprojects=($(find ${crossdir}/* -maxdepth 0 -type d -printf "%f\n")) + crossprojects=($(find -L ${crossdir}/* -maxdepth 0 -type d -printf "%f\n")) fi if [ -z "$5" ] then @@ -43,19 +43,24 @@ if [ -z "$5" ] fi if [ -z "$6" ] then - inputformat="" + version="2.7rc1" else - inputformat="--format=${6}" + version="$6" fi if [ -z "$7" ] + then + inputformat="" + else + inputformat="--format=${7}" +fi +if [ -z "$8" ] then inputoptions="" else - inputoptions=( "$7" "$8" "$9" "${10}" "${11}" "${12}" "${13}" "${14}" "${15}" "${16}" "${17}" "${18}" "${19}" "${20}" ) + inputoptions=( "$8" "$9" "${10}" "${11}" "${12}" "${13}" "${14}" "${15}" "${16}" "${17}" "${18}" "${19}" "${20}" ) fi # variables -version="2.7rc1" uuid=$(cat /proc/sys/kernel/random/uuid) echo "Input directory: $inputdir" echo "Input files: ${inputfiles[@]}"