OpenRefine 3.4.1, openrefine-client 0.3.10
This commit is contained in:
parent
80fb37cb65
commit
4199fadc04
91
README.md
91
README.md
|
@@ -158,12 +158,12 @@ The script prints log messages from OpenRefine server and makes use of `ps` to s
|
||||||
```
|
```
|
||||||
[felix@tux openrefine-batch]$ ./openrefine-batch.sh -a examples/powerhouse-museum/input/ -b examples/powerhouse-museum/config/ -c examples/powerhouse-museum/output/ -f tsv -i processQuotes=false -i guessCellValueTypes=true -RX
|
[felix@tux openrefine-batch]$ ./openrefine-batch.sh -a examples/powerhouse-museum/input/ -b examples/powerhouse-museum/config/ -c examples/powerhouse-museum/output/ -f tsv -i processQuotes=false -i guessCellValueTypes=true -RX
|
||||||
Download OpenRefine...
|
Download OpenRefine...
|
||||||
openrefine-linux-3.2.tar.gz 100%[===============================================>] 101,13M 9,46MB/s in 19s
|
openrefine-linux-3.4.1.tar.gz 100%[============================================================================================>] 114,70M 8,49MB/s in 20s
|
||||||
Install OpenRefine in subdirectory openrefine...
|
Install OpenRefine in subdirectory openrefine...
|
||||||
Total bytes read: 125419520 (120MiB, 74MiB/s)
|
Total bytes read: 139970560 (134MiB, 86MiB/s)
|
||||||
|
|
||||||
Download OpenRefine client...
|
Download OpenRefine client...
|
||||||
openrefine-client_0-3-9_linux 100%[===============================================>] 4,25M 2,61MB/s in 1,6s
|
openrefine-client_0-3-10_linux 100%[============================================================================================>] 4,25M 2,61MB/s in 1,6s
|
||||||
|
|
||||||
Input directory: /home/felix/git/openrefine-batch/examples/powerhouse-museum/input
|
Input directory: /home/felix/git/openrefine-batch/examples/powerhouse-museum/input
|
||||||
Input files: phm-collection.tsv
|
Input files: phm-collection.tsv
|
||||||
|
@@ -184,94 +184,101 @@ restart after transform: false
|
||||||
|
|
||||||
=== 1. Launch OpenRefine ===
|
=== 1. Launch OpenRefine ===
|
||||||
|
|
||||||
starting time: Sa 8. Aug 13:32:45 CEST 2020
|
starting time: Mo 4. Jan 16:56:28 CET 2021
|
||||||
|
|
||||||
|
Using refine.ini for configuration
|
||||||
|
openrefine/refine: line 810: [: 2048M: integer expression expected
|
||||||
You have 15927M of free memory.
|
You have 15927M of free memory.
|
||||||
Your current configuration is set to use 2048M of memory.
|
Your current configuration is set to use 2048M of memory.
|
||||||
OpenRefine can run better when given more memory. Read our FAQ on how to allocate more memory here:
|
OpenRefine can run better when given more memory. Read our FAQ on how to allocate more memory here:
|
||||||
https://github.com/OpenRefine/OpenRefine/wiki/FAQ:-Allocate-More-Memory
|
https://github.com/OpenRefine/OpenRefine/wiki/FAQ:-Allocate-More-Memory
|
||||||
|
/usr/bin/java -cp server/classes:server/target/lib/* -Drefine.headless=true -Xms2048M -Xmx2048M -Drefine.memory=2048M -Drefine.max_form_content_size=1048576 -Drefine.verbosity=info -Dpython.path=main/webapp/WEB-INF/lib/jython -Dpython.cachedir=/home/felix/.local/share/google/refine/cachedir -Drefine.data_dir=/home/felix/git/openrefine-batch/examples/powerhouse-museum/output -Drefine.webapp=main/webapp -Drefine.port=3333 -Drefine.host=127.0.0.1 -Drefine.autosave=1440 com.google.refine.Refine
|
||||||
Starting OpenRefine at 'http://127.0.0.1:3333/'
|
Starting OpenRefine at 'http://127.0.0.1:3333/'
|
||||||
|
|
||||||
13:32:46.213 [ refine_server] Starting Server bound to '127.0.0.1:3333' (0ms)
|
16:56:28.993 [ refine_server] Starting Server bound to '127.0.0.1:3333' (0ms)
|
||||||
13:32:46.214 [ refine_server] refine.memory size: 2048M JVM Max heap: 2058354688 (1ms)
|
16:56:28.994 [ refine_server] refine.memory size: 2048M JVM Max heap: 2058354688 (1ms)
|
||||||
13:32:46.224 [ refine_server] Initializing context: '/' from '/home/felix/git/openrefine-batch/openrefine/webapp' (10ms)
|
16:56:29.004 [ refine_server] Initializing context: '/' from '/home/felix/git/openrefine-batch/openrefine/webapp' (10ms)
|
||||||
SLF4J: Class path contains multiple SLF4J bindings.
|
SLF4J: Class path contains multiple SLF4J bindings.
|
||||||
SLF4J: Found binding in [jar:file:/home/felix/git/openrefine-batch/openrefine/server/target/lib/slf4j-log4j12-1.7.18.jar!/org/slf4j/impl/StaticLoggerBinder.class]
|
SLF4J: Found binding in [jar:file:/home/felix/git/openrefine-batch/openrefine/server/target/lib/slf4j-log4j12-1.7.18.jar!/org/slf4j/impl/StaticLoggerBinder.class]
|
||||||
SLF4J: Found binding in [jar:file:/home/felix/git/openrefine-batch/openrefine/webapp/WEB-INF/lib/slf4j-log4j12-1.7.18.jar!/org/slf4j/impl/StaticLoggerBinder.class]
|
SLF4J: Found binding in [jar:file:/home/felix/git/openrefine-batch/openrefine/webapp/WEB-INF/lib/slf4j-log4j12-1.7.18.jar!/org/slf4j/impl/StaticLoggerBinder.class]
|
||||||
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
|
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
|
||||||
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
|
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
|
||||||
13:32:46.937 [ refine] Starting OpenRefine 3.2 [55c921b]... (713ms)
|
16:56:29.690 [ refine] Starting OpenRefine 3.4.1 [437dc4d]... (686ms)
|
||||||
13:32:46.937 [ refine] initializing FileProjectManager with dir (0ms)
|
16:56:29.690 [ refine] initializing FileProjectManager with dir (0ms)
|
||||||
13:32:46.937 [ refine] /home/felix/git/openrefine-batch/examples/powerhouse-museum/output (0ms)
|
16:56:29.690 [ refine] /home/felix/git/openrefine-batch/examples/powerhouse-museum/output (0ms)
|
||||||
13:32:46.947 [ FileProjectManager] Failed to load workspace from any attempted alternatives. (10ms)
|
16:56:29.696 [ FileProjectManager] Failed to load workspace from any attempted alternatives. (6ms)
|
||||||
13:32:52.249 [ refine] Running in headless mode (5302ms)
|
16:56:35.245 [ refine] Running in headless mode (5549ms)
|
||||||
|
|
||||||
=== 2. Import all files ===
|
=== 2. Import all files ===
|
||||||
|
|
||||||
starting time: Sa 8. Aug 13:32:53 CEST 2020
|
starting time: Mo 4. Jan 16:56:36 CET 2021
|
||||||
|
|
||||||
import phm-collection.tsv...
|
import phm-collection.tsv...
|
||||||
13:32:53.686 [ refine] POST /command/core/create-project-from-upload (1437ms)
|
16:56:36.199 [ refine] GET /command/core/get-csrf-token (954ms)
|
||||||
13:33:01.606 [ refine] GET /command/core/get-models (7920ms)
|
16:56:36.339 [ refine] POST /command/core/create-project-from-upload (140ms)
|
||||||
13:33:01.722 [ refine] POST /command/core/get-rows (116ms)
|
16:56:45.999 [ refine] GET /command/core/get-models (9660ms)
|
||||||
id: 1705197298924
|
16:56:46.105 [ refine] POST /command/core/get-rows (106ms)
|
||||||
|
id: 1841400347972
|
||||||
rows: 75814
|
rows: 75814
|
||||||
STARTED ELAPSED %MEM %CPU RSS
|
STARTED ELAPSED %MEM %CPU RSS
|
||||||
13:32:45 00:16 6.0 201 993192
|
16:56:27 00:18 6.1 194 997020
|
||||||
|
|
||||||
=== 3. Prepare transform & export ===
|
=== 3. Prepare transform & export ===
|
||||||
|
|
||||||
starting time: Sa 8. Aug 13:33:01 CEST 2020
|
starting time: Mo 4. Jan 16:56:46 CET 2021
|
||||||
|
|
||||||
get project ids...
|
get project ids...
|
||||||
13:33:02.003 [ refine] GET /command/core/get-all-project-metadata (281ms)
|
16:56:46.383 [ refine] GET /command/core/get-csrf-token (278ms)
|
||||||
1705197298924: phm-collection
|
16:56:46.387 [ refine] GET /command/core/get-all-project-metadata (4ms)
|
||||||
|
1841400347972: phm-collection
|
||||||
|
|
||||||
=== 4. Transform phm-collection ===
|
=== 4. Transform phm-collection ===
|
||||||
|
|
||||||
starting time: Sa 8. Aug 13:33:02 CEST 2020
|
starting time: Mo 4. Jan 16:56:46 CET 2021
|
||||||
|
|
||||||
transform phm-transform.json...
|
transform phm-transform.json...
|
||||||
13:33:02.187 [ refine] GET /command/core/get-models (184ms)
|
16:56:46.594 [ refine] GET /command/core/get-csrf-token (207ms)
|
||||||
13:33:02.193 [ refine] POST /command/core/apply-operations (6ms)
|
16:56:46.597 [ refine] GET /command/core/get-models (3ms)
|
||||||
File /home/felix/git/openrefine-batch/examples/powerhouse-museum/config/phm-transform.json has been successfully applied to project 1705197298924
|
16:56:46.607 [ refine] POST /command/core/apply-operations (10ms)
|
||||||
|
File /home/felix/git/openrefine-batch/examples/powerhouse-museum/config/phm-transform.json has been successfully applied to project 1841400347972
|
||||||
STARTED ELAPSED %MEM %CPU RSS
|
STARTED ELAPSED %MEM %CPU RSS
|
||||||
13:32:45 00:32 6.3 165 1037688
|
16:56:27 00:34 6.2 162 1026072
|
||||||
|
|
||||||
|
|
||||||
=== 5. Export phm-collection ===
|
=== 5. Export phm-collection ===
|
||||||
|
|
||||||
starting time: Sa 8. Aug 13:33:17 CEST 2020
|
starting time: Mo 4. Jan 16:57:02 CET 2021
|
||||||
|
|
||||||
export to file phm-collection.tsv...
|
export to file phm-collection.tsv...
|
||||||
13:33:18.001 [ refine] GET /command/core/get-models (15808ms)
|
16:57:02.322 [ refine] GET /command/core/get-csrf-token (15715ms)
|
||||||
13:33:18.005 [ refine] GET /command/core/get-all-project-metadata (4ms)
|
16:57:02.325 [ refine] GET /command/core/get-models (3ms)
|
||||||
13:33:18.007 [ refine] POST /command/core/export-rows/phm-collection.tsv (2ms)
|
16:57:02.328 [ refine] GET /command/core/get-all-project-metadata (3ms)
|
||||||
|
16:57:02.331 [ refine] POST /command/core/export-rows/phm-collection.tsv (3ms)
|
||||||
Export to file /home/felix/git/openrefine-batch/examples/powerhouse-museum/output/phm-collection.tsv complete
|
Export to file /home/felix/git/openrefine-batch/examples/powerhouse-museum/output/phm-collection.tsv complete
|
||||||
STARTED ELAPSED %MEM %CPU RSS
|
STARTED ELAPSED %MEM %CPU RSS
|
||||||
13:32:45 00:35 6.7 168 1098564
|
16:56:27 00:37 7.2 169 1181160
|
||||||
|
|
||||||
|
|
||||||
output (number of lines / size in bytes):
|
output (number of lines / size in bytes):
|
||||||
75728 59431272 /home/felix/git/openrefine-batch/examples/powerhouse-museum/output/phm-collection.tsv
|
75728 59431272 /home/felix/git/openrefine-batch/examples/powerhouse-museum/output/phm-collection.tsv
|
||||||
|
|
||||||
cleanup...
|
cleanup...
|
||||||
13:33:24.667 [ ProjectManager] Saving all modified projects ... (6660ms)
|
16:57:08.684 [ ProjectManager] Saving all modified projects ... (6353ms)
|
||||||
13:33:28.044 [ project_utilities] Saved project '1705197298924' (3377ms)
|
16:57:12.069 [ project_utilities] Saved project '1841400347972' (3385ms)
|
||||||
|
|
||||||
=== Statistics ===
|
=== Statistics ===
|
||||||
|
|
||||||
starting time and run time of each step:
|
starting time and run time of each step:
|
||||||
Start process Sa 8. Aug 13:32:45 CEST 2020 (00:00:00)
|
Start process Mo 4. Jan 16:56:28 CET 2021 (00:00:00)
|
||||||
Launch OpenRefine Sa 8. Aug 13:32:45 CEST 2020 (00:00:08)
|
Launch OpenRefine Mo 4. Jan 16:56:28 CET 2021 (00:00:08)
|
||||||
Import all files Sa 8. Aug 13:32:53 CEST 2020 (00:00:08)
|
Import all files Mo 4. Jan 16:56:36 CET 2021 (00:00:10)
|
||||||
Prepare transform & export Sa 8. Aug 13:33:01 CEST 2020 (00:00:01)
|
Prepare transform & export Mo 4. Jan 16:56:46 CET 2021 (00:00:00)
|
||||||
Transform phm-collection Sa 8. Aug 13:33:02 CEST 2020 (00:00:15)
|
Transform phm-collection Mo 4. Jan 16:56:46 CET 2021 (00:00:16)
|
||||||
Export phm-collection Sa 8. Aug 13:33:17 CEST 2020 (00:00:12)
|
Export phm-collection Mo 4. Jan 16:57:02 CET 2021 (00:00:11)
|
||||||
End process Sa 8. Aug 13:33:29 CEST 2020 (00:00:00)
|
End process Mo 4. Jan 16:57:13 CET 2021 (00:00:00)
|
||||||
|
|
||||||
total run time: 00:00:44 (hh:mm:ss)
|
total run time: 00:00:45 (hh:mm:ss)
|
||||||
highest memory load: 1072 MB
|
highest memory load: 1153 MB
|
||||||
```
|
```
|
||||||
|
|
||||||
### Performance gain with extended cross function
|
### Performance gain with extended cross function
|
||||||
|
|
|
@@ -1,5 +1,5 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# openrefine-batch-docker.sh, Felix Lohmeier, v1.14, 2020-08-08
|
# openrefine-batch-docker.sh, Felix Lohmeier, v1.15, 2021-01-04
|
||||||
# https://github.com/felixlohmeier/openrefine-batch
|
# https://github.com/felixlohmeier/openrefine-batch
|
||||||
|
|
||||||
# check system requirements
|
# check system requirements
|
||||||
|
@@ -40,7 +40,7 @@ Usage: ./openrefine-batch-docker.sh [-a INPUTDIR] [-b TRANSFORMDIR] [-c OUTPUTDI
|
||||||
-i INPUTOPTIONS several options provided by openrefine-client, see below...
|
-i INPUTOPTIONS several options provided by openrefine-client, see below...
|
||||||
-m RAM maximum RAM for OpenRefine java heap space (default: 2048M)
|
-m RAM maximum RAM for OpenRefine java heap space (default: 2048M)
|
||||||
-t TEMPLATING several options for templating export, see below...
|
-t TEMPLATING several options for templating export, see below...
|
||||||
-v VERSION OpenRefine version (3.2, 3.1, 3.0, 2.8, 2.7, ...; default: 3.2)
|
-v VERSION OpenRefine version (3.4.1, 3.4, 3.3, 3.2, 3.1, 3.0, 2.8, 2.7, ...; default: 3.4.1)
|
||||||
-E do NOT export files
|
-E do NOT export files
|
||||||
-R do NOT restart OpenRefine after each transformation (e.g. config file)
|
-R do NOT restart OpenRefine after each transformation (e.g. config file)
|
||||||
-X do NOT restart OpenRefine after each project (e.g. input file)
|
-X do NOT restart OpenRefine after each project (e.g. input file)
|
||||||
|
@@ -108,7 +108,7 @@ EOF
|
||||||
|
|
||||||
# defaults
|
# defaults
|
||||||
ram="2048M"
|
ram="2048M"
|
||||||
version="3.2"
|
version="3.4.1"
|
||||||
restartfile="true"
|
restartfile="true"
|
||||||
restarttransform="true"
|
restarttransform="true"
|
||||||
export="true"
|
export="true"
|
||||||
|
@@ -229,7 +229,7 @@ echo "starting time: $(date --date=@${checkpointdate[$((checkpoints + 1))]})"
|
||||||
echo ""
|
echo ""
|
||||||
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
||||||
# wait until server is available
|
# wait until server is available
|
||||||
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.9 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.10 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
||||||
# show server logs
|
# show server logs
|
||||||
${docker[*]} attach ${uuid} &
|
${docker[*]} attach ${uuid} &
|
||||||
echo ""
|
echo ""
|
||||||
|
@@ -246,7 +246,7 @@ if [ -n "$inputfiles" ]; then
|
||||||
for inputfile in "${inputfiles[@]}" ; do
|
for inputfile in "${inputfiles[@]}" ; do
|
||||||
echo "import ${inputfile}..."
|
echo "import ${inputfile}..."
|
||||||
# run client with input command
|
# run client with input command
|
||||||
${docker[*]} run --rm --link ${uuid} -v ${inputdir}:/data:z felixlohmeier/openrefine-client:v0.3.9 -H ${uuid} -c $inputfile $inputformat ${inputoptions[@]}
|
${docker[*]} run --rm --link ${uuid} -v ${inputdir}:/data:z felixlohmeier/openrefine-client:v0.3.10 -H ${uuid} -c $inputfile $inputformat ${inputoptions[@]}
|
||||||
# show allocated system resources
|
# show allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -C java --sort=start
|
ps -o start,etime,%mem,%cpu,rss -C java --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -C java))
|
memoryload+=($(ps --no-headers -o rss -C java))
|
||||||
|
@@ -257,7 +257,7 @@ if [ -n "$inputfiles" ]; then
|
||||||
${docker[*]} stop -t=5000 ${uuid}
|
${docker[*]} stop -t=5000 ${uuid}
|
||||||
${docker[*]} rm ${uuid}
|
${docker[*]} rm ${uuid}
|
||||||
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
||||||
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.9 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.10 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
||||||
${docker[*]} attach ${uuid} &
|
${docker[*]} attach ${uuid} &
|
||||||
echo ""
|
echo ""
|
||||||
fi
|
fi
|
||||||
|
@@ -276,7 +276,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
|
|
||||||
# get project ids
|
# get project ids
|
||||||
echo "get project ids..."
|
echo "get project ids..."
|
||||||
${docker[*]} run --rm --link ${uuid} felixlohmeier/openrefine-client:v0.3.9 -H ${uuid} -l > "${outputdir}/projects.tmp"
|
${docker[*]} run --rm --link ${uuid} felixlohmeier/openrefine-client:v0.3.10 -H ${uuid} -l > "${outputdir}/projects.tmp"
|
||||||
projectids=($(cut -c 2-14 "${outputdir}/projects.tmp"))
|
projectids=($(cut -c 2-14 "${outputdir}/projects.tmp"))
|
||||||
projectnames=($(cut -c 17- "${outputdir}/projects.tmp"))
|
projectnames=($(cut -c 17- "${outputdir}/projects.tmp"))
|
||||||
cat "${outputdir}/projects.tmp" && rm "${outputdir:?}/projects.tmp"
|
cat "${outputdir}/projects.tmp" && rm "${outputdir:?}/projects.tmp"
|
||||||
|
@@ -292,7 +292,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
${docker[*]} stop -t=5000 ${uuid}
|
${docker[*]} stop -t=5000 ${uuid}
|
||||||
${docker[*]} rm ${uuid}
|
${docker[*]} rm ${uuid}
|
||||||
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
||||||
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.9 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.10 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
||||||
${docker[*]} attach ${uuid} &
|
${docker[*]} attach ${uuid} &
|
||||||
echo ""
|
echo ""
|
||||||
fi
|
fi
|
||||||
|
@@ -312,7 +312,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
for jsonfile in "${jsonfiles[@]}" ; do
|
for jsonfile in "${jsonfiles[@]}" ; do
|
||||||
echo "transform ${jsonfile}..."
|
echo "transform ${jsonfile}..."
|
||||||
# run client with apply command
|
# run client with apply command
|
||||||
${docker[*]} run --rm --link ${uuid} -v ${configdir}:/data:z felixlohmeier/openrefine-client:v0.3.9 -H ${uuid} -f ${jsonfile} ${projectids[i]}
|
${docker[*]} run --rm --link ${uuid} -v ${configdir}:/data:z felixlohmeier/openrefine-client:v0.3.10 -H ${uuid} -f ${jsonfile} ${projectids[i]}
|
||||||
# allocated system resources
|
# allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -C java --sort=start
|
ps -o start,etime,%mem,%cpu,rss -C java --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -C java))
|
memoryload+=($(ps --no-headers -o rss -C java))
|
||||||
|
@@ -323,7 +323,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
${docker[*]} stop -t=5000 ${uuid}
|
${docker[*]} stop -t=5000 ${uuid}
|
||||||
${docker[*]} rm ${uuid}
|
${docker[*]} rm ${uuid}
|
||||||
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
||||||
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.9 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.10 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
||||||
${docker[*]} attach ${uuid} &
|
${docker[*]} attach ${uuid} &
|
||||||
fi
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
|
@@ -343,7 +343,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
filename=${projectnames[i]%.*}
|
filename=${projectnames[i]%.*}
|
||||||
echo "export to file ${filename}.${exportformat}..."
|
echo "export to file ${filename}.${exportformat}..."
|
||||||
# run client with export command
|
# run client with export command
|
||||||
${docker[*]} run --rm --link ${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine-client:v0.3.9 -H ${uuid} -E --output="${filename}.${exportformat}" "${templating[@]}" ${projectids[i]}
|
${docker[*]} run --rm --link ${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine-client:v0.3.10 -H ${uuid} -E --output="${filename}.${exportformat}" "${templating[@]}" ${projectids[i]}
|
||||||
# show allocated system resources
|
# show allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -C java --sort=start
|
ps -o start,etime,%mem,%cpu,rss -C java --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -C java))
|
memoryload+=($(ps --no-headers -o rss -C java))
|
||||||
|
@@ -356,7 +356,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
${docker[*]} stop -t=5000 ${uuid}
|
${docker[*]} stop -t=5000 ${uuid}
|
||||||
${docker[*]} rm ${uuid}
|
${docker[*]} rm ${uuid}
|
||||||
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
${docker[*]} run -d --name=${uuid} -v ${outputdir}:/data:z felixlohmeier/openrefine:${version} -i 0.0.0.0 -m ${ram} -d /data
|
||||||
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.9 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
until ${docker[*]} run --rm --link ${uuid} --entrypoint /usr/bin/curl felixlohmeier/openrefine-client:v0.3.10 --silent -N http://${uuid}:3333 | cat | grep -q -o "OpenRefine" ; do sleep 1; done
|
||||||
${docker[*]} attach ${uuid} &
|
${docker[*]} attach ${uuid} &
|
||||||
fi
|
fi
|
||||||
echo ""
|
echo ""
|
||||||
|
|
|
@@ -1,10 +1,10 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
# openrefine-batch.sh, Felix Lohmeier, v1.14, 2020-08-08
|
# openrefine-batch.sh, Felix Lohmeier, v1.15, 2021-01-04
|
||||||
# https://github.com/felixlohmeier/openrefine-batch
|
# https://github.com/felixlohmeier/openrefine-batch
|
||||||
|
|
||||||
# declare download URLs for OpenRefine and OpenRefine client
|
# declare download URLs for OpenRefine and OpenRefine client
|
||||||
openrefine_URL="https://github.com/OpenRefine/OpenRefine/releases/download/3.2/openrefine-linux-3.2.tar.gz"
|
openrefine_URL="https://github.com/OpenRefine/OpenRefine/releases/download/3.4.1/openrefine-linux-3.4.1.tar.gz"
|
||||||
client_URL="https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.9/openrefine-client_0-3-9_linux"
|
client_URL="https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.10/openrefine-client_0-3-10_linux"
|
||||||
|
|
||||||
# check system requirements
|
# check system requirements
|
||||||
JAVA="$(which java 2> /dev/null)"
|
JAVA="$(which java 2> /dev/null)"
|
||||||
|
@@ -34,7 +34,7 @@ if [ ! -d "openrefine-client" ]; then
|
||||||
echo "Download OpenRefine client..."
|
echo "Download OpenRefine client..."
|
||||||
mkdir -p openrefine-client
|
mkdir -p openrefine-client
|
||||||
wget -q -P openrefine-client $wget_opt $client_URL
|
wget -q -P openrefine-client $wget_opt $client_URL
|
||||||
chmod +x openrefine-client/openrefine-client_0-3-9_linux
|
chmod +x openrefine-client/openrefine-client_0-3-10_linux
|
||||||
echo ""
|
echo ""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@@ -259,7 +259,7 @@ if [ -n "$inputfiles" ]; then
|
||||||
for inputfile in "${inputfiles[@]}" ; do
|
for inputfile in "${inputfiles[@]}" ; do
|
||||||
echo "import ${inputfile}..."
|
echo "import ${inputfile}..."
|
||||||
# run client with input command
|
# run client with input command
|
||||||
openrefine-client/openrefine-client_0-3-9_linux -P ${port} -c ${inputdir}/${inputfile} $inputformat "${inputoptions[@]}"
|
openrefine-client/openrefine-client_0-3-10_linux -P ${port} -c ${inputdir}/${inputfile} $inputformat "${inputoptions[@]}"
|
||||||
# show allocated system resources
|
# show allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
||||||
|
@@ -290,7 +290,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
|
|
||||||
# get project ids
|
# get project ids
|
||||||
echo "get project ids..."
|
echo "get project ids..."
|
||||||
openrefine-client/openrefine-client_0-3-9_linux -P ${port} -l > "${outputdir}/projects.tmp"
|
openrefine-client/openrefine-client_0-3-10_linux -P ${port} -l > "${outputdir}/projects.tmp"
|
||||||
projectids=($(cut -c 2-14 "${outputdir}/projects.tmp"))
|
projectids=($(cut -c 2-14 "${outputdir}/projects.tmp"))
|
||||||
projectnames=($(cut -c 17- "${outputdir}/projects.tmp"))
|
projectnames=($(cut -c 17- "${outputdir}/projects.tmp"))
|
||||||
cat "${outputdir}/projects.tmp" && rm "${outputdir:?}/projects.tmp"
|
cat "${outputdir}/projects.tmp" && rm "${outputdir:?}/projects.tmp"
|
||||||
|
@@ -327,7 +327,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
for jsonfile in "${jsonfiles[@]}" ; do
|
for jsonfile in "${jsonfiles[@]}" ; do
|
||||||
echo "transform ${jsonfile}..."
|
echo "transform ${jsonfile}..."
|
||||||
# run client with apply command
|
# run client with apply command
|
||||||
openrefine-client/openrefine-client_0-3-9_linux -P ${port} -f ${configdir}/${jsonfile} ${projectids[i]}
|
openrefine-client/openrefine-client_0-3-10_linux -P ${port} -f ${configdir}/${jsonfile} ${projectids[i]}
|
||||||
# allocated system resources
|
# allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
||||||
|
@@ -359,7 +359,7 @@ if [ -n "$jsonfiles" ] || [ "$export" = "true" ]; then
|
||||||
filename=${projectnames[i]%.*}
|
filename=${projectnames[i]%.*}
|
||||||
echo "export to file ${filename}.${exportformat}..."
|
echo "export to file ${filename}.${exportformat}..."
|
||||||
# run client with export command
|
# run client with export command
|
||||||
openrefine-client/openrefine-client_0-3-9_linux -P ${port} -E --output="${outputdir}/${filename}.${exportformat}" "${templating[@]}" ${projectids[i]}
|
openrefine-client/openrefine-client_0-3-10_linux -P ${port} -E --output="${outputdir}/${filename}.${exportformat}" "${templating[@]}" ${projectids[i]}
|
||||||
# show allocated system resources
|
# show allocated system resources
|
||||||
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
ps -o start,etime,%mem,%cpu,rss -p ${pid} --sort=start
|
||||||
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
memoryload+=($(ps --no-headers -o rss -p ${pid}))
|
||||||
|
|
Loading…
Reference in New Issue