2020-08-01 02:04:39 +02:00
|
|
|
#!/bin/bash
|
2020-07-13 12:42:14 +02:00
|
|
|
# Bibliotheca Hauptverarbeitung
|
|
|
|
# - Datenbereinigungen
|
2020-07-22 11:00:38 +02:00
|
|
|
# - Mapping auf PICA3
|
2020-08-01 02:04:39 +02:00
|
|
|
# - PICA3 als CSV (via Template) exportieren
|
2020-07-13 12:42:14 +02:00
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
# =============================== ENVIRONMENT ================================ #
|
2020-07-13 12:42:14 +02:00
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
# source the main script
|
|
|
|
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1
|
|
|
|
|
|
|
|
# read input
|
|
|
|
# Resolve the input directory passed as first argument to an absolute path.
# Abort if the argument is missing or does not resolve to an existing
# directory; otherwise inputdir would be empty and the later glob
# "${inputdir}"/*.tsv would silently be evaluated against "/".
if [[ $1 ]]; then
  # readlink -e prints nothing and returns non-zero if the path does not exist
  if ! inputdir="$(readlink -e "$1")" || [[ ! -d "${inputdir}" ]]; then
    echo 1>&2 "Input directory not found: $1"; exit 1
  fi
else
  echo 1>&2 "Please provide path to directory with input file(s)"; exit 1
fi
|
2020-07-13 12:42:14 +02:00
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
# check requirements, set trap, create workdir and tee to logfile
|
|
|
|
init
|
2020-07-13 12:42:14 +02:00
|
|
|
|
|
|
|
# ================================= STARTUP ================================== #
|
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
checkpoint "Startup"; echo
|
|
|
|
|
|
|
|
# start OpenRefine server
|
2020-07-13 12:42:14 +02:00
|
|
|
refine_start; echo
|
|
|
|
|
|
|
|
# ================================== IMPORT ================================== #
|
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
checkpoint "Import"; echo
|
2020-07-13 12:42:14 +02:00
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
# TSV-Exporte aller Einzelprojekte in ein Zip-Archiv packen
|
|
|
|
# Bundle all TSV exports (paths junked via -j) into one archive; stop right
# here if zip fails (e.g. no *.tsv in inputdir) instead of failing later with
# a less specific import error.
zip -j "${workdir}/bibliotheca.zip" "${inputdir}"/*.tsv \
  || error "zip of ${inputdir}/*.tsv failed!"
|
|
|
|
projects["bibliotheca"]="${workdir}/bibliotheca.zip"
|
|
|
|
|
|
|
|
# Neues Projekt erstellen aus Zip-Archiv
|
2020-07-13 12:42:14 +02:00
|
|
|
p="bibliotheca"
|
|
|
|
echo "import file" "${projects[$p]}" "..."
|
|
|
|
if curl -fs --write-out "%{redirect_url}\n" \
|
|
|
|
--form project-file="@${projects[$p]}" \
|
|
|
|
--form project-name="${p}" \
|
|
|
|
--form format="text/line-based/*sv" \
|
|
|
|
--form options='{
|
|
|
|
"encoding": "UTF-8",
|
|
|
|
"includeFileSources": "true",
|
|
|
|
"separator": "\t"
|
|
|
|
}' \
|
|
|
|
"${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
|
2020-08-01 02:04:39 +02:00
|
|
|
> "${workdir}/${p}.id"
|
2020-07-13 12:42:14 +02:00
|
|
|
then
|
|
|
|
log "imported ${projects[$p]} as ${p}"
|
|
|
|
else
|
|
|
|
error "import of ${projects[$p]} failed!"
|
|
|
|
fi
|
2020-08-01 02:04:39 +02:00
|
|
|
refine_store "${p}" "${workdir}/${p}.id" || error "import of ${p} failed!"
|
2020-07-13 12:42:14 +02:00
|
|
|
echo
|
|
|
|
|
|
|
|
# ================================ TRANSFORM ================================= #
|
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
checkpoint "Transform"; echo
|
|
|
|
|
2020-07-21 23:09:58 +02:00
|
|
|
# --------------------------- 01 Spalten sortieren --------------------------- #
|
2020-07-13 12:42:14 +02:00
|
|
|
|
|
|
|
# M|MEDNR muss erste Spalte sein, damit der Records-Mode erhalten bleibt
|
|
|
|
# - M|MEDGR > Facet > Text facet > eBook
|
|
|
|
# -- show as: records
|
|
|
|
# --- All > Edit rows > Remove all matching rows
|
|
|
|
|
2020-07-22 18:11:54 +02:00
|
|
|
echo "Spalten sortieren: Beginnen mit 1. M|MEDNR, 2. E|EXNR, 3. File..."
|
2020-07-13 12:42:14 +02:00
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-move",
|
|
|
|
"columnName": "File",
|
2020-07-21 23:09:58 +02:00
|
|
|
"index": 0,
|
|
|
|
"description": "Move column File to position 0"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"op": "core/column-move",
|
|
|
|
"columnName": "E|EXNR",
|
|
|
|
"index": 0,
|
|
|
|
"description": "Move column E|EXNR to position 0"
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"op": "core/column-move",
|
|
|
|
"columnName": "M|MEDNR",
|
|
|
|
"index": 0,
|
|
|
|
"description": "Move column M|MEDNR to position 0"
|
2020-07-13 12:42:14 +02:00
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
|
|
|
# ----------------------- 02 E-Books löschen (Bautzen) ----------------------- #
|
|
|
|
|
2020-07-13 13:41:34 +02:00
|
|
|
# spec_Z_01
|
2020-07-13 12:42:14 +02:00
|
|
|
# - M|MEDGR > Facet > Text facet > eBook
|
|
|
|
# -- show as: records
|
|
|
|
# --- All > Edit rows > Remove all matching rows
|
|
|
|
|
|
|
|
echo "E-Books löschen (Bautzen)..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/row-removal",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [
|
|
|
|
{
|
|
|
|
"type": "list",
|
|
|
|
"name": "M|MEDGR",
|
|
|
|
"expression": "value",
|
|
|
|
"columnName": "M|MEDGR",
|
|
|
|
"invert": false,
|
|
|
|
"omitBlank": false,
|
|
|
|
"omitError": false,
|
|
|
|
"selection": [
|
|
|
|
{
|
|
|
|
"v": {
|
|
|
|
"v": "eBook",
|
|
|
|
"l": "eBook"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"selectBlank": false,
|
|
|
|
"selectError": false
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"mode": "record-based"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
|
|
|
# ------------- 03 Zeitschriften löschen (Breitenbrunn, Dresden) ------------- #
|
|
|
|
|
2020-07-13 13:41:34 +02:00
|
|
|
# spec_Z_02
|
2020-07-13 12:42:14 +02:00
|
|
|
# - M|ART > Facet > Text facet > "Z" und "GH"
|
|
|
|
# -- show as: records
|
|
|
|
# --- All > Edit rows > Remove all matching rows
|
|
|
|
|
|
|
|
echo "Zeitschriften löschen (Breitenbrunn, Dresden)..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/row-removal",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [
|
|
|
|
{
|
|
|
|
"type": "list",
|
|
|
|
"name": "M|ART",
|
|
|
|
"expression": "value",
|
|
|
|
"columnName": "M|ART",
|
|
|
|
"invert": false,
|
|
|
|
"omitBlank": false,
|
|
|
|
"omitError": false,
|
|
|
|
"selection": [
|
|
|
|
{
|
|
|
|
"v": {
|
|
|
|
"v": "GH",
|
|
|
|
"l": "GH"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"v": {
|
|
|
|
"v": "Z",
|
|
|
|
"l": "Z"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"selectBlank": false,
|
|
|
|
"selectError": false
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"mode": "record-based"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
|
|
|
# ----------------------- 04 Makulierte Medien löschen ----------------------- #
|
|
|
|
|
2020-07-13 13:41:34 +02:00
|
|
|
# spec_Z_03
|
2020-07-13 12:42:14 +02:00
|
|
|
# - E|EXSTA > Facet > Text facet > "M"
|
|
|
|
# -- show as: rows
|
|
|
|
# --- All > Edit rows > Remove all matching rows
|
|
|
|
|
|
|
|
echo "Makulierte Medien löschen..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/row-removal",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [
|
|
|
|
{
|
|
|
|
"type": "list",
|
|
|
|
"name": "E|EXSTA",
|
|
|
|
"expression": "value",
|
|
|
|
"columnName": "E|EXSTA",
|
|
|
|
"invert": false,
|
|
|
|
"omitBlank": false,
|
|
|
|
"omitError": false,
|
|
|
|
"selection": [
|
|
|
|
{
|
|
|
|
"v": {
|
|
|
|
"v": "M",
|
|
|
|
"l": "M"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"selectBlank": false,
|
|
|
|
"selectError": false
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"mode": "row-based"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-07-22 17:42:00 +02:00
|
|
|
# ---------------------------------- 05 File --------------------------------- #
|
|
|
|
|
|
|
|
echo "Bibliothekskürzel aus Import-Dateiname..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/text-transform",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
|
|
|
"columnName": "File",
|
|
|
|
"expression": "grel:with([ ['bautzen.tsv','BZ'], ['breitenbrunn.tsv','BB'], ['dresden.tsv','DD'], ['glauchau.tsv','GC'], ['plauen.tsv','PL'] ], mapping, forEach(mapping, m, if(value == m[0], m[1], '')).join(''))",
|
|
|
|
"onError": "keep-original",
|
|
|
|
"repeat": false,
|
|
|
|
"repeatCount": 10
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
|
|
|
# ---------------------------------- 06 0100 --------------------------------- #
|
2020-07-13 12:42:14 +02:00
|
|
|
|
2020-07-21 23:09:58 +02:00
|
|
|
# spec_B_T_01
|
|
|
|
# TODO: Aufteilung in 0100 / 0110 nach Nummernkreisen
|
|
|
|
# TODO: Korrekturen für <9 und >10-stellige
|
|
|
|
echo "K10plus-PPNs in 0100..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [
|
|
|
|
{
|
|
|
|
"type": "list",
|
|
|
|
"name": "M|IDNR",
|
|
|
|
"expression": "grel:value.length()",
|
|
|
|
"columnName": "M|IDNR",
|
|
|
|
"invert": false,
|
|
|
|
"omitBlank": false,
|
|
|
|
"omitError": false,
|
|
|
|
"selection": [
|
|
|
|
{
|
|
|
|
"v": {
|
|
|
|
"v": 9,
|
|
|
|
"l": "9"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"v": {
|
|
|
|
"v": 10,
|
|
|
|
"l": "10"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"selectBlank": false,
|
|
|
|
"selectError": false
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
|
|
|
"baseColumnName": "M|IDNR",
|
|
|
|
"expression": "grel:value",
|
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "0100",
|
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-07-22 17:42:00 +02:00
|
|
|
# ---------------------------------- 07 2199 --------------------------------- #
|
2020-07-21 23:09:58 +02:00
|
|
|
|
|
|
|
# spec_B_T_49
|
|
|
|
echo "Nummern aus Datenkonversion 2199..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
|
|
|
"baseColumnName": "M|MEDNR",
|
2020-07-22 17:42:00 +02:00
|
|
|
"expression": "grel:'BA' + cells['File'].value + value",
|
2020-07-21 23:09:58 +02:00
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "2199",
|
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-07-22 17:42:00 +02:00
|
|
|
# --------------------------------- 08 7100B --------------------------------- #
|
2020-07-21 23:09:58 +02:00
|
|
|
|
|
|
|
# spec_B_E_15
|
|
|
|
echo "Bibliothekssigel 7100B..."
|
2020-07-13 12:42:14 +02:00
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
2020-08-10 15:47:03 +02:00
|
|
|
"baseColumnName": "File",
|
|
|
|
"expression": "grel:with(if(value=='DD',forNonBlank(cells['E|ZWGST'].value,v,v,value),value),x,x.replace('BB','Brt 1').replace('BZ','Bn 3').replace('DD','D 161').replace('EH','D 275').replace('GC','Gla 1').replace('PL','Pl 11'))",
|
2020-07-13 12:42:14 +02:00
|
|
|
"onError": "set-to-blank",
|
2020-07-21 23:09:58 +02:00
|
|
|
"newColumnName": "7100B",
|
|
|
|
"columnInsertIndex": 3
|
2020-07-13 12:42:14 +02:00
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-07-22 17:42:00 +02:00
|
|
|
# --------------------------------- 09 7100f --------------------------------- #
|
2020-07-22 11:00:38 +02:00
|
|
|
|
|
|
|
# spec_B_E_13
|
|
|
|
echo "Zweigstelle 7100f..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
2020-08-10 15:47:03 +02:00
|
|
|
"baseColumnName": "File",
|
|
|
|
"expression": "grel:if(value=='DD',forNonBlank(cells['E|ZWGST'].value,v,v,value),value)",
|
2020-07-22 11:00:38 +02:00
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "7100f",
|
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
2020-07-13 12:42:14 +02:00
|
|
|
|
2020-07-22 17:42:00 +02:00
|
|
|
# --------------------------------- 10 7100a --------------------------------- #
|
2020-07-13 12:42:14 +02:00
|
|
|
|
2020-07-22 11:00:38 +02:00
|
|
|
# spec_B_E_07
|
2020-07-22 17:42:00 +02:00
|
|
|
echo "Standort 7100a..."
|
2020-07-13 12:42:14 +02:00
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
2020-07-22 11:00:38 +02:00
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
|
|
|
"baseColumnName": "E|STA1",
|
2020-07-22 17:42:00 +02:00
|
|
|
"expression": "grel:value.replace('␟',' ')",
|
2020-07-22 11:00:38 +02:00
|
|
|
"onError": "set-to-blank",
|
2020-07-22 17:42:00 +02:00
|
|
|
"newColumnName": "7100a",
|
2020-07-22 11:00:38 +02:00
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
2020-07-13 12:42:14 +02:00
|
|
|
then
|
2020-07-22 11:00:38 +02:00
|
|
|
log "transformed ${p} (${projects[$p]})"
|
2020-07-13 12:42:14 +02:00
|
|
|
else
|
2020-07-22 11:00:38 +02:00
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
2020-07-13 12:42:14 +02:00
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-07-22 17:42:00 +02:00
|
|
|
# --------------------------------- 11 2000 ---------------------------------- #
|
|
|
|
|
|
|
|
# TODO: ISMN in 2020
|
|
|
|
# spec_B_T_04, spec_B_T_05
|
|
|
|
echo "ISBN 2000..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "record-based"
|
|
|
|
},
|
|
|
|
"baseColumnName": "M|ISBN",
|
|
|
|
"expression": "grel:[ forNonBlank(cells['M|ISBN'].value,v,if(isNumeric(v[0]),v,null),null), forNonBlank(cells['M|ISBN2'].value,v,if(isNumeric(v[0]),v,null),null) ].uniques().join('␟')",
|
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "2000",
|
2020-07-22 18:11:54 +02:00
|
|
|
"columnInsertIndex": 3
|
2020-07-22 17:42:00 +02:00
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-07-22 23:38:39 +02:00
|
|
|
# --------------------------------- 12 E0XX ---------------------------------- #
|
|
|
|
|
|
|
|
# spec_B_E_10
|
|
|
|
echo "Zugangsdatum E0XX..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
|
|
|
"baseColumnName": "E|ZUDAT",
|
|
|
|
"expression": "grel:forNonBlank(value,v,v[0,2] + '-' + v[3,5] + '-' + v[8,10],'22-07-20')",
|
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "E0XX",
|
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-08-11 17:36:22 +02:00
|
|
|
# --------------------------------- 13 E0XXb ---------------------------------- #
|
2020-07-22 23:38:39 +02:00
|
|
|
|
|
|
|
# spec_B_E_14
|
|
|
|
echo "Selektionsschlüssel E0XXb..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
2020-08-10 15:47:03 +02:00
|
|
|
"baseColumnName": "File",
|
|
|
|
"expression": "grel:with(if(value=='DD',forNonBlank(cells['E|ZWGST'].value,v,v,value),value),x,x.toLowercase())",
|
2020-07-22 23:38:39 +02:00
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "E0XXb",
|
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-08-11 17:36:22 +02:00
|
|
|
# --------------------------------- 14 0500 ---------------------------------- #
|
|
|
|
|
|
|
|
# spec_B_T_56
|
|
|
|
# TODO: Regeln für ART=GH, ART=L
|
|
|
|
# TODO: Differenzierung nach MEDGR
|
|
|
|
echo "Gattung und Status 0500..."
|
|
|
|
read -r -d '' expression << EXPRESSION
|
|
|
|
if(
|
|
|
|
value == 'M',
|
|
|
|
'Aan',
|
|
|
|
if(
|
|
|
|
value == 'U',
|
|
|
|
'Asn',
|
|
|
|
if(
|
|
|
|
value == 'A',
|
|
|
|
'Ban',
|
|
|
|
if(
|
|
|
|
value == 'V',
|
|
|
|
'Ban',
|
|
|
|
if(
|
|
|
|
and(
|
|
|
|
value == 'P',
|
|
|
|
forNonBlank(cells['M|MEDGR'].value,v,if(v == 'SPIEL', true, false),false)
|
|
|
|
),
|
|
|
|
'Ban',
|
|
|
|
if(
|
|
|
|
value == 'P',
|
|
|
|
'Lax',
|
|
|
|
if(
|
|
|
|
value == 'G',
|
|
|
|
'Acn',
|
|
|
|
if(
|
|
|
|
value == 'S',
|
|
|
|
'AFn',
|
|
|
|
if(
|
|
|
|
value == 'Z',
|
|
|
|
'Abn',
|
|
|
|
null
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
EXPRESSION
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< JSON
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
|
|
|
"facets": [],
|
|
|
|
"mode": "row-based"
|
|
|
|
},
|
|
|
|
"baseColumnName": "M|ART",
|
2020-08-12 17:11:38 +02:00
|
|
|
"expression": $(echo "grel:${expression}" | ${jq} -s -R '.'),
|
2020-08-11 17:36:22 +02:00
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "0500",
|
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-08-12 16:12:22 +02:00
|
|
|
# --------------------------------- 15 4000 ---------------------------------- #
|
|
|
|
|
|
|
|
# spec_B_T_17
|
|
|
|
echo "Haupttitel 4000..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
2020-08-12 17:52:44 +02:00
|
|
|
"facets": [],
|
|
|
|
"mode": "record-based"
|
2020-08-12 16:12:22 +02:00
|
|
|
},
|
|
|
|
"baseColumnName": "M|HST",
|
2020-08-12 17:52:44 +02:00
|
|
|
"expression": "grel:if(value.contains('¬'),with(value.split('¬'), v, v[0].trim() + ' @' + v[1].trim()),value)",
|
2020-08-12 16:12:22 +02:00
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "4000",
|
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
2020-08-12 16:31:36 +02:00
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
|
|
|
# --------------------------------- 16 8200 ---------------------------------- #
|
|
|
|
|
|
|
|
# spec_B_E_02
|
|
|
|
# progress message for step 16; the column created below is 8200
echo "Verbuchungsnummer 8200..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data-urlencode "operations@-" \
|
|
|
|
"${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
|
|
|
|
<< "JSON"
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"op": "core/column-addition",
|
|
|
|
"engineConfig": {
|
2020-08-12 17:52:44 +02:00
|
|
|
"facets": [],
|
2020-08-12 16:31:36 +02:00
|
|
|
"mode": "row-based"
|
|
|
|
},
|
|
|
|
"baseColumnName": "E|BARCO",
|
|
|
|
"expression": "grel:cells['File'].value + value",
|
|
|
|
"onError": "set-to-blank",
|
|
|
|
"newColumnName": "8200",
|
|
|
|
"columnInsertIndex": 3
|
|
|
|
}
|
|
|
|
]
|
|
|
|
JSON
|
|
|
|
then
|
|
|
|
log "transformed ${p} (${projects[$p]})"
|
|
|
|
else
|
|
|
|
error "transform ${p} (${projects[$p]}) failed!"
|
2020-08-12 16:12:22 +02:00
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-07-22 11:00:38 +02:00
|
|
|
# ================================== EXPORT ================================== #
|
2020-07-13 12:42:14 +02:00
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
checkpoint "Export"; echo
|
|
|
|
|
2020-08-12 17:01:26 +02:00
|
|
|
# Export des OpenRefine-Projekts für Tests
|
|
|
|
format="openrefine.tar.gz"
|
|
|
|
echo "export ${p} to ${format} file..."
|
|
|
|
if curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
"${endpoint}/command/core/export-project" \
|
|
|
|
> "${workdir}/${p}.${format}"
|
|
|
|
then
|
|
|
|
log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
|
|
|
|
else
|
|
|
|
error "export of ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
2020-07-22 11:00:38 +02:00
|
|
|
# Export der PICA3-Spalten als CSV
|
2020-08-11 17:36:22 +02:00
|
|
|
# Spalte 2199 muss vorne stehen, weil später für Sortierung benötigt
|
2020-07-22 11:00:38 +02:00
|
|
|
format="csv"
|
|
|
|
echo "export ${p} to ${format} file using template..."
|
2020-07-13 12:42:14 +02:00
|
|
|
IFS= read -r -d '' template << "TEMPLATE"
|
|
|
|
{{
|
2020-07-22 11:00:38 +02:00
|
|
|
with(
|
|
|
|
[
|
|
|
|
'2199',
|
|
|
|
'0100',
|
2020-08-11 17:36:22 +02:00
|
|
|
'0500',
|
2020-07-22 17:42:00 +02:00
|
|
|
'2000',
|
2020-08-12 16:12:22 +02:00
|
|
|
'4000',
|
2020-07-22 11:00:38 +02:00
|
|
|
'7100B',
|
|
|
|
'7100f',
|
2020-07-22 23:38:39 +02:00
|
|
|
'7100a',
|
|
|
|
'E0XX',
|
|
|
|
'E0XXb'
|
2020-07-22 11:00:38 +02:00
|
|
|
],
|
|
|
|
columns,
|
|
|
|
if(
|
|
|
|
row.index == 0,
|
|
|
|
forEach(
|
|
|
|
columns,
|
|
|
|
cn,
|
|
|
|
cn.escape('csv')
|
|
|
|
).join(',')
|
|
|
|
+ '\n'
|
|
|
|
+ with(
|
|
|
|
forEach(
|
|
|
|
columns,
|
|
|
|
cn,
|
|
|
|
forNonBlank(
|
|
|
|
cells[cn].value,
|
|
|
|
v,
|
|
|
|
v.escape('csv'),
|
|
|
|
'␀'
|
|
|
|
)
|
|
|
|
).join(',').replace('␀',''),
|
|
|
|
r,
|
|
|
|
if(
|
|
|
|
isNonBlank(r.split(',').join(',')),
|
|
|
|
r + '\n',
|
|
|
|
''
|
|
|
|
)
|
|
|
|
),
|
|
|
|
with(
|
|
|
|
forEach(
|
|
|
|
columns,
|
|
|
|
cn,
|
|
|
|
forNonBlank(
|
|
|
|
cells[cn].value,
|
|
|
|
v,
|
|
|
|
v.escape('csv'),
|
|
|
|
'␀'
|
|
|
|
)
|
|
|
|
).join(',').replace('␀',''),
|
|
|
|
r,
|
|
|
|
if(
|
|
|
|
isNonBlank(r.split(',').join(',')),
|
|
|
|
r + '\n',
|
|
|
|
''
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
2020-07-13 12:42:14 +02:00
|
|
|
}}
|
|
|
|
TEMPLATE
|
|
|
|
if echo "${template}" | head -c -2 | curl -fs \
|
|
|
|
--data project="${projects[$p]}" \
|
|
|
|
--data format="template" \
|
|
|
|
--data prefix="" \
|
|
|
|
--data suffix="" \
|
|
|
|
--data separator="" \
|
|
|
|
--data engine='{"facets":[],"mode":"row-based"}' \
|
|
|
|
--data-urlencode template@- \
|
|
|
|
"${endpoint}/command/core/export-rows" \
|
2020-08-01 02:04:39 +02:00
|
|
|
> "${workdir}/${p}.${format}"
|
2020-07-13 12:42:14 +02:00
|
|
|
then
|
2020-08-01 02:04:39 +02:00
|
|
|
log "exported ${p} (${projects[$p]}) to ${workdir}/${p}.${format}"
|
2020-07-13 12:42:14 +02:00
|
|
|
else
|
|
|
|
error "export of ${p} (${projects[$p]}) failed!"
|
|
|
|
fi
|
|
|
|
echo
|
|
|
|
|
|
|
|
# ================================== FINISH ================================== #
|
|
|
|
|
2020-08-01 02:04:39 +02:00
|
|
|
checkpoint "Finish"; echo
|
|
|
|
|
|
|
|
# stop OpenRefine server
|
2020-07-13 12:42:14 +02:00
|
|
|
refine_stop; echo
|
2020-08-01 02:04:39 +02:00
|
|
|
|
|
|
|
# calculate run time based on checkpoints
|
|
|
|
checkpoint_stats; echo
|
|
|
|
|
|
|
|
# word count on all files in workdir
|
|
|
|
count_output
|