# Bibliotheca Hauptverarbeitung
# - Datenbereinigungen
# - Mapping auf PICA3
# - PICA3-Spalten als CSV (via Template) exportieren

# ================================== CONFIG ================================== #

# Pack the TSV exports of all individual projects into one zip archive.
# -j stores the files without their directory paths.
zip -j "${workspace}/bibliotheca.zip" \
  "${workspace}/bautzen.tsv" \
  "${workspace}/breitenbrunn.tsv" \
  "${workspace}/dresden.tsv" \
  "${workspace}/glauchau.tsv" \
  "${workspace}/plauen.tsv"

# Register the archive as the input of the "bibliotheca" project.
projects["bibliotheca"]="${workspace}/bibliotheca.zip"

# ================================= STARTUP ================================== #

# refine_start is defined outside this section.
refine_start; echo

# ================================== IMPORT ================================== #

# Create a new project from the zip archive.

p="bibliotheca"
echo "import file" "${projects[$p]}" "..."
# Upload the archive as tab-separated data. curl prints the redirect URL of
# the response (--write-out), which is saved to ${p}.id for refine_store.
if curl -fs --write-out "%{redirect_url}\n" \
  --form project-file="@${projects[$p]}" \
  --form project-name="${p}" \
  --form format="text/line-based/*sv" \
  --form options='{
    "encoding": "UTF-8",
    "includeFileSources": "true",
    "separator": "\t"
  }' \
  "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
  > "${workspace}/${p}.id"
then
  log "imported ${projects[$p]} as ${p}"
else
  error "import of ${projects[$p]} failed!"
fi
# refine_store is defined outside this section; it receives the project name
# and the file holding the redirect URL.
refine_store "${p}" "${workspace}/${p}.id" || error "import of ${p} failed!"
echo

# ================================ TRANSFORM ================================= #

# ------------------------------ 01 Sort columns ----------------------------- #

# Move the key columns to the front so that records mode is preserved.
# Each move targets index 0 and the moves run in reverse order, so the
# resulting column order is 1. M|MEDNR, 2. E|EXNR, 3. File.

echo "Spalten sortieren: Beginnen mit 1. M|MEDNR, 2. E|EXNR, 3. File"
# The operation list is read from stdin (heredoc) via "operations@-".
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/column-move",
    "columnName": "File",
    "index": 0,
    "description": "Move column File to position 0"
  },
  {
    "op": "core/column-move",
    "columnName": "E|EXNR",
    "index": 0,
    "description": "Move column E|EXNR to position 0"
  },
  {
    "op": "core/column-move",
    "columnName": "M|MEDNR",
    "index": 0,
    "description": "Move column M|MEDNR to position 0"
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# ------------------------ 02 Delete e-books (Bautzen) ----------------------- #

# spec_Z_01
# - M|MEDGR > Facet > Text facet > eBook
# -- show as: records
# --- All > Edit rows > Remove all matching rows

echo "E-Books löschen (Bautzen)..."
# The operation list is read from stdin (heredoc) via "operations@-".
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/row-removal",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "M|MEDGR",
          "expression": "value",
          "columnName": "M|MEDGR",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": "eBook",
                "l": "eBook"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "record-based"
    }
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# ---------------- 03 Delete journals (Breitenbrunn, Dresden) ---------------- #

# spec_Z_02
# - M|ART > Facet > Text facet > "Z" and "GH"
# -- show as: records
# --- All > Edit rows > Remove all matching rows

echo "Zeitschriften löschen (Breitenbrunn, Dresden)..."
# The operation list is read from stdin (heredoc) via "operations@-".
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/row-removal",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "M|ART",
          "expression": "value",
          "columnName": "M|ART",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": "GH",
                "l": "GH"
              }
            },
            {
              "v": {
                "v": "Z",
                "l": "Z"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "record-based"
    }
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# ------------------------- 04 Delete withdrawn media ------------------------ #

# spec_Z_03
# - E|EXSTA > Facet > Text facet > "M"
# -- show as: rows
# --- All > Edit rows > Remove all matching rows

echo "Makulierte Medien löschen..."
# The operation list is read from stdin (heredoc) via "operations@-".
# Unlike sections 02 and 03, this removal runs in row-based mode.
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/row-removal",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "E|EXSTA",
          "expression": "value",
          "columnName": "E|EXSTA",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": "M",
                "l": "M"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "row-based"
    }
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# ---------------------------------- 05 0100 --------------------------------- #

# spec_B_T_01
# TODO: split into 0100 / 0110 by number ranges
# TODO: corrections for values with fewer than 9 or more than 10 digits
echo "K10plus-PPNs in 0100..."
# Copy M|IDNR into the new column 0100, restricted by a facet to rows whose
# M|IDNR value has a length of 9 or 10.
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "M|IDNR",
          "expression": "grel:value.length()",
          "columnName": "M|IDNR",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": 9,
                "l": "9"
              }
            },
            {
              "v": {
                "v": 10,
                "l": "10"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "row-based"
    },
    "baseColumnName": "M|IDNR",
    "expression": "grel:value",
    "onError": "set-to-blank",
    "newColumnName": "0100",
    "columnInsertIndex": 3
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# ---------------------------------- 06 2199 --------------------------------- #

# spec_B_T_49
# TODO: title records without items
echo "Nummern aus Datenkonversion 2199..."
# Build column 2199 as the literal 'BA' + E|ZWGST + M|MEDNR for every row.
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "baseColumnName": "M|MEDNR",
    "expression": "grel:'BA' + cells['E|ZWGST'].value + value",
    "onError": "set-to-blank",
    "newColumnName": "2199",
    "columnInsertIndex": 3
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# --------------------------------- 07 7100B --------------------------------- #

# spec_B_E_15
echo "Bibliothekssigel 7100B..."
# Derive column 7100B from E|ZWGST by replacing each branch code
# (BB, BZ, DD, EH, GC, PL) with its mapped value.
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "baseColumnName": "E|ZWGST",
    "expression": "grel:value.replace('BB','Brt 1').replace('BZ','Bn 3').replace('DD','D 161').replace('EH','D 275').replace('GC','Gla 1').replace('PL','Pl 11')",
    "onError": "set-to-blank",
    "newColumnName": "7100B",
    "columnInsertIndex": 3
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# --------------------------------- 08 7100f --------------------------------- #

# spec_B_E_13
echo "Zweigstelle 7100f..."
# Copy E|ZWGST unchanged into the new column 7100f.
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "baseColumnName": "E|ZWGST",
    "expression": "grel:value",
    "onError": "set-to-blank",
    "newColumnName": "7100f",
    "columnInsertIndex": 3
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# --------------------------------- 09 209Aa --------------------------------- #

# spec_B_E_07
echo "Standort 209Aa..."
# Copy E|STA1 unchanged into the new column 209Aa.
if curl -fs \
  --data project="${projects[$p]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "baseColumnName": "E|STA1",
    "expression": "grel:value",
    "onError": "set-to-blank",
    "newColumnName": "209Aa",
    "columnInsertIndex": 3
  }
]
JSON
then
  log "transformed ${p} (${projects[$p]})"
else
  error "transform ${p} (${projects[$p]}) failed!"
fi
echo

# ================================== EXPORT ================================== #

# Export the PICA3 columns as CSV.
format="csv"
echo "export ${p} to ${format} file using template..."
# Row template for the listed columns:
# - on the first row (row.index == 0) a CSV header line is emitted first
# - blank cells are rendered as the placeholder '␀', which is removed again
#   after joining the cells with ','
# - the line is only emitted when the isNonBlank(...) check on it succeeds
# The heredoc has no NUL byte, so read consumes it completely (including the
# final newline) and returns non-zero at end of input.
IFS= read -r -d '' template << "TEMPLATE"
{{
with(
  [
    '2199',
    '0100',
    '7100B',
    '7100f',
    '209Aa'
  ],
  columns,
  if(
    row.index == 0,
    forEach(
      columns,
      cn,
      cn.escape('csv')
    ).join(',')
    + '\n'
    + with(
      forEach(
        columns,
        cn,
        forNonBlank(
          cells[cn].value,
          v,
          v.escape('csv'),
          '␀'
        )
      ).join(',').replace('␀',''),
      r,
      if(
        isNonBlank(r.split(',').join(',')),
        r + '\n',
        ''
      )
    ),
    with(
      forEach(
        columns,
        cn,
        forNonBlank(
          cells[cn].value,
          v,
          v.escape('csv'),
          '␀'
        )
      ).join(',').replace('␀',''),
      r,
      if(
        isNonBlank(r.split(',').join(',')),
        r + '\n',
        ''
      )
    )
  )
)
}}
TEMPLATE
# head -c -2 drops the two trailing newlines (the one kept from the heredoc
# and the one appended by echo) so the template ends directly after "}}".
if echo "${template}" | head -c -2 | curl -fs \
  --data project="${projects[$p]}" \
  --data format="template" \
  --data prefix="" \
  --data suffix="" \
  --data separator="" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  --data-urlencode template@- \
  "${endpoint}/command/core/export-rows" \
  > "${workspace}/${p}.${format}"
then
  log "exported ${p} (${projects[$p]}) to ${workspace}/${p}.${format}"
else
  error "export of ${p} (${projects[$p]}) failed!"
fi
echo

# ================================== FINISH ================================== #

# refine_stop is defined outside this section.
refine_stop; echo