#!/bin/bash
# Alephino preprocessing
# - import the exports (titles and items) of one of the libraries
# - convert them into a tabular format
# - append the item information to the titles
# - export the result as TSV
#
# Usage: <this script> TITEL.txt EXEMPLARE.txt

# =============================== ENVIRONMENT ================================ #

# source the main script (expected one directory above this script)
source "${BASH_SOURCE%/*}/../bash-refine.sh" || exit 1

# read input: both positional arguments are required
if [[ -z $2 ]]; then
  echo 1>&2 "Please provide path to input files (1. Titel, 2. Exemplare)"; exit 1
fi

# fail early with a clear message instead of letting `readlink -e` return an
# empty path that only surfaces later as a failed upload
for input_file in "$1" "$2"; do
  if [[ ! -f "${input_file}" || ! -r "${input_file}" ]]; then
    echo 1>&2 "Input file '${input_file}' does not exist or is not readable"; exit 1
  fi
done

# project names are the file names without the .txt extension
titel="$(basename -- "$1" .txt)"
exemplare="$(basename -- "$2" .txt)"

# both names are used as keys of the same array; identical names would make
# the second entry silently overwrite the first
if [[ "${titel}" == "${exemplare}" ]]; then
  echo 1>&2 "Input files must have different base names (both are '${titel}')"; exit 1
fi

# NOTE(review): `projects` is not declared here; presumably bash-refine.sh
# declares it as an associative array - confirm.
projects[$titel]="$(readlink -e -- "$1")"
projects[$exemplare]="$(readlink -e -- "$2")"

# check requirements, set trap, create workdir and tee to logfile
# (all of this is done by init from bash-refine.sh, per the original note)
init

# ================================= STARTUP ================================== #

checkpoint "Startup"
echo

# print environment variables whose name or value contains REFINE
printenv | grep REFINE
echo

# start OpenRefine server
refine_start
echo

# ================================== IMPORT ================================== #

checkpoint "Import"
echo

# Import settings used for both files:
# - fixed-width text file
# - column width: 5
# - character encoding: UTF-8
# - "store blank rows" disabled

#######################################
# Upload one input file to OpenRefine as a fixed-width project and hand the
# response (redirect URL written by curl) to refine_store.
# Globals:   projects, endpoint, workdir (read)
# Arguments: $1 - project name; projects[$1] holds the path of the file
# Outputs:   progress message on stdout, ${workdir}/$1.id on disk
#######################################
import_fixed_width() {
  local project_name=$1
  local upload_path="${projects[$project_name]}"
  local id_path="${workdir}/${project_name}.id"

  echo "import file" "${upload_path}" "..."
  if curl -fs --write-out "%{redirect_url}\n" \
    --form project-file="@${upload_path}" \
    --form project-name="${project_name}" \
    --form format="text/line-based/fixed-width" \
    --form options='{
      "encoding":"UTF-8",
      "columnWidths":[5],
      "ignoreLines":-1,
      "headerLines":0,
      "skipDataLines":0,
      "limit":-1,
      "guessCellValueTypes":false,
      "storeBlankRows":false,
      "storeBlankCellsAsNulls":true,
      "includeFileSources":false
    }' \
    "${endpoint}/command/core/create-project-from-upload$(refine_csrf)" \
    > "${id_path}"
  then
    log "imported ${upload_path} as ${project_name}"
  else
    error "import of ${upload_path} failed!"
  fi
  refine_store "${project_name}" "${id_path}" || error "import of ${project_name} failed!"
  echo
}

import_fixed_width "${titel}"
import_fixed_width "${exemplare}"

# ================================ TRANSFORM ================================= #

checkpoint "Transform"
echo

# ------------------ prefix field names with M resp. E ----------------------- #

#######################################
# Remove blanks from the field names in "Column 1" and prefix them with
# "<prefix>|".
# Globals:   projects, endpoint (read)
# Arguments: $1 - project name (key into projects)
#            $2 - prefix letter (M for titles, E for items)
#######################################
prefix_field_names() {
  local project_name=$1
  local prefix=$2

  # unquoted heredoc delimiter: ${prefix} is expanded inside the payload
  if curl -fs \
    --data project="${projects[$project_name]}" \
    --data-urlencode "operations@-" \
    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
    << JSON
[
  {
    "op": "core/text-transform",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Column 1",
    "expression": "grel:'${prefix}|' + value.replace(' ','')",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10,
    "description": "Text transform on cells in column Column 1 using expression grel:'${prefix}|' + value.trim()"
  }
]
JSON
  then
    log "transformed ${project_name} (${projects[$project_name]})"
  else
    error "transform ${project_name} (${projects[$project_name]}) failed!"
  fi
}

echo "Feldnamen um M bzw. E ergänzen..."
prefix_field_names "${titel}" "M"
prefix_field_names "${exemplare}" "E"
echo

# --------------------------------- sorting ---------------------------------- #

#######################################
# Sort rows by record and then by field name. A temporary column "id" is
# filled from "Column 2" on the "<prefix>|IDN" rows, filled down, used as
# first sort key ("Column 1" is the second) and removed again.
# Globals:   projects, endpoint (read)
# Arguments: $1 - project name (key into projects)
#            $2 - prefix letter of the field names (M or E)
#######################################
sort_records_and_fields() {
  local project_name=$1
  local prefix=$2

  # unquoted heredoc delimiter: ${prefix} is expanded inside the payload
  if curl -fs \
    --data project="${projects[$project_name]}" \
    --data-urlencode "operations@-" \
    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
    << JSON
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "Column 1",
          "expression": "value",
          "columnName": "Column 1",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": "${prefix}|IDN",
                "l": "${prefix}|IDN"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "row-based"
    },
    "baseColumnName": "Column 2",
    "expression": "grel:value",
    "onError": "set-to-blank",
    "newColumnName": "id",
    "columnInsertIndex": 2,
    "description": "Create column id at index 2 based on column Column 2 using expression grel:value"
  },
  {
    "op": "core/column-move",
    "columnName": "id",
    "index": 0,
    "description": "Move column id to position 0"
  },
  {
    "op": "core/fill-down",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "id",
    "description": "Fill down cells in column id"
  },
  {
    "op": "core/row-reorder",
    "mode": "row-based",
    "sorting": {
      "criteria": [
        {
          "valueType": "string",
          "column": "id",
          "blankPosition": 2,
          "errorPosition": 1,
          "reverse": false,
          "caseSensitive": false
        },
        {
          "valueType": "string",
          "column": "Column 1",
          "blankPosition": 2,
          "errorPosition": 1,
          "reverse": false,
          "caseSensitive": false
        }
      ]
    },
    "description": "Reorder rows"
  },
  {
    "op": "core/column-removal",
    "columnName": "id",
    "description": "Remove column id"
  }
]
JSON
  then
    log "transformed ${project_name} (${projects[$project_name]})"
  else
    error "transform ${project_name} (${projects[$project_name]}) failed!"
  fi
}

echo "Datensätze und Feldnamen sortieren..."
sort_records_and_fields "${titel}" "M"
sort_records_and_fields "${exemplare}" "E"
echo

# ------------------------ merge repeated fields ----------------------------- #

# Equivalent steps in the OpenRefine UI:
# - Column 1 > Edit cells > Blank down
# - Column 2 > Edit cells > Join multi-valued cells... > ␟

#######################################
# Blank down "Column 1" and join the values of "Column 2" that belong to the
# same key, using ␟ as separator.
# Globals:   projects, endpoint (read)
# Arguments: $1 - project name (key into projects)
#######################################
join_repeated_fields() {
  local project_name=$1

  if curl -fs \
    --data project="${projects[$project_name]}" \
    --data-urlencode "operations@-" \
    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
    << "JSON"
[
  {
    "op": "core/blank-down",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "Column 1",
    "description": "Blank down cells in column Column 1"
  },
  {
    "op": "core/multivalued-cell-join",
    "columnName": "Column 2",
    "keyColumnName": "Column 1",
    "separator": "␟",
    "description": "Join multi-valued cells in column Column 2"
  }
]
JSON
  then
    log "transformed ${project_name} (${projects[$project_name]})"
  else
    error "transform ${project_name} (${projects[$project_name]}) failed!"
  fi
}

echo "Mehrfachbelegungen zusammenführen..."
join_repeated_fields "${titel}"
join_repeated_fields "${exemplare}"
echo

# ------------------------- delete unneeded fields --------------------------- #

echo "Nicht benötigte Felder löschen..."

# Titles: drop the rows of the listed M| fields, then every row whose field
# name matches the regex ^M\|9 (doubled backslash is JSON escaping; the
# heredoc delimiter is quoted so the shell leaves it untouched).
if ! curl -fs \
  --data project="${projects[$titel]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/row-removal",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "Column 1",
          "expression": "value",
          "columnName": "Column 1",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            { "v": { "v": "M|025_", "l": "M|025_" } },
            { "v": { "v": "M|025e", "l": "M|025e" } },
            { "v": { "v": "M|004", "l": "M|004" } },
            { "v": { "v": "M|011", "l": "M|011" } },
            { "v": { "v": "M|026_", "l": "M|026_" } },
            { "v": { "v": "M|026a", "l": "M|026a" } },
            { "v": { "v": "M|026d", "l": "M|026d" } },
            { "v": { "v": "M|026g", "l": "M|026g" } },
            { "v": { "v": "M|030", "l": "M|030" } },
            { "v": { "v": "M|037z", "l": "M|037z" } },
            { "v": { "v": "M|038b", "l": "M|038b" } },
            { "v": { "v": "M|070", "l": "M|070" } },
            { "v": { "v": "M|073", "l": "M|073" } },
            { "v": { "v": "M|076z", "l": "M|076z" } },
            { "v": { "v": "M|080", "l": "M|080" } },
            { "v": { "v": "M|800s", "l": "M|800s" } },
            { "v": { "v": "M|802", "l": "M|802" } },
            { "v": { "v": "M|808b", "l": "M|808b" } }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "row-based"
    }
  },
  {
    "op": "core/row-removal",
    "engineConfig": {
      "facets": [
        {
          "type": "text",
          "name": "Column 1",
          "columnName": "Column 1",
          "query": "^M\\|9",
          "mode": "regex",
          "caseSensitive": false,
          "invert": false
        }
      ],
      "mode": "row-based"
    }
  }
]
JSON
then
  error "transform ${titel} (${projects[$titel]}) failed!"
else
  log "transformed ${titel} (${projects[$titel]})"
fi

# Items: drop the rows of the listed E| fields.
if ! curl -fs \
  --data project="${projects[$exemplare]}" \
  --data-urlencode "operations@-" \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
  << "JSON"
[
  {
    "op": "core/row-removal",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "Column 1",
          "expression": "value",
          "columnName": "Column 1",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            { "v": { "v": "E|A02", "l": "E|A02" } },
            { "v": { "v": "E|A86", "l": "E|A86" } },
            { "v": { "v": "E|SUB", "l": "E|SUB" } },
            { "v": { "v": "E|FMT", "l": "E|FMT" } },
            { "v": { "v": "E|CAT", "l": "E|CAT" } },
            { "v": { "v": "E|027", "l": "E|027" } },
            { "v": { "v": "E|123", "l": "E|123" } }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "row-based"
    }
  }
]
JSON
then
  error "transform ${exemplare} (${projects[$exemplare]}) failed!"
else
  log "transformed ${exemplare} (${projects[$exemplare]})"
fi
echo

# -------------------------------- transpose --------------------------------- #

# Equivalent step in the OpenRefine UI:
# - Column 1 > Transpose > Columnize by key/value columns... > OK

#######################################
# Turn the key/value rows into one column per field name: "Column 1" is the
# key column, "Column 2" the value column, no note column.
# Globals:   projects, endpoint (read)
# Arguments: $1 - project name (key into projects)
#######################################
columnize_fields() {
  local project_name=$1

  if curl -fs \
    --data project="${projects[$project_name]}" \
    --data-urlencode "operations@-" \
    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
    << "JSON"
[
  {
    "op": "core/key-value-columnize",
    "keyColumnName": "Column 1",
    "valueColumnName": "Column 2",
    "noteColumnName": "",
    "description": "Columnize by key column Column 1 and value column Column 2 with note column "
  }
]
JSON
  then
    log "transformed ${project_name} (${projects[$project_name]})"
  else
    error "transform ${project_name} (${projects[$project_name]}) failed!"
  fi
}

echo "Transponieren..."
columnize_fields "${titel}"
columnize_fields "${exemplare}"
echo

# ---------------------------- extract title id ------------------------------ #

#######################################
# Add a column computed from an existing column by a GREL expression.
# Globals:   projects, endpoint (read)
# Arguments: $1 - project name (key into projects)
#            $2 - base column name
#            $3 - GREL expression (without the "grel:" prefix)
#            $4 - name of the new column
#            $5 - insert index of the new column
#######################################
add_derived_column() {
  local project_name=$1
  local base_column=$2
  local grel=$3
  local new_column=$4
  local insert_index=$5

  # unquoted heredoc delimiter: the parameters are expanded inside the
  # payload; expanded text is not re-scanned, so a backslash inside ${grel}
  # reaches the server unchanged
  if curl -fs \
    --data project="${projects[$project_name]}" \
    --data-urlencode "operations@-" \
    "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null \
    << JSON
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "baseColumnName": "${base_column}",
    "expression": "grel:${grel}",
    "onError": "set-to-blank",
    "newColumnName": "${new_column}",
    "columnInsertIndex": ${insert_index},
    "description": "Create column ${new_column} at index ${insert_index} based on column ${base_column} using expression grel:${grel}"
  }
]
JSON
  then
    log "transformed ${project_name} (${projects[$project_name]})"
  else
    error "transform ${project_name} (${projects[$project_name]}) failed!"
  fi
}

echo "Titel-ID separieren..."
# titles: id = M|IDN without leading zeros
add_derived_column "${titel}" "M|IDN" \
  "value.replace(/^0+/,'')" "id" 12
# items: titel_id = first part of E|BIB (split at \u001f), first character
# dropped, leading zeros removed
add_derived_column "${exemplare}" "E|BIB" \
  "value.split('\u001f')[0].slice(1).replace(/^0+/,'')" "titel_id" 18
echo

# ----------------------- enrich titles with items --------------------------- #

echo "Exemplare anreichern..."

# item columns that are copied into the title project
columns=( "E|001" "E|002a" "E|003" "E|004" "E|027" "E|030" "E|050" "E|100" "E|115" "E|120" "E|123" "E|A02" "E|A72" "E|A73" "E|A87" "E|A91" "E|A95" "E|BIB" "E|CAT" "E|FMT" "E|IDN" "E|LDR" "E|STA" "E|SUB" "E|105" "E|107" "E|A94" "E|125" "E|072" "E|A98" "E|HOL" "E|A86" "E|A63" "E|A70" "E|A83" "E|A85" "E|ABO" "E|A97" "E|A82" "E|002" "E|ORD" )

# Write one JSON array (two operations) per column into a temporary file:
# 1. look up all rows of the items project whose titel_id equals the title id
#    (GREL cross) and join the values of the column with ␞
# 2. split the joined values into separate rows again, keyed by M|001
# The loop output is redirected once with ">" so the file is truncated first.
# Appending per iteration would extend a file left over from a previous
# failed run (it is only removed on success) and duplicate the operations.
for column in "${columns[@]}"; do
  cat << JSON
[
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "baseColumnName": "id",
    "expression": "grel:forEach(value.cross('${exemplare}','titel_id'),r,forNonBlank(r.cells['${column}'].value,v,v,'')).join('␞')",
    "onError": "set-to-blank",
    "newColumnName": "${column}",
    "columnInsertIndex": 13
  },
  {
    "op": "core/multivalued-cell-split",
    "columnName": "${column}",
    "keyColumnName": "M|001",
    "mode": "separator",
    "separator": "␞",
    "regex": false
  }
]
JSON
done > "${workdir}/${titel}.tmp"

# merge the per-column arrays into one operations array and apply it
if "${jq}" -s add "${workdir}/${titel}.tmp" | curl -fs \
  --data project="${projects[$titel]}" \
  --data-urlencode operations@- \
  "${endpoint}/command/core/apply-operations$(refine_csrf)" > /dev/null
then
  log "transformed ${titel} (${projects[$titel]})"
  rm "${workdir}/${titel}.tmp"
else
  error "transform ${titel} (${projects[$titel]}) failed!"
fi
echo

# ================================== EXPORT ================================== #

checkpoint "Export"
echo

format="tsv"
# project name without suffix: everything from the first "-" on is cut off
p="${titel%%-*}"

echo "export ${titel} to ${format} file..."
# export all rows of the title project (no facets) into the workdir
if ! curl -fs \
  --data project="${projects[$titel]}" \
  --data format="${format}" \
  --data engine='{"facets":[],"mode":"row-based"}' \
  "${endpoint}/command/core/export-rows" \
  > "${workdir}/${p}.${format}"
then
  error "export of ${titel} (${projects[$titel]}) failed!"
else
  log "exported ${titel} (${projects[$titel]}) to ${workdir}/${p}.${format}"
fi
echo

# ================================== FINISH ================================== #

checkpoint "Finish"
echo

# stop OpenRefine server
refine_stop
echo

# calculate run time based on checkpoints
checkpoint_stats
echo

# word count on all files in workdir
count_output