Updates kapitel-3/35-verarbeitung-von-marc21-mit-openrefine.md
Auto commit by GitBook Editor
This commit is contained in:
parent
35eb3c9f3f
commit
a58556f8d4
|
@ -34,6 +34,8 @@ Haben Sie eigene Vorstellungen? Falls nicht, dann orientieren Sie sich an [Dubli
|
|||
|
||||
## Anleitung zur Transformation von MARC21 mit OpenRefine
|
||||
|
||||
JSON-Transformationsregeln für Schritte 2 bis 5: [openrefine-marc.json](https://github.com/felixlohmeier/kurs-bibliotheks-und-archivinformatik/blob/master/openrefine/openrefine-marc.json)
|
||||
|
||||
### Step 1: Import MARC files
|
||||
|
||||
Two options:
|
||||
|
|
|
@ -0,0 +1,298 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"description": "Remove column record",
|
||||
"columnName": "record"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"description": "Remove column record - datafield",
|
||||
"columnName": "record - datafield"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"description": "Move column record - leader to position 0",
|
||||
"columnName": "record - leader",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"description": "Text transform on cells in column record - controlfield using expression grel:cells[\"record - controlfield - tag\"].value + \"␟\" + value",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"columnName": "record - controlfield",
|
||||
"expression": "grel:cells[\"record - controlfield - tag\"].value + \"␟\" + value",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"description": "Remove column record - controlfield - tag",
|
||||
"columnName": "record - controlfield - tag"
|
||||
},
|
||||
{
|
||||
"op": "core/multivalued-cell-join",
|
||||
"description": "Join multi-valued cells in column record - controlfield",
|
||||
"columnName": "record - controlfield",
|
||||
"keyColumnName": "record - leader",
|
||||
"separator": "␞"
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"description": "Create column tmp at index 1 based on column record - leader using expression grel:\"\"",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"newColumnName": "tmp",
|
||||
"columnInsertIndex": 1,
|
||||
"baseColumnName": "record - leader",
|
||||
"expression": "grel:\"\"",
|
||||
"onError": "set-to-blank"
|
||||
},
|
||||
{
|
||||
"op": "core/transpose-columns-into-rows",
|
||||
"description": "Transpose cells in 2 column(s) starting with record - leader into rows in one new column named leader",
|
||||
"startColumnName": "record - leader",
|
||||
"columnCount": 2,
|
||||
"ignoreBlankCells": true,
|
||||
"fillDown": true,
|
||||
"combinedColumnName": "leader",
|
||||
"prependColumnName": false,
|
||||
"separator": ":"
|
||||
},
|
||||
{
|
||||
"op": "core/blank-down",
|
||||
"description": "Blank down cells in column record - controlfield",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"columnName": "record - controlfield"
|
||||
},
|
||||
{
|
||||
"op": "core/column-split",
|
||||
"description": "Split column record - controlfield by separator",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"columnName": "record - controlfield",
|
||||
"guessCellType": true,
|
||||
"removeOriginalColumn": true,
|
||||
"mode": "separator",
|
||||
"separator": "␞",
|
||||
"regex": false,
|
||||
"maxColumns": 0
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"description": "Text transform on cells in column record - datafield - ind1 using expression null",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": [
|
||||
{
|
||||
"omitError": false,
|
||||
"expression": "isBlank(value)",
|
||||
"selectBlank": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectError": false,
|
||||
"invert": false,
|
||||
"name": "leader",
|
||||
"omitBlank": false,
|
||||
"type": "list",
|
||||
"columnName": "leader"
|
||||
}
|
||||
]
|
||||
},
|
||||
"columnName": "record - datafield - ind1",
|
||||
"expression": "null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"description": "Text transform on cells in column record - datafield - ind2 using expression null",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": [
|
||||
{
|
||||
"omitError": false,
|
||||
"expression": "isBlank(value)",
|
||||
"selectBlank": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectError": false,
|
||||
"invert": false,
|
||||
"name": "leader",
|
||||
"omitBlank": false,
|
||||
"type": "list",
|
||||
"columnName": "leader"
|
||||
}
|
||||
]
|
||||
},
|
||||
"columnName": "record - datafield - ind2",
|
||||
"expression": "null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"description": "Text transform on cells in column record - datafield - subfield - code using expression null",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": [
|
||||
{
|
||||
"omitError": false,
|
||||
"expression": "isBlank(value)",
|
||||
"selectBlank": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectError": false,
|
||||
"invert": false,
|
||||
"name": "leader",
|
||||
"omitBlank": false,
|
||||
"type": "list",
|
||||
"columnName": "leader"
|
||||
}
|
||||
]
|
||||
},
|
||||
"columnName": "record - datafield - subfield - code",
|
||||
"expression": "null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/transpose-columns-into-rows",
|
||||
"description": "Transpose cells in columns starting with record - controlfield 1 into rows in one new column named tmp",
|
||||
"startColumnName": "record - controlfield 1",
|
||||
"columnCount": -1,
|
||||
"ignoreBlankCells": true,
|
||||
"fillDown": false,
|
||||
"combinedColumnName": "tmp",
|
||||
"prependColumnName": false,
|
||||
"separator": ":"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"description": "Text transform on cells in column record - datafield - tag using expression grel:cells[\"tmp\"].value.split(\"␟\")[0]",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"columnName": "record - datafield - tag",
|
||||
"expression": "grel:cells[\"tmp\"].value.split(\"␟\")[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"description": "Text transform on cells in column record - datafield - subfield using expression grel:cells[\"tmp\"].value.split(\"␟\")[1]",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"columnName": "record - datafield - subfield",
|
||||
"expression": "grel:cells[\"tmp\"].value.split(\"␟\")[1]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"description": "Remove column tmp",
|
||||
"columnName": "tmp"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"description": "Move column leader to position 5",
|
||||
"columnName": "leader",
|
||||
"index": 5
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"description": "Create column index at index 1 based on column record - datafield - tag using expression grel:forNonBlank(value,v,row.record.index,null)",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"newColumnName": "index",
|
||||
"columnInsertIndex": 1,
|
||||
"baseColumnName": "record - datafield - tag",
|
||||
"expression": "grel:forNonBlank(value,v,row.record.index,null)",
|
||||
"onError": "set-to-blank"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"description": "Move column index to position 0",
|
||||
"columnName": "index",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"description": "Text transform on cells in column record - datafield - ind1 using expression grel:row.record.cells[\"record - datafield - ind1\"].value[0]",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"columnName": "record - datafield - ind1",
|
||||
"expression": "grel:row.record.cells[\"record - datafield - ind1\"].value[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"description": "Text transform on cells in column record - datafield - ind2 using expression grel:row.record.cells[\"record - datafield - ind2\"].value[0]",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"columnName": "record - datafield - ind2",
|
||||
"expression": "grel:row.record.cells[\"record - datafield - ind2\"].value[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10
|
||||
},
|
||||
{
|
||||
"op": "core/fill-down",
|
||||
"description": "Fill down cells in column record - datafield - tag",
|
||||
"engineConfig": {
|
||||
"mode": "row-based",
|
||||
"facets": []
|
||||
},
|
||||
"columnName": "record - datafield - tag"
|
||||
},
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"description": "Move column leader to position 0",
|
||||
"columnName": "leader",
|
||||
"index": 0
|
||||
}
|
||||
]
|
Loading…
Reference in New Issue