Updates kapitel-3/35-verarbeitung-von-marc21-mit-openrefine.md
Auto commit by GitBook Editor
This commit is contained in:
parent
35eb3c9f3f
commit
a58556f8d4
|
@ -34,6 +34,8 @@ Haben Sie eigene Vorstellungen? Falls nicht, dann orientieren Sie sich an [Dubli
|
||||||
|
|
||||||
## Anleitung zur Transformation von MARC21 mit OpenRefine
|
## Anleitung zur Transformation von MARC21 mit OpenRefine
|
||||||
|
|
||||||
|
JSON-Transformationsregeln für Schritte 2 bis 5: [openrefine-marc.json](https://github.com/felixlohmeier/kurs-bibliotheks-und-archivinformatik/blob/master/openrefine/openrefine-marc.json)
|
||||||
|
|
||||||
### Step 1: Import MARC files
|
### Step 1: Import MARC files
|
||||||
|
|
||||||
Two options:
|
Two options:
|
||||||
|
|
|
@ -0,0 +1,298 @@
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"op": "core/column-removal",
|
||||||
|
"description": "Remove column record",
|
||||||
|
"columnName": "record"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-removal",
|
||||||
|
"description": "Remove column record - datafield",
|
||||||
|
"columnName": "record - datafield"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-move",
|
||||||
|
"description": "Move column record - leader to position 0",
|
||||||
|
"columnName": "record - leader",
|
||||||
|
"index": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/text-transform",
|
||||||
|
"description": "Text transform on cells in column record - controlfield using expression grel:cells[\"record - controlfield - tag\"].value + \"␟\" + value",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"columnName": "record - controlfield",
|
||||||
|
"expression": "grel:cells[\"record - controlfield - tag\"].value + \"␟\" + value",
|
||||||
|
"onError": "keep-original",
|
||||||
|
"repeat": false,
|
||||||
|
"repeatCount": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-removal",
|
||||||
|
"description": "Remove column record - controlfield - tag",
|
||||||
|
"columnName": "record - controlfield - tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/multivalued-cell-join",
|
||||||
|
"description": "Join multi-valued cells in column record - controlfield",
|
||||||
|
"columnName": "record - controlfield",
|
||||||
|
"keyColumnName": "record - leader",
|
||||||
|
"separator": "␞"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-addition",
|
||||||
|
"description": "Create column tmp at index 1 based on column record - leader using expression grel:\"\"",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"newColumnName": "tmp",
|
||||||
|
"columnInsertIndex": 1,
|
||||||
|
"baseColumnName": "record - leader",
|
||||||
|
"expression": "grel:\"\"",
|
||||||
|
"onError": "set-to-blank"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/transpose-columns-into-rows",
|
||||||
|
"description": "Transpose cells in 2 column(s) starting with record - leader into rows in one new column named leader",
|
||||||
|
"startColumnName": "record - leader",
|
||||||
|
"columnCount": 2,
|
||||||
|
"ignoreBlankCells": true,
|
||||||
|
"fillDown": true,
|
||||||
|
"combinedColumnName": "leader",
|
||||||
|
"prependColumnName": false,
|
||||||
|
"separator": ":"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/blank-down",
|
||||||
|
"description": "Blank down cells in column record - controlfield",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"columnName": "record - controlfield"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-split",
|
||||||
|
"description": "Split column record - controlfield by separator",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"columnName": "record - controlfield",
|
||||||
|
"guessCellType": true,
|
||||||
|
"removeOriginalColumn": true,
|
||||||
|
"mode": "separator",
|
||||||
|
"separator": "␞",
|
||||||
|
"regex": false,
|
||||||
|
"maxColumns": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/text-transform",
|
||||||
|
"description": "Text transform on cells in column record - datafield - ind1 using expression null",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": [
|
||||||
|
{
|
||||||
|
"omitError": false,
|
||||||
|
"expression": "isBlank(value)",
|
||||||
|
"selectBlank": false,
|
||||||
|
"selection": [
|
||||||
|
{
|
||||||
|
"v": {
|
||||||
|
"v": false,
|
||||||
|
"l": "false"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"selectError": false,
|
||||||
|
"invert": false,
|
||||||
|
"name": "leader",
|
||||||
|
"omitBlank": false,
|
||||||
|
"type": "list",
|
||||||
|
"columnName": "leader"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"columnName": "record - datafield - ind1",
|
||||||
|
"expression": "null",
|
||||||
|
"onError": "keep-original",
|
||||||
|
"repeat": false,
|
||||||
|
"repeatCount": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/text-transform",
|
||||||
|
"description": "Text transform on cells in column record - datafield - ind2 using expression null",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": [
|
||||||
|
{
|
||||||
|
"omitError": false,
|
||||||
|
"expression": "isBlank(value)",
|
||||||
|
"selectBlank": false,
|
||||||
|
"selection": [
|
||||||
|
{
|
||||||
|
"v": {
|
||||||
|
"v": false,
|
||||||
|
"l": "false"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"selectError": false,
|
||||||
|
"invert": false,
|
||||||
|
"name": "leader",
|
||||||
|
"omitBlank": false,
|
||||||
|
"type": "list",
|
||||||
|
"columnName": "leader"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"columnName": "record - datafield - ind2",
|
||||||
|
"expression": "null",
|
||||||
|
"onError": "keep-original",
|
||||||
|
"repeat": false,
|
||||||
|
"repeatCount": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/text-transform",
|
||||||
|
"description": "Text transform on cells in column record - datafield - subfield - code using expression null",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": [
|
||||||
|
{
|
||||||
|
"omitError": false,
|
||||||
|
"expression": "isBlank(value)",
|
||||||
|
"selectBlank": false,
|
||||||
|
"selection": [
|
||||||
|
{
|
||||||
|
"v": {
|
||||||
|
"v": false,
|
||||||
|
"l": "false"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"selectError": false,
|
||||||
|
"invert": false,
|
||||||
|
"name": "leader",
|
||||||
|
"omitBlank": false,
|
||||||
|
"type": "list",
|
||||||
|
"columnName": "leader"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"columnName": "record - datafield - subfield - code",
|
||||||
|
"expression": "null",
|
||||||
|
"onError": "keep-original",
|
||||||
|
"repeat": false,
|
||||||
|
"repeatCount": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/transpose-columns-into-rows",
|
||||||
|
"description": "Transpose cells in columns starting with record - controlfield 1 into rows in one new column named tmp",
|
||||||
|
"startColumnName": "record - controlfield 1",
|
||||||
|
"columnCount": -1,
|
||||||
|
"ignoreBlankCells": true,
|
||||||
|
"fillDown": false,
|
||||||
|
"combinedColumnName": "tmp",
|
||||||
|
"prependColumnName": false,
|
||||||
|
"separator": ":"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/text-transform",
|
||||||
|
"description": "Text transform on cells in column record - datafield - tag using expression grel:cells[\"tmp\"].value.split(\"␟\")[0]",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"columnName": "record - datafield - tag",
|
||||||
|
"expression": "grel:cells[\"tmp\"].value.split(\"␟\")[0]",
|
||||||
|
"onError": "keep-original",
|
||||||
|
"repeat": false,
|
||||||
|
"repeatCount": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/text-transform",
|
||||||
|
"description": "Text transform on cells in column record - datafield - subfield using expression grel:cells[\"tmp\"].value.split(\"␟\")[1]",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"columnName": "record - datafield - subfield",
|
||||||
|
"expression": "grel:cells[\"tmp\"].value.split(\"␟\")[1]",
|
||||||
|
"onError": "keep-original",
|
||||||
|
"repeat": false,
|
||||||
|
"repeatCount": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-removal",
|
||||||
|
"description": "Remove column tmp",
|
||||||
|
"columnName": "tmp"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-move",
|
||||||
|
"description": "Move column leader to position 5",
|
||||||
|
"columnName": "leader",
|
||||||
|
"index": 5
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-addition",
|
||||||
|
"description": "Create column index at index 1 based on column record - datafield - tag using expression grel:forNonBlank(value,v,row.record.index,null)",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"newColumnName": "index",
|
||||||
|
"columnInsertIndex": 1,
|
||||||
|
"baseColumnName": "record - datafield - tag",
|
||||||
|
"expression": "grel:forNonBlank(value,v,row.record.index,null)",
|
||||||
|
"onError": "set-to-blank"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-move",
|
||||||
|
"description": "Move column index to position 0",
|
||||||
|
"columnName": "index",
|
||||||
|
"index": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/text-transform",
|
||||||
|
"description": "Text transform on cells in column record - datafield - ind1 using expression grel:row.record.cells[\"record - datafield - ind1\"].value[0]",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"columnName": "record - datafield - ind1",
|
||||||
|
"expression": "grel:row.record.cells[\"record - datafield - ind1\"].value[0]",
|
||||||
|
"onError": "keep-original",
|
||||||
|
"repeat": false,
|
||||||
|
"repeatCount": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/text-transform",
|
||||||
|
"description": "Text transform on cells in column record - datafield - ind2 using expression grel:row.record.cells[\"record - datafield - ind2\"].value[0]",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"columnName": "record - datafield - ind2",
|
||||||
|
"expression": "grel:row.record.cells[\"record - datafield - ind2\"].value[0]",
|
||||||
|
"onError": "keep-original",
|
||||||
|
"repeat": false,
|
||||||
|
"repeatCount": 10
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/fill-down",
|
||||||
|
"description": "Fill down cells in column record - datafield - tag",
|
||||||
|
"engineConfig": {
|
||||||
|
"mode": "row-based",
|
||||||
|
"facets": []
|
||||||
|
},
|
||||||
|
"columnName": "record - datafield - tag"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"op": "core/column-move",
|
||||||
|
"description": "Move column leader to position 0",
|
||||||
|
"columnName": "leader",
|
||||||
|
"index": 0
|
||||||
|
}
|
||||||
|
]
|
Loading…
Reference in New Issue