Updates kapitel-3/35-verarbeitung-von-marc21-mit-openrefine.md

Auto commit by GitBook Editor
This commit is contained in:
Felix Lohmeier 2017-11-02 14:13:28 +00:00
parent 35eb3c9f3f
commit a58556f8d4
2 changed files with 300 additions and 0 deletions

View File

@@ -34,6 +34,8 @@ Haben Sie eigene Vorstellungen? Falls nicht, dann orientieren Sie sich an [Dubli
## Anleitung zur Transformation von MARC21 mit OpenRefine
JSON-Transformationsregeln für Schritte 2 bis 5: [openrefine-marc.json](https://github.com/felixlohmeier/kurs-bibliotheks-und-archivinformatik/blob/master/openrefine/openrefine-marc.json)
### Step 1: Import MARC files
Two options:

View File

@@ -0,0 +1,298 @@
[
{
"op": "core/column-removal",
"description": "Remove column record",
"columnName": "record"
},
{
"op": "core/column-removal",
"description": "Remove column record - datafield",
"columnName": "record - datafield"
},
{
"op": "core/column-move",
"description": "Move column record - leader to position 0",
"columnName": "record - leader",
"index": 0
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column record - controlfield using expression grel:cells[\"record - controlfield - tag\"].value + \"␟\" + value",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "record - controlfield",
"expression": "grel:cells[\"record - controlfield - tag\"].value + \"␟\" + value",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"description": "Remove column record - controlfield - tag",
"columnName": "record - controlfield - tag"
},
{
"op": "core/multivalued-cell-join",
"description": "Join multi-valued cells in column record - controlfield",
"columnName": "record - controlfield",
"keyColumnName": "record - leader",
"separator": "␞"
},
{
"op": "core/column-addition",
"description": "Create column tmp at index 1 based on column record - leader using expression grel:\"\"",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"newColumnName": "tmp",
"columnInsertIndex": 1,
"baseColumnName": "record - leader",
"expression": "grel:\"\"",
"onError": "set-to-blank"
},
{
"op": "core/transpose-columns-into-rows",
"description": "Transpose cells in 2 column(s) starting with record - leader into rows in one new column named leader",
"startColumnName": "record - leader",
"columnCount": 2,
"ignoreBlankCells": true,
"fillDown": true,
"combinedColumnName": "leader",
"prependColumnName": false,
"separator": ":"
},
{
"op": "core/blank-down",
"description": "Blank down cells in column record - controlfield",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "record - controlfield"
},
{
"op": "core/column-split",
"description": "Split column record - controlfield by separator",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "record - controlfield",
"guessCellType": true,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "␞",
"regex": false,
"maxColumns": 0
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column record - datafield - ind1 using expression null",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "leader",
"omitBlank": false,
"type": "list",
"columnName": "leader"
}
]
},
"columnName": "record - datafield - ind1",
"expression": "null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column record - datafield - ind2 using expression null",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "leader",
"omitBlank": false,
"type": "list",
"columnName": "leader"
}
]
},
"columnName": "record - datafield - ind2",
"expression": "null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column record - datafield - subfield - code using expression null",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "leader",
"omitBlank": false,
"type": "list",
"columnName": "leader"
}
]
},
"columnName": "record - datafield - subfield - code",
"expression": "null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/transpose-columns-into-rows",
"description": "Transpose cells in columns starting with record - controlfield 1 into rows in one new column named tmp",
"startColumnName": "record - controlfield 1",
"columnCount": -1,
"ignoreBlankCells": true,
"fillDown": false,
"combinedColumnName": "tmp",
"prependColumnName": false,
"separator": ":"
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column record - datafield - tag using expression grel:cells[\"tmp\"].value.split(\"␟\")[0]",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "record - datafield - tag",
"expression": "grel:cells[\"tmp\"].value.split(\"␟\")[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column record - datafield - subfield using expression grel:cells[\"tmp\"].value.split(\"␟\")[1]",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "record - datafield - subfield",
"expression": "grel:cells[\"tmp\"].value.split(\"␟\")[1]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"description": "Remove column tmp",
"columnName": "tmp"
},
{
"op": "core/column-move",
"description": "Move column leader to position 5",
"columnName": "leader",
"index": 5
},
{
"op": "core/column-addition",
"description": "Create column index at index 1 based on column record - datafield - tag using expression grel:forNonBlank(value,v,row.record.index,null)",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"newColumnName": "index",
"columnInsertIndex": 1,
"baseColumnName": "record - datafield - tag",
"expression": "grel:forNonBlank(value,v,row.record.index,null)",
"onError": "set-to-blank"
},
{
"op": "core/column-move",
"description": "Move column index to position 0",
"columnName": "index",
"index": 0
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column record - datafield - ind1 using expression grel:row.record.cells[\"record - datafield - ind1\"].value[0]",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "record - datafield - ind1",
"expression": "grel:row.record.cells[\"record - datafield - ind1\"].value[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column record - datafield - ind2 using expression grel:row.record.cells[\"record - datafield - ind2\"].value[0]",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "record - datafield - ind2",
"expression": "grel:row.record.cells[\"record - datafield - ind2\"].value[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/fill-down",
"description": "Fill down cells in column record - datafield - tag",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "record - datafield - tag"
},
{
"op": "core/column-move",
"description": "Move column leader to position 0",
"columnName": "leader",
"index": 0
}
]