From a58556f8d4d1911dfa3b1739d83c37aefe175778 Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Thu, 2 Nov 2017 14:13:28 +0000 Subject: [PATCH] Updates kapitel-3/35-verarbeitung-von-marc21-mit-openrefine.md Auto commit by GitBook Editor --- ...-verarbeitung-von-marc21-mit-openrefine.md | 2 + openrefine/openrefine-marc.json | 298 ++++++++++++++++++ 2 files changed, 300 insertions(+) create mode 100644 openrefine/openrefine-marc.json diff --git a/kapitel-3/35-verarbeitung-von-marc21-mit-openrefine.md b/kapitel-3/35-verarbeitung-von-marc21-mit-openrefine.md index 2656f8e..0ca392b 100644 --- a/kapitel-3/35-verarbeitung-von-marc21-mit-openrefine.md +++ b/kapitel-3/35-verarbeitung-von-marc21-mit-openrefine.md @@ -34,6 +34,8 @@ Haben Sie eigene Vorstellungen? Falls nicht, dann orientieren Sie sich an [Dubli ## Anleitung zur Transformation von MARC21 mit OpenRefine +JSON-Transformationsregeln für Schritte 2 bis 5: [openrefine-marc.json](https://github.com/felixlohmeier/kurs-bibliotheks-und-archivinformatik/blob/master/openrefine/openrefine-marc.json) + ### Step 1: Import MARC files Two options: diff --git a/openrefine/openrefine-marc.json b/openrefine/openrefine-marc.json new file mode 100644 index 0000000..5538b27 --- /dev/null +++ b/openrefine/openrefine-marc.json @@ -0,0 +1,298 @@ +[ + { + "op": "core/column-removal", + "description": "Remove column record", + "columnName": "record" + }, + { + "op": "core/column-removal", + "description": "Remove column record - datafield", + "columnName": "record - datafield" + }, + { + "op": "core/column-move", + "description": "Move column record - leader to position 0", + "columnName": "record - leader", + "index": 0 + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column record - controlfield using expression grel:cells[\"record - controlfield - tag\"].value + \"␟\" + value", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "record - controlfield", + "expression": "grel:cells[\"record - controlfield - tag\"].value + \"␟\" + value", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/column-removal", + "description": "Remove column record - controlfield - tag", + "columnName": "record - controlfield - tag" + }, + { + "op": "core/multivalued-cell-join", + "description": "Join multi-valued cells in column record - controlfield", + "columnName": "record - controlfield", + "keyColumnName": "record - leader", + "separator": "␞" + }, + { + "op": "core/column-addition", + "description": "Create column tmp at index 1 based on column record - leader using expression grel:\"\"", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "newColumnName": "tmp", + "columnInsertIndex": 1, + "baseColumnName": "record - leader", + "expression": "grel:\"\"", + "onError": "set-to-blank" + }, + { + "op": "core/transpose-columns-into-rows", + "description": "Transpose cells in 2 column(s) starting with record - leader into rows in one new column named leader", + "startColumnName": "record - leader", + "columnCount": 2, + "ignoreBlankCells": true, + "fillDown": true, + "combinedColumnName": "leader", + "prependColumnName": false, + "separator": ":" + }, + { + "op": "core/blank-down", + "description": "Blank down cells in column record - controlfield", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "record - controlfield" + }, + { + "op": "core/column-split", + "description": "Split column record - controlfield by separator", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "record - controlfield", + "guessCellType": true, + "removeOriginalColumn": true, + "mode": "separator", + "separator": "␞", + "regex": false, + "maxColumns": 0 + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column record - datafield - ind1 using expression null", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "isBlank(value)", + "selectBlank": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectError": false, + "invert": false, + "name": "leader", + "omitBlank": false, + "type": "list", + "columnName": "leader" + } + ] + }, + "columnName": "record - datafield - ind1", + "expression": "null", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column record - datafield - ind2 using expression null", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "isBlank(value)", + "selectBlank": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectError": false, + "invert": false, + "name": "leader", + "omitBlank": false, + "type": "list", + "columnName": "leader" + } + ] + }, + "columnName": "record - datafield - ind2", + "expression": "null", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column record - datafield - subfield - code using expression null", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "isBlank(value)", + "selectBlank": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectError": false, + "invert": false, + "name": "leader", + "omitBlank": false, + "type": "list", + "columnName": "leader" + } + ] + }, + "columnName": "record - datafield - subfield - code", + "expression": "null", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/transpose-columns-into-rows", + "description": "Transpose cells in columns starting with record - controlfield 1 into rows in one new column named tmp", + "startColumnName": "record - controlfield 1", + "columnCount": -1, + "ignoreBlankCells": true, + "fillDown": false, + "combinedColumnName": "tmp", + "prependColumnName": false, + "separator": ":" + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column record - datafield - tag using expression grel:cells[\"tmp\"].value.split(\"␟\")[0]", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "record - datafield - tag", + "expression": "grel:cells[\"tmp\"].value.split(\"␟\")[0]", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column record - datafield - subfield using expression grel:cells[\"tmp\"].value.split(\"␟\")[1]", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "record - datafield - subfield", + "expression": "grel:cells[\"tmp\"].value.split(\"␟\")[1]", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/column-removal", + "description": "Remove column tmp", + "columnName": "tmp" + }, + { + "op": "core/column-move", + "description": "Move column leader to position 5", + "columnName": "leader", + "index": 5 + }, + { + "op": "core/column-addition", + "description": "Create column index at index 1 based on column record - datafield - tag using expression grel:forNonBlank(value,v,row.record.index,null)", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "newColumnName": "index", + "columnInsertIndex": 1, + "baseColumnName": "record - datafield - tag", + "expression": "grel:forNonBlank(value,v,row.record.index,null)", + "onError": "set-to-blank" + }, + { + "op": "core/column-move", + "description": "Move column index to position 0", + "columnName": "index", + "index": 0 + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column record - datafield - ind1 using expression grel:row.record.cells[\"record - datafield - ind1\"].value[0]", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "record - datafield - ind1", + "expression": "grel:row.record.cells[\"record - datafield - ind1\"].value[0]", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column record - datafield - ind2 using expression grel:row.record.cells[\"record - datafield - ind2\"].value[0]", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "record - datafield - ind2", + "expression": "grel:row.record.cells[\"record - datafield - ind2\"].value[0]", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/fill-down", + "description": "Fill down cells in column record - datafield - tag", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "record - datafield - tag" + }, + { + "op": "core/column-move", + "description": "Move column leader to position 0", + "columnName": "leader", + "index": 0 + } +]