From 18736d5ab17aedb0999726ff1b004cbc40a0445b Mon Sep 17 00:00:00 2001 From: Felix Lohmeier Date: Tue, 16 May 2017 09:12:02 +0000 Subject: [PATCH] Updates anwendungsfall-marc21/vorverarbeitung-mit-marcedit-und-openrefine.md Auto commit by GitBook Editor --- ...erarbeitung-mit-marcedit-und-openrefine.md | 5 + openrefine/marc.json | 163 +++++++++++ openrefine/marc_alternativ.json | 255 ++++++++++++++++++ 3 files changed, 423 insertions(+) create mode 100644 openrefine/marc.json create mode 100644 openrefine/marc_alternativ.json diff --git a/anwendungsfall-marc21/vorverarbeitung-mit-marcedit-und-openrefine.md b/anwendungsfall-marc21/vorverarbeitung-mit-marcedit-und-openrefine.md index a97ae62..8a5938b 100644 --- a/anwendungsfall-marc21/vorverarbeitung-mit-marcedit-und-openrefine.md +++ b/anwendungsfall-marc21/vorverarbeitung-mit-marcedit-und-openrefine.md @@ -35,5 +35,10 @@ Führen Sie folgende Transformationsschritte in OpenRefine durch: * Show: 5 rows * Show as: records +## Optional: Transformationsschritte als JSON-Konfiguration + +* Alle Transformationsschritte oben als JSON-Konfiguration: [openrefine/marc.json](/openrefine/marc.json) +* In der Summerschool erarbeitete Alternativlösung: [openrefine/marc\_alternativ.json](/openrefine/marc_alternativ.json) + diff --git a/openrefine/marc.json b/openrefine/marc.json new file mode 100644 index 0000000..9d9907f --- /dev/null +++ b/openrefine/marc.json @@ -0,0 +1,163 @@ +[ + { + "op": "core/column-removal", + "description": "Remove column Column", + "columnName": "Column" + }, + { + "op": "core/column-addition", + "description": "Create column Subfields at index 4 based on column Content using expression grel:forEach(value.split(\"$\"),v,get(v,0)).join(\"$\")", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "mode": "text", + "caseSensitive": false, + "query": "$", + "name": "Content", + "type": "text", + "columnName": "Content" + } + ] + }, + "newColumnName": "Subfields", + "columnInsertIndex": 4, + "baseColumnName": "Content", + "expression": "grel:forEach(value.split(\"$\"),v,get(v,0)).join(\"$\")", + "onError": "set-to-blank" + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column Content using expression grel:forEach(value.split(\"$\"),v,slice(v,1)).join(\"$\")", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "mode": "text", + "caseSensitive": false, + "query": "$", + "name": "Content", + "type": "text", + "columnName": "Content" + } + ] + }, + "columnName": "Content", + "expression": "grel:forEach(value.split(\"$\"),v,slice(v,1)).join(\"$\")", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/multivalued-cell-split", + "description": "Split multi-valued cells in column Content", + "columnName": "Content", + "keyColumnName": "RecordNumber", + "separator": "$", + "mode": "plain" + }, + { + "op": "core/multivalued-cell-split", + "description": "Split multi-valued cells in column Subfields", + "columnName": "Subfields", + "keyColumnName": "RecordNumber", + "separator": "$", + "mode": "plain" + }, + { + "op": "core/fill-down", + "description": "Fill down cells in column RecordNumber", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "isBlank(value)", + "selectBlank": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectError": false, + "invert": false, + "name": "Subfields", + "omitBlank": false, + "type": "list", + "columnName": "Subfields" + } + ] + }, + "columnName": "RecordNumber" + }, + { + "op": "core/fill-down", + "description": "Fill down cells in column Tags", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "isBlank(value)", + "selectBlank": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectError": false, + "invert": false, + "name": "Subfields", + "omitBlank": false, + "type": "list", + "columnName": "Subfields" + } + ] + }, + "columnName": "Tags" + }, + { + "op": "core/fill-down", + "description": "Fill down cells in column Indicators", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "isBlank(value)", + "selectBlank": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectError": false, + "invert": false, + "name": "Subfields", + "omitBlank": false, + "type": "list", + "columnName": "Subfields" + } + ] + }, + "columnName": "Indicators" + }, + { + "op": "core/blank-down", + "description": "Blank down cells in column RecordNumber", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "RecordNumber" + } +] diff --git a/openrefine/marc_alternativ.json b/openrefine/marc_alternativ.json new file mode 100644 index 0000000..2eb2fb0 --- /dev/null +++ b/openrefine/marc_alternativ.json @@ -0,0 +1,255 @@ +[ + { + "op": "core/column-removal", + "description": "Remove column Column", + "columnName": "Column" + }, + { + "op": "core/row-star", + "description": "Star rows", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "mode": "text", + "caseSensitive": false, + "query": "$", + "name": "Content", + "type": "text", + "columnName": "Content" + } + ] + }, + "starred": true + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column Content using expression grel:value.slice(1)", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "mode": "text", + "caseSensitive": false, + "query": "$", + "name": "Content", + "type": "text", + "columnName": "Content" + } + ] + }, + "columnName": "Content", + "expression": "grel:value.slice(1)", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/multivalued-cell-split", + "description": "Split multi-valued cells in column Content", + "columnName": "Content", + "keyColumnName": "RecordNumber", + "separator": "$", + "mode": "plain" + }, + { + "op": "core/row-star", + "description": "Star rows", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "isBlank(value)", + "selectBlank": false, + "selection": [ + { + "v": { + "v": true, + "l": "true" + } + } + ], + "selectError": false, + "invert": false, + "name": "RecordNumber", + "omitBlank": false, + "type": "list", + "columnName": "RecordNumber" + } + ] + }, + "starred": true + }, + { + "op": "core/column-addition", + "description": "Create column Subfields at index 4 based on column Content using expression grel:value.get(0)", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "row.starred", + "selectBlank": false, + "selection": [ + { + "v": { + "v": true, + "l": "true" + } + } + ], + "selectError": false, + "invert": false, + "name": "Starred Rows", + "omitBlank": false, + "type": "list", + "columnName": "" + } + ] + }, + "newColumnName": "Subfields", + "columnInsertIndex": 4, + "baseColumnName": "Content", + "expression": "grel:value.get(0)", + "onError": "set-to-blank" + }, + { + "op": "core/text-transform", + "description": "Text transform on cells in column Content using expression grel:value.slice(1)", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "row.starred", + "selectBlank": false, + "selection": [ + { + "v": { + "v": true, + "l": "true" + } + } + ], + "selectError": false, + "invert": false, + "name": "Starred Rows", + "omitBlank": false, + "type": "list", + "columnName": "" + } + ] + }, + "columnName": "Content", + "expression": "grel:value.slice(1)", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10 + }, + { + "op": "core/fill-down", + "description": "Fill down cells in column RecordNumber", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "row.starred", + "selectBlank": false, + "selection": [ + { + "v": { + "v": true, + "l": "true" + } + } + ], + "selectError": false, + "invert": false, + "name": "Starred Rows", + "omitBlank": false, + "type": "list", + "columnName": "" + } + ] + }, + "columnName": "RecordNumber" + }, + { + "op": "core/fill-down", + "description": "Fill down cells in column Tags", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "row.starred", + "selectBlank": false, + "selection": [ + { + "v": { + "v": true, + "l": "true" + } + } + ], + "selectError": false, + "invert": false, + "name": "Starred Rows", + "omitBlank": false, + "type": "list", + "columnName": "" + } + ] + }, + "columnName": "Tags" + }, + { + "op": "core/fill-down", + "description": "Fill down cells in column Indicators", + "engineConfig": { + "mode": "row-based", + "facets": [ + { + "omitError": false, + "expression": "row.starred", + "selectBlank": false, + "selection": [ + { + "v": { + "v": true, + "l": "true" + } + } + ], + "selectError": false, + "invert": false, + "name": "Starred Rows", + "omitBlank": false, + "type": "list", + "columnName": "" + } + ] + }, + "columnName": "Indicators" + }, + { + "op": "core/row-star", + "description": "Unstar rows", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "starred": false + }, + { + "op": "core/blank-down", + "description": "Blank down cells in column RecordNumber", + "engineConfig": { + "mode": "row-based", + "facets": [] + }, + "columnName": "RecordNumber" + } +]