Updates anwendungsfall-marc21/vorverarbeitung-mit-marcedit-und-openrefine.md

Auto commit by GitBook Editor
This commit is contained in:
Felix Lohmeier 2017-05-16 09:12:02 +00:00
parent 3705523fca
commit 18736d5ab1
3 changed files with 423 additions and 0 deletions

View File

@@ -35,5 +35,10 @@ Führen Sie folgende Transformationsschritte in OpenRefine durch:
* Show: 5 rows
* Show as: records
## Optional: Transformationsschritte als JSON-Konfiguration
* Alle Transformationsschritte oben als JSON-Konfiguration: [openrefine/marc.json](/openrefine/marc.json)
* In der Summerschool erarbeitete Alternativlösung: [openrefine/marc\_alternativ.json](/openrefine/marc_alternativ.json)

163
openrefine/marc.json Normal file
View File

@@ -0,0 +1,163 @@
[
{
"op": "core/column-removal",
"description": "Remove column Column",
"columnName": "Column"
},
{
"op": "core/column-addition",
"description": "Create column Subfields at index 4 based on column Content using expression grel:forEach(value.split(\"$\"),v,get(v,0)).join(\"$\")",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "$",
"name": "Content",
"type": "text",
"columnName": "Content"
}
]
},
"newColumnName": "Subfields",
"columnInsertIndex": 4,
"baseColumnName": "Content",
"expression": "grel:forEach(value.split(\"$\"),v,get(v,0)).join(\"$\")",
"onError": "set-to-blank"
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Content using expression grel:forEach(value.split(\"$\"),v,slice(v,1)).join(\"$\")",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "$",
"name": "Content",
"type": "text",
"columnName": "Content"
}
]
},
"columnName": "Content",
"expression": "grel:forEach(value.split(\"$\"),v,slice(v,1)).join(\"$\")",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/multivalued-cell-split",
"description": "Split multi-valued cells in column Content",
"columnName": "Content",
"keyColumnName": "RecordNumber",
"separator": "$",
"mode": "plain"
},
{
"op": "core/multivalued-cell-split",
"description": "Split multi-valued cells in column Subfields",
"columnName": "Subfields",
"keyColumnName": "RecordNumber",
"separator": "$",
"mode": "plain"
},
{
"op": "core/fill-down",
"description": "Fill down cells in column RecordNumber",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "Subfields",
"omitBlank": false,
"type": "list",
"columnName": "Subfields"
}
]
},
"columnName": "RecordNumber"
},
{
"op": "core/fill-down",
"description": "Fill down cells in column Tags",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "Subfields",
"omitBlank": false,
"type": "list",
"columnName": "Subfields"
}
]
},
"columnName": "Tags"
},
{
"op": "core/fill-down",
"description": "Fill down cells in column Indicators",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectError": false,
"invert": false,
"name": "Subfields",
"omitBlank": false,
"type": "list",
"columnName": "Subfields"
}
]
},
"columnName": "Indicators"
},
{
"op": "core/blank-down",
"description": "Blank down cells in column RecordNumber",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "RecordNumber"
}
]

255
openrefine/marc_alternativ.json Normal file
View File

@@ -0,0 +1,255 @@
[
{
"op": "core/column-removal",
"description": "Remove column Column",
"columnName": "Column"
},
{
"op": "core/row-star",
"description": "Star rows",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "$",
"name": "Content",
"type": "text",
"columnName": "Content"
}
]
},
"starred": true
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Content using expression grel:value.slice(1)",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"mode": "text",
"caseSensitive": false,
"query": "$",
"name": "Content",
"type": "text",
"columnName": "Content"
}
]
},
"columnName": "Content",
"expression": "grel:value.slice(1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/multivalued-cell-split",
"description": "Split multi-valued cells in column Content",
"columnName": "Content",
"keyColumnName": "RecordNumber",
"separator": "$",
"mode": "plain"
},
{
"op": "core/row-star",
"description": "Star rows",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "isBlank(value)",
"selectBlank": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectError": false,
"invert": false,
"name": "RecordNumber",
"omitBlank": false,
"type": "list",
"columnName": "RecordNumber"
}
]
},
"starred": true
},
{
"op": "core/column-addition",
"description": "Create column Subfields at index 4 based on column Content using expression grel:value.get(0)",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "row.starred",
"selectBlank": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectError": false,
"invert": false,
"name": "Starred Rows",
"omitBlank": false,
"type": "list",
"columnName": ""
}
]
},
"newColumnName": "Subfields",
"columnInsertIndex": 4,
"baseColumnName": "Content",
"expression": "grel:value.get(0)",
"onError": "set-to-blank"
},
{
"op": "core/text-transform",
"description": "Text transform on cells in column Content using expression grel:value.slice(1)",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "row.starred",
"selectBlank": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectError": false,
"invert": false,
"name": "Starred Rows",
"omitBlank": false,
"type": "list",
"columnName": ""
}
]
},
"columnName": "Content",
"expression": "grel:value.slice(1)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/fill-down",
"description": "Fill down cells in column RecordNumber",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "row.starred",
"selectBlank": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectError": false,
"invert": false,
"name": "Starred Rows",
"omitBlank": false,
"type": "list",
"columnName": ""
}
]
},
"columnName": "RecordNumber"
},
{
"op": "core/fill-down",
"description": "Fill down cells in column Tags",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "row.starred",
"selectBlank": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectError": false,
"invert": false,
"name": "Starred Rows",
"omitBlank": false,
"type": "list",
"columnName": ""
}
]
},
"columnName": "Tags"
},
{
"op": "core/fill-down",
"description": "Fill down cells in column Indicators",
"engineConfig": {
"mode": "row-based",
"facets": [
{
"omitError": false,
"expression": "row.starred",
"selectBlank": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectError": false,
"invert": false,
"name": "Starred Rows",
"omitBlank": false,
"type": "list",
"columnName": ""
}
]
},
"columnName": "Indicators"
},
{
"op": "core/row-star",
"description": "Unstar rows",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"starred": false
},
{
"op": "core/blank-down",
"description": "Blank down cells in column RecordNumber",
"engineConfig": {
"mode": "row-based",
"facets": []
},
"columnName": "RecordNumber"
}
]