diff --git a/Taskfile.yml b/Taskfile.yml
index 098f3d1..fd0083a 100644
--- a/Taskfile.yml
+++ b/Taskfile.yml
@@ -2,12 +2,11 @@
version: '3'
-#silent: true
output: prefixed
includes:
siegen: ./tasks/siegen.yml
-# wuppertal: ./tasks/wuppertal.yml
+ wuppertal: ./tasks/wuppertal.yml
vars:
DATE: '{{ now | date "2006-01-02"}}'
@@ -31,5 +30,5 @@ tasks:
- sh: test -x "$OPENREFINE_CLIENT"
msg: "requirement openrefine-client missing"
deps:
-# - task: wuppertal:default
+ - task: wuppertal:default
- task: siegen:default
diff --git a/rules/wuppertal/ddc.json b/rules/wuppertal/ddc.json
new file mode 100644
index 0000000..7892fda
--- /dev/null
+++ b/rules/wuppertal/ddc.json
@@ -0,0 +1,79 @@
+[
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "text",
+ "name": "dc:subject",
+ "columnName": "dc:subject",
+ "query": "^\\d\\d\\d",
+ "mode": "regex",
+ "caseSensitive": false,
+ "invert": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "columnName": "dc:subject",
+ "expression": "grel:null",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:subject using expression grel:null"
+ },
+ {
+ "op": "core/row-removal",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "Blank Rows",
+ "expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
+ "columnName": "",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": "true",
+ "l": "true"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "description": "Remove rows"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "setSpec",
+ "expression": "grel:value.split(':').reverse()[0]",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column setSpec using expression grel:value.split(':').reverse()[0]"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "setSpec",
+ "expression": "grel:value + '000'[0,3-value.length()]",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column setSpec using expression grel:value + '000'[0,3-value.length()]"
+ }
+]
diff --git a/rules/wuppertal/hbz.json b/rules/wuppertal/hbz.json
new file mode 100644
index 0000000..6649d3a
--- /dev/null
+++ b/rules/wuppertal/hbz.json
@@ -0,0 +1,49 @@
+[
+ {
+ "op": "core/column-addition-by-fetching-urls",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "baseColumnName": "urn",
+ "urlExpression": "grel:'https://lobid.org/resources/search?q=urn:\\\"' + value + '\\\"'",
+ "onError": "set-to-blank",
+ "newColumnName": "lobid",
+ "columnInsertIndex": 5,
+ "delay": 0,
+ "cacheResponses": true,
+ "httpHeadersJson": [
+ {
+ "name": "authorization",
+ "value": ""
+ },
+ {
+ "name": "user-agent",
+ "value": "OpenRefine 3.4 [6443506]"
+ },
+ {
+ "name": "accept",
+ "value": "*/*"
+ }
+ ],
+ "description": "Create column lobid at index 5 by fetching URLs based on column urn using expression grel:'https://lobid.org/resources/search?q=urn:\\\"' + value + '\\\"'"
+ },
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "baseColumnName": "lobid",
+ "expression": "grel:value.parseJson().member[0].hbzId",
+ "onError": "set-to-blank",
+ "newColumnName": "hbz",
+ "columnInsertIndex": 6,
+ "description": "Create column hbz at index 6 based on column lobid using expression grel:value.parseJson().member[0].hbzId"
+ },
+ {
+ "op": "core/column-removal",
+ "columnName": "lobid",
+ "description": "Remove column lobid"
+ }
+]
diff --git a/rules/wuppertal/html.json b/rules/wuppertal/html.json
new file mode 100644
index 0000000..935329c
--- /dev/null
+++ b/rules/wuppertal/html.json
@@ -0,0 +1,119 @@
+[
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:description",
+ "expression": "grel:value.replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:description using expression grel:value.replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title",
+ "expression": "grel:value.replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title using expression grel:value.replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:source",
+ "expression": "grel:value.replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:source using expression grel:value.replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','').replace('','')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title",
+ "expression": "grel:value.\nreplace('0','₀').\nreplace('1','₁').\nreplace('2','₂').\nreplace('3','₃').\nreplace('4','₄').\nreplace('5','₅').\nreplace('6','₆').\nreplace('7','₇').\nreplace('8','₈').\nreplace('9','₉').\nreplace('+','₊').\nreplace('-','₋').\nreplace('=','₌').\nreplace('(','₍').\nreplace(')','₎').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('o','ₒ').\nreplace('x','ₓ').\nreplace('ə','ₔ').\nreplace('h','ₕ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('p','ₚ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('h','ₕ').\nreplace('i','ᵢ').\nreplace('j','ⱼ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('o','ₒ').\nreplace('p','ₚ').\nreplace('r','ᵣ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('u','ᵤ').\nreplace('v','ᵥ').\nreplace('x','ₓ').\nreplace('β','ᵦ').\nreplace('γ','ᵧ').\nreplace('ρ','ᵨ').\nreplace('φ','ᵩ').\nreplace('χ','ᵪ').\nreplace('0','⁰').\nreplace('1','¹').\nreplace('2','²').\nreplace('3','³').\nreplace('4','⁴').\nreplace('5','⁵').\nreplace('6','⁶').\nreplace('7','⁷').\nreplace('8','⁸').\nreplace('9','⁹').\nreplace('+','⁺').\nreplace('-','⁻').\nreplace('=','⁼').\nreplace('(','⁽').\nreplace(')','⁾').\nreplace('n','ⁿ').\nreplace('i','ⁱ').\nreplace('A','ᴬ').\nreplace('B','ᴮ').\nreplace('D','ᴰ').\nreplace('E','ᴱ').\nreplace('G','ᴳ').\nreplace('H','ᴴ').\nreplace('I','ᴵ').\nreplace('J','ᴶ').\nreplace('K','ᴷ').\nreplace('L','ᴸ').\nreplace('M','ᴹ').\nreplace('N','ᴺ').\nreplace('O','ᴼ').\nreplace('P','ᴾ').\nreplace('R','ᴿ').\nreplace('T','ᵀ').\nreplace('U','ᵁ').\nreplace('V','ⱽ').\nreplace('W','ᵂ').\nreplace('a','ᵃ').\nreplace('b','ᵇ').\nreplace('c','ᶜ').\nreplace('d','ᵈ').\nreplace('e','ᵉ').\nreplace('f','ᶠ').\nreplace('g','ᵍ').\nreplace('h','ʰ').\nreplace('i','ⁱ').\nreplace('j','ʲ').\nreplace('k','ᵏ').\nreplace('l','ˡ').\nreplace('m','ᵐ').\nreplace('n','ⁿ').\nreplace('o','ᵒ').\nreplace('p','ᵖ').\nreplace('r','ʳ').\nreplace('s','
ˢ').\nreplace('t','ᵗ').\nreplace('u','ᵘ').\nreplace('v','ᵛ').\nreplace('w','ʷ').\nreplace('x','ˣ').\nreplace('y','ʸ').\nreplace('z','ᶻ').\nreplace('β','ᵝ').\nreplace('γ','ᵞ').\nreplace('δ','ᵟ').\nreplace('ε','ᵋ').\nreplace('θ','ᶿ').\nreplace('ι','ᶥ').\nreplace('υ','ᶹ').\nreplace('φ','ᵠ').\nreplace('χ','ᵡ')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title using expression grel:value.\nreplace('0','₀').\nreplace('1','₁').\nreplace('2','₂').\nreplace('3','₃').\nreplace('4','₄').\nreplace('5','₅').\nreplace('6','₆').\nreplace('7','₇').\nreplace('8','₈').\nreplace('9','₉').\nreplace('+','₊').\nreplace('-','₋').\nreplace('=','₌').\nreplace('(','₍').\nreplace(')','₎').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('o','ₒ').\nreplace('x','ₓ').\nreplace('ə','ₔ').\nreplace('h','ₕ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('p','ₚ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('h','ₕ').\nreplace('i','ᵢ').\nreplace('j','ⱼ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('o','ₒ').\nreplace('p','ₚ').\nreplace('r','ᵣ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('u','ᵤ').\nreplace('v','ᵥ').\nreplace('x','ₓ').\nreplace('β','ᵦ').\nreplace('γ','ᵧ').\nreplace('ρ','ᵨ').\nreplace('φ','ᵩ').\nreplace('χ','ᵪ').\nreplace('0','⁰').\nreplace('1','¹').\nreplace('2','²').\nreplace('3','³').\nreplace('4','⁴').\nreplace('5','⁵').\nreplace('6','⁶').\nreplace('7','⁷').\nreplace('8','⁸').\nreplace('9','⁹').\nreplace('+','⁺').\nreplace('-','⁻').\nreplace('=','⁼').\nreplace('(','⁽').\nreplace(')','⁾').\nreplace('n','ⁿ').\nreplace('i','ⁱ').\nreplace('A','ᴬ').\nreplace('B','ᴮ').\nreplace('D','ᴰ').\nreplace('E','ᴱ').\nreplace('G','ᴳ').\nreplace('H','ᴴ').\nreplace('I','ᴵ').\nreplace('J','ᴶ').\nreplace('K','ᴷ').\nreplace('L','ᴸ').\nreplace('M','ᴹ').\nreplace('N','ᴺ').\nreplace('O','ᴼ').\nreplace('P','ᴾ').\nreplace('R','ᴿ').\nreplace('T','ᵀ').\nreplace('U','ᵁ').\nreplace('V','ⱽ').\nreplace('W','ᵂ').\nreplace('a','ᵃ').\nreplace('b','ᵇ').\nreplace('c','ᶜ').\nreplace('d','ᵈ').\nreplace('e','ᵉ').\nreplace('f','ᶠ').\nreplace('g','ᵍ').\nreplace('h','ʰ').\nreplace('i','ⁱ').\nreplace('j','ʲ').\nreplace('k','ᵏ').\nreplace('l','ˡ').\nreplace('m','ᵐ').\nreplace('n','ⁿ').\nreplace('
o','ᵒ').\nreplace('p','ᵖ').\nreplace('r','ʳ').\nreplace('s','ˢ').\nreplace('t','ᵗ').\nreplace('u','ᵘ').\nreplace('v','ᵛ').\nreplace('w','ʷ').\nreplace('x','ˣ').\nreplace('y','ʸ').\nreplace('z','ᶻ').\nreplace('β','ᵝ').\nreplace('γ','ᵞ').\nreplace('δ','ᵟ').\nreplace('ε','ᵋ').\nreplace('θ','ᶿ').\nreplace('ι','ᶥ').\nreplace('υ','ᶹ').\nreplace('φ','ᵠ').\nreplace('χ','ᵡ')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:description",
+ "expression": "grel:value.\nreplace('0','₀').\nreplace('1','₁').\nreplace('2','₂').\nreplace('3','₃').\nreplace('4','₄').\nreplace('5','₅').\nreplace('6','₆').\nreplace('7','₇').\nreplace('8','₈').\nreplace('9','₉').\nreplace('+','₊').\nreplace('-','₋').\nreplace('=','₌').\nreplace('(','₍').\nreplace(')','₎').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('o','ₒ').\nreplace('x','ₓ').\nreplace('ə','ₔ').\nreplace('h','ₕ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('p','ₚ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('h','ₕ').\nreplace('i','ᵢ').\nreplace('j','ⱼ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('o','ₒ').\nreplace('p','ₚ').\nreplace('r','ᵣ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('u','ᵤ').\nreplace('v','ᵥ').\nreplace('x','ₓ').\nreplace('β','ᵦ').\nreplace('γ','ᵧ').\nreplace('ρ','ᵨ').\nreplace('φ','ᵩ').\nreplace('χ','ᵪ').\nreplace('0','⁰').\nreplace('1','¹').\nreplace('2','²').\nreplace('3','³').\nreplace('4','⁴').\nreplace('5','⁵').\nreplace('6','⁶').\nreplace('7','⁷').\nreplace('8','⁸').\nreplace('9','⁹').\nreplace('+','⁺').\nreplace('-','⁻').\nreplace('=','⁼').\nreplace('(','⁽').\nreplace(')','⁾').\nreplace('n','ⁿ').\nreplace('i','ⁱ').\nreplace('A','ᴬ').\nreplace('B','ᴮ').\nreplace('D','ᴰ').\nreplace('E','ᴱ').\nreplace('G','ᴳ').\nreplace('H','ᴴ').\nreplace('I','ᴵ').\nreplace('J','ᴶ').\nreplace('K','ᴷ').\nreplace('L','ᴸ').\nreplace('M','ᴹ').\nreplace('N','ᴺ').\nreplace('O','ᴼ').\nreplace('P','ᴾ').\nreplace('R','ᴿ').\nreplace('T','ᵀ').\nreplace('U','ᵁ').\nreplace('V','ⱽ').\nreplace('W','ᵂ').\nreplace('a','ᵃ').\nreplace('b','ᵇ').\nreplace('c','ᶜ').\nreplace('d','ᵈ').\nreplace('e','ᵉ').\nreplace('f','ᶠ').\nreplace('g','ᵍ').\nreplace('h','ʰ').\nreplace('i','ⁱ').\nreplace('j','ʲ').\nreplace('k','ᵏ').\nreplace('l','ˡ').\nreplace('m','ᵐ').\nreplace('n','ⁿ').\nreplace('o','ᵒ').\nreplace('p','ᵖ').\nreplace('r','ʳ').\nreplace('s','
ˢ').\nreplace('t','ᵗ').\nreplace('u','ᵘ').\nreplace('v','ᵛ').\nreplace('w','ʷ').\nreplace('x','ˣ').\nreplace('y','ʸ').\nreplace('z','ᶻ').\nreplace('β','ᵝ').\nreplace('γ','ᵞ').\nreplace('δ','ᵟ').\nreplace('ε','ᵋ').\nreplace('θ','ᶿ').\nreplace('ι','ᶥ').\nreplace('υ','ᶹ').\nreplace('φ','ᵠ').\nreplace('χ','ᵡ')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:description using expression grel:value.\nreplace('0','₀').\nreplace('1','₁').\nreplace('2','₂').\nreplace('3','₃').\nreplace('4','₄').\nreplace('5','₅').\nreplace('6','₆').\nreplace('7','₇').\nreplace('8','₈').\nreplace('9','₉').\nreplace('+','₊').\nreplace('-','₋').\nreplace('=','₌').\nreplace('(','₍').\nreplace(')','₎').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('o','ₒ').\nreplace('x','ₓ').\nreplace('ə','ₔ').\nreplace('h','ₕ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('p','ₚ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('h','ₕ').\nreplace('i','ᵢ').\nreplace('j','ⱼ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('o','ₒ').\nreplace('p','ₚ').\nreplace('r','ᵣ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('u','ᵤ').\nreplace('v','ᵥ').\nreplace('x','ₓ').\nreplace('β','ᵦ').\nreplace('γ','ᵧ').\nreplace('ρ','ᵨ').\nreplace('φ','ᵩ').\nreplace('χ','ᵪ').\nreplace('0','⁰').\nreplace('1','¹').\nreplace('2','²').\nreplace('3','³').\nreplace('4','⁴').\nreplace('5','⁵').\nreplace('6','⁶').\nreplace('7','⁷').\nreplace('8','⁸').\nreplace('9','⁹').\nreplace('+','⁺').\nreplace('-','⁻').\nreplace('=','⁼').\nreplace('(','⁽').\nreplace(')','⁾').\nreplace('n','ⁿ').\nreplace('i','ⁱ').\nreplace('A','ᴬ').\nreplace('B','ᴮ').\nreplace('D','ᴰ').\nreplace('E','ᴱ').\nreplace('G','ᴳ').\nreplace('H','ᴴ').\nreplace('I','ᴵ').\nreplace('J','ᴶ').\nreplace('K','ᴷ').\nreplace('L','ᴸ').\nreplace('M','ᴹ').\nreplace('N','ᴺ').\nreplace('O','ᴼ').\nreplace('P','ᴾ').\nreplace('R','ᴿ').\nreplace('T','ᵀ').\nreplace('U','ᵁ').\nreplace('V','ⱽ').\nreplace('W','ᵂ').\nreplace('a','ᵃ').\nreplace('b','ᵇ').\nreplace('c','ᶜ').\nreplace('d','ᵈ').\nreplace('e','ᵉ').\nreplace('f','ᶠ').\nreplace('g','ᵍ').\nreplace('h','ʰ').\nreplace('i','ⁱ').\nreplace('j','ʲ').\nreplace('k','ᵏ').\nreplace('l','ˡ').\nreplace('m','ᵐ').\nreplace('n','ⁿ').\nrep
lace('o','ᵒ').\nreplace('p','ᵖ').\nreplace('r','ʳ').\nreplace('s','ˢ').\nreplace('t','ᵗ').\nreplace('u','ᵘ').\nreplace('v','ᵛ').\nreplace('w','ʷ').\nreplace('x','ˣ').\nreplace('y','ʸ').\nreplace('z','ᶻ').\nreplace('β','ᵝ').\nreplace('γ','ᵞ').\nreplace('δ','ᵟ').\nreplace('ε','ᵋ').\nreplace('θ','ᶿ').\nreplace('ι','ᶥ').\nreplace('υ','ᶹ').\nreplace('φ','ᵠ').\nreplace('χ','ᵡ')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:source",
+ "expression": "grel:value.\nreplace('0','₀').\nreplace('1','₁').\nreplace('2','₂').\nreplace('3','₃').\nreplace('4','₄').\nreplace('5','₅').\nreplace('6','₆').\nreplace('7','₇').\nreplace('8','₈').\nreplace('9','₉').\nreplace('+','₊').\nreplace('-','₋').\nreplace('=','₌').\nreplace('(','₍').\nreplace(')','₎').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('o','ₒ').\nreplace('x','ₓ').\nreplace('ə','ₔ').\nreplace('h','ₕ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('p','ₚ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('h','ₕ').\nreplace('i','ᵢ').\nreplace('j','ⱼ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('o','ₒ').\nreplace('p','ₚ').\nreplace('r','ᵣ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('u','ᵤ').\nreplace('v','ᵥ').\nreplace('x','ₓ').\nreplace('β','ᵦ').\nreplace('γ','ᵧ').\nreplace('ρ','ᵨ').\nreplace('φ','ᵩ').\nreplace('χ','ᵪ').\nreplace('0','⁰').\nreplace('1','¹').\nreplace('2','²').\nreplace('3','³').\nreplace('4','⁴').\nreplace('5','⁵').\nreplace('6','⁶').\nreplace('7','⁷').\nreplace('8','⁸').\nreplace('9','⁹').\nreplace('+','⁺').\nreplace('-','⁻').\nreplace('=','⁼').\nreplace('(','⁽').\nreplace(')','⁾').\nreplace('n','ⁿ').\nreplace('i','ⁱ').\nreplace('A','ᴬ').\nreplace('B','ᴮ').\nreplace('D','ᴰ').\nreplace('E','ᴱ').\nreplace('G','ᴳ').\nreplace('H','ᴴ').\nreplace('I','ᴵ').\nreplace('J','ᴶ').\nreplace('K','ᴷ').\nreplace('L','ᴸ').\nreplace('M','ᴹ').\nreplace('N','ᴺ').\nreplace('O','ᴼ').\nreplace('P','ᴾ').\nreplace('R','ᴿ').\nreplace('T','ᵀ').\nreplace('U','ᵁ').\nreplace('V','ⱽ').\nreplace('W','ᵂ').\nreplace('a','ᵃ').\nreplace('b','ᵇ').\nreplace('c','ᶜ').\nreplace('d','ᵈ').\nreplace('e','ᵉ').\nreplace('f','ᶠ').\nreplace('g','ᵍ').\nreplace('h','ʰ').\nreplace('i','ⁱ').\nreplace('j','ʲ').\nreplace('k','ᵏ').\nreplace('l','ˡ').\nreplace('m','ᵐ').\nreplace('n','ⁿ').\nreplace('o','ᵒ').\nreplace('p','ᵖ').\nreplace('r','ʳ').\nreplace('s','
ˢ').\nreplace('t','ᵗ').\nreplace('u','ᵘ').\nreplace('v','ᵛ').\nreplace('w','ʷ').\nreplace('x','ˣ').\nreplace('y','ʸ').\nreplace('z','ᶻ').\nreplace('β','ᵝ').\nreplace('γ','ᵞ').\nreplace('δ','ᵟ').\nreplace('ε','ᵋ').\nreplace('θ','ᶿ').\nreplace('ι','ᶥ').\nreplace('υ','ᶹ').\nreplace('φ','ᵠ').\nreplace('χ','ᵡ')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:source using expression grel:value.\nreplace('0','₀').\nreplace('1','₁').\nreplace('2','₂').\nreplace('3','₃').\nreplace('4','₄').\nreplace('5','₅').\nreplace('6','₆').\nreplace('7','₇').\nreplace('8','₈').\nreplace('9','₉').\nreplace('+','₊').\nreplace('-','₋').\nreplace('=','₌').\nreplace('(','₍').\nreplace(')','₎').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('o','ₒ').\nreplace('x','ₓ').\nreplace('ə','ₔ').\nreplace('h','ₕ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('p','ₚ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('a','ₐ').\nreplace('e','ₑ').\nreplace('h','ₕ').\nreplace('i','ᵢ').\nreplace('j','ⱼ').\nreplace('k','ₖ').\nreplace('l','ₗ').\nreplace('m','ₘ').\nreplace('n','ₙ').\nreplace('o','ₒ').\nreplace('p','ₚ').\nreplace('r','ᵣ').\nreplace('s','ₛ').\nreplace('t','ₜ').\nreplace('u','ᵤ').\nreplace('v','ᵥ').\nreplace('x','ₓ').\nreplace('β','ᵦ').\nreplace('γ','ᵧ').\nreplace('ρ','ᵨ').\nreplace('φ','ᵩ').\nreplace('χ','ᵪ').\nreplace('0','⁰').\nreplace('1','¹').\nreplace('2','²').\nreplace('3','³').\nreplace('4','⁴').\nreplace('5','⁵').\nreplace('6','⁶').\nreplace('7','⁷').\nreplace('8','⁸').\nreplace('9','⁹').\nreplace('+','⁺').\nreplace('-','⁻').\nreplace('=','⁼').\nreplace('(','⁽').\nreplace(')','⁾').\nreplace('n','ⁿ').\nreplace('i','ⁱ').\nreplace('A','ᴬ').\nreplace('B','ᴮ').\nreplace('D','ᴰ').\nreplace('E','ᴱ').\nreplace('G','ᴳ').\nreplace('H','ᴴ').\nreplace('I','ᴵ').\nreplace('J','ᴶ').\nreplace('K','ᴷ').\nreplace('L','ᴸ').\nreplace('M','ᴹ').\nreplace('N','ᴺ').\nreplace('O','ᴼ').\nreplace('P','ᴾ').\nreplace('R','ᴿ').\nreplace('T','ᵀ').\nreplace('U','ᵁ').\nreplace('V','ⱽ').\nreplace('W','ᵂ').\nreplace('a','ᵃ').\nreplace('b','ᵇ').\nreplace('c','ᶜ').\nreplace('d','ᵈ').\nreplace('e','ᵉ').\nreplace('f','ᶠ').\nreplace('g','ᵍ').\nreplace('h','ʰ').\nreplace('i','ⁱ').\nreplace('j','ʲ').\nreplace('k','ᵏ').\nreplace('l','ˡ').\nreplace('m','ᵐ').\nreplace('n','ⁿ').\nreplace(
'o','ᵒ').\nreplace('p','ᵖ').\nreplace('r','ʳ').\nreplace('s','ˢ').\nreplace('t','ᵗ').\nreplace('u','ᵘ').\nreplace('v','ᵛ').\nreplace('w','ʷ').\nreplace('x','ˣ').\nreplace('y','ʸ').\nreplace('z','ᶻ').\nreplace('β','ᵝ').\nreplace('γ','ᵞ').\nreplace('δ','ᵟ').\nreplace('ε','ᵋ').\nreplace('θ','ᶿ').\nreplace('ι','ᶥ').\nreplace('υ','ᶹ').\nreplace('φ','ᵠ').\nreplace('χ','ᵡ')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "record-based"
+ },
+ "columnName": "dc:description",
+ "expression": "grel:value.replace('
','␟').replace('
','␟').replace('
','␟- ').parseHtml().htmlText().replace('␟','\n').trim()",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:description using expression grel:value.replace('
','␟').replace('','␟').replace('
','␟- ').parseHtml().htmlText().replace('␟','\n').trim()"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "record-based"
+ },
+ "columnName": "dc:title",
+ "expression": "grel:value.parseHtml().htmlText()",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title using expression grel:value.parseHtml().htmlText()"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "record-based"
+ },
+ "columnName": "dc:source",
+ "expression": "grel:value.parseHtml().htmlText()",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:source using expression grel:value.parseHtml().htmlText()"
+ }
+]
diff --git a/rules/wuppertal/identifier.json b/rules/wuppertal/identifier.json
new file mode 100644
index 0000000..90ad0d6
--- /dev/null
+++ b/rules/wuppertal/identifier.json
@@ -0,0 +1,71 @@
+[
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "text",
+ "name": "dc:identifier",
+ "columnName": "dc:identifier",
+ "query": "^urn:nbn",
+ "mode": "regex",
+ "caseSensitive": false,
+ "invert": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "dc:identifier",
+ "expression": "grel:value",
+ "onError": "set-to-blank",
+ "newColumnName": "urn",
+ "columnInsertIndex": 2,
+ "description": "Create column urn at index 2 based on column dc:identifier using expression grel:value"
+ },
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "text",
+ "name": "dc:identifier",
+ "columnName": "dc:identifier",
+ "query": "doi.org/",
+ "mode": "text",
+ "caseSensitive": false,
+ "invert": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "dc:identifier",
+ "expression": "grel:value.replace('https://doi.org/','')",
+ "onError": "set-to-blank",
+ "newColumnName": "doi",
+ "columnInsertIndex": 2,
+ "description": "Create column doi at index 2 based on column dc:identifier using expression grel:value.replace('https://doi.org/','')"
+ },
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "text",
+ "name": "dc:identifier",
+ "columnName": "dc:identifier",
+ "query": "\\.pdf$",
+ "mode": "regex",
+ "caseSensitive": false,
+ "invert": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "dc:identifier",
+ "expression": "grel:value",
+ "onError": "set-to-blank",
+ "newColumnName": "pdf",
+ "columnInsertIndex": 2,
+ "description": "Create column pdf at index 2 based on column dc:identifier using expression grel:value"
+ }
+]
diff --git a/rules/wuppertal/join.json b/rules/wuppertal/join.json
new file mode 100644
index 0000000..8519448
--- /dev/null
+++ b/rules/wuppertal/join.json
@@ -0,0 +1,394 @@
+[
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "id",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column id using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "url",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column url using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:identifier",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:identifier using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "doi",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column doi using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "topic",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column topic using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "urn",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column urn using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:description",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:description using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:type",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:type using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:subject",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:subject using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:publisher",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:publisher using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "ioo",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column ioo using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:language",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:language using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:format",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:format using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title/xml:lang",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:description/xml:lang",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:description/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "cc",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column cc using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:source",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:source using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:contributor",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:contributor using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:creator",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:creator using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:date",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:date using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:coverage/xml:lang",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:coverage/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:coverage",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:coverage using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:rights/xml:lang",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:rights/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:rights",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:rights using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:source/xml:lang",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:source/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "datestamp",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column datestamp using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "setSpec",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column setSpec using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
+ },
+ {
+ "op": "core/row-removal",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "Blank Rows",
+ "expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
+ "columnName": "",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": "true",
+ "l": "true"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "description": "Remove rows"
+ }
+]
diff --git a/rules/wuppertal/language.json b/rules/wuppertal/language.json
new file mode 100644
index 0000000..5882d6b
--- /dev/null
+++ b/rules/wuppertal/language.json
@@ -0,0 +1,54 @@
+[
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:language",
+ "expression": "grel:forEach(value.split('␞'),v,v.replace(/^deu$/,'ger')).join('␞')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:language using expression grel:forEach(value.split('␞'),v,v.replace(/^deu$/,'ger')).join('␞')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:coverage/xml:lang",
+ "expression": "grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:coverage/xml:lang using expression grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:description/xml:lang",
+ "expression": "grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:description/xml:lang using expression grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title/xml:lang",
+ "expression": "grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title/xml:lang using expression grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')"
+ }
+]
diff --git a/rules/wuppertal/linkcheck.json b/rules/wuppertal/linkcheck.json
new file mode 100644
index 0000000..c3dea55
--- /dev/null
+++ b/rules/wuppertal/linkcheck.json
@@ -0,0 +1,15 @@
+[
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "baseColumnName": "url",
+ "expression": "jython:import httplib\nimport urlparse\n# HEAD-request every URL in the cell (values are separated by U+241E) and collect the HTTP status codes\nstatus = []\nfor x in value.split(u'\\u241e'):\n    url = urlparse.urlparse(x)\n    conn = httplib.HTTPConnection(url[1])\n    try:\n        conn.request(\"HEAD\", url[2])\n        res = conn.getresponse()\n        status.append(str(res.status))\n    finally:\n        # always release the socket, even when the request fails\n        conn.close()\nreturn ','.join(status)",
+ "onError": "set-to-blank",
+ "newColumnName": "linkcheck",
+ "columnInsertIndex": 3,
+ "description": "Create column linkcheck at index 3 based on column url using expression jython:import httplib\nimport urlparse\n# HEAD-request every URL in the cell (values are separated by U+241E) and collect the HTTP status codes\nstatus = []\nfor x in value.split(u'\\u241e'):\n    url = urlparse.urlparse(x)\n    conn = httplib.HTTPConnection(url[1])\n    try:\n        conn.request(\"HEAD\", url[2])\n        res = conn.getresponse()\n        status.append(str(res.status))\n    finally:\n        # always release the socket, even when the request fails\n        conn.close()\nreturn ','.join(status)"
+ }
+]
diff --git a/rules/wuppertal/nbn.json b/rules/wuppertal/nbn.json
new file mode 100644
index 0000000..289aceb
--- /dev/null
+++ b/rules/wuppertal/nbn.json
@@ -0,0 +1,181 @@
+[
+ {
+ "op": "core/column-addition-by-fetching-urls",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "urn",
+ "expression": "isBlank(value)",
+ "columnName": "urn",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": false,
+ "l": "false"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "urn",
+ "urlExpression": "grel:'https://nbn-resolving.org/process-urn-form?identifier=' + value + '&verb=FULL&xml=on'",
+ "onError": "set-to-blank",
+ "newColumnName": "nbn-resolving",
+ "columnInsertIndex": 2,
+ "delay": 0,
+ "cacheResponses": true,
+ "httpHeadersJson": [
+ {
+ "name": "authorization",
+ "value": ""
+ },
+ {
+ "name": "user-agent",
+ "value": "OpenRefine 3.4 [6443506]"
+ },
+ {
+ "name": "accept",
+ "value": "*/*"
+ }
+ ],
+ "description": "Create column nbn-resolving at index 2 by fetching URLs based on column urn using expression grel:'https://nbn-resolving.org/process-urn-form?identifier=' + value + '&verb=FULL&xml=on'"
+ },
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "urn",
+ "expression": "isBlank(value)",
+ "columnName": "urn",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": false,
+ "l": "false"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "nbn-resolving",
+ "expression": "grel:value.parseXml().select('pidef|pidef pidef|data pidef|resolving_information pidef|url_info pidef|url')[0].htmlText()",
+ "onError": "set-to-blank",
+ "newColumnName": "url",
+ "columnInsertIndex": 3,
+ "description": "Create column url at index 3 based on column nbn-resolving using expression grel:value.parseXml().select('pidef|pidef pidef|data pidef|resolving_information pidef|url_info pidef|url')[0].htmlText()"
+ },
+ {
+ "op": "core/column-removal",
+ "columnName": "nbn-resolving",
+ "description": "Remove column nbn-resolving"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "url",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column url using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "pdf",
+ "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column pdf using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "id",
+ "expression": "isBlank(value)",
+ "columnName": "id",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": false,
+ "l": "false"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "columnName": "url",
+ "expression": "grel:if(value.contains('.pdf'),value,cells['pdf'].value)",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column url using expression grel:if(value.contains('.pdf'),value,cells['pdf'].value)"
+ },
+ {
+ "op": "core/column-removal",
+ "columnName": "pdf",
+ "description": "Remove column pdf"
+ },
+ {
+ "op": "core/row-removal",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "url",
+ "expression": "grel:row.record.cells['url'].value.join('').contains('.pdf')",
+ "columnName": "url",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": false,
+ "l": "false"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "record-based"
+ },
+ "description": "Remove rows"
+ }
+]
diff --git a/rules/wuppertal/nonsort.json b/rules/wuppertal/nonsort.json
new file mode 100644
index 0000000..72cc897
--- /dev/null
+++ b/rules/wuppertal/nonsort.json
@@ -0,0 +1,48 @@
+[
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "baseColumnName": "dc:title",
+ "expression": "grel:with(['a', 'das', 'dem', 'den', 'der', 'des', 'die', 'ein', 'eine', 'einem', 'einen', 'einer', 'eines', 'the'],x,if(inArray(x,value.split(' ')[0].toLowercase()),value.split(' ')[0] + ' ',''))",
+ "onError": "set-to-blank",
+ "newColumnName": "nonsort",
+ "columnInsertIndex": 14,
+ "description": "Create column nonsort at index 14 based on column dc:title using expression grel:with(['a', 'das', 'dem', 'den', 'der', 'des', 'die', 'ein', 'eine', 'einem', 'einen', 'einer', 'eines', 'the'],x,if(inArray(x,value.split(' ')[0].toLowercase()),value.split(' ')[0] + ' ',''))"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "nonsort",
+ "expression": "isBlank(value)",
+ "columnName": "nonsort",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": false,
+ "l": "false"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title",
+ "expression": "grel:value.split(' ').slice(1).join(' ')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title using expression grel:value.split(' ').slice(1).join(' ')"
+ }
+]
diff --git a/rules/wuppertal/publisher.json b/rules/wuppertal/publisher.json
new file mode 100644
index 0000000..7f603a6
--- /dev/null
+++ b/rules/wuppertal/publisher.json
@@ -0,0 +1,35 @@
+[
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "id",
+ "expression": "isBlank(value)",
+ "columnName": "id",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": false,
+ "l": "false"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "dc:identifier",
+ "expression": "grel:\"Bergische Universität Wuppertal\"",
+ "onError": "set-to-blank",
+ "newColumnName": "dc:publisher",
+ "columnInsertIndex": 2,
+ "description": "Create column dc:publisher at index 2 based on column dc:identifier using expression grel:\"Bergische Universität Wuppertal\""
+ }
+]
diff --git a/rules/wuppertal/rights-cc.json b/rules/wuppertal/rights-cc.json
new file mode 100644
index 0000000..88a1869
--- /dev/null
+++ b/rules/wuppertal/rights-cc.json
@@ -0,0 +1,15 @@
+[
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "cc",
+ "expression": "grel:value.replace('https://creativecommons.org/licenses/','CC ').replace('/',' ').trim().toUppercase()",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column cc using expression grel:value.replace('https://creativecommons.org/licenses/','CC ').replace('/',' ').trim().toUppercase()"
+ }
+]
diff --git a/rules/wuppertal/rights.json b/rules/wuppertal/rights.json
new file mode 100644
index 0000000..19944c5
--- /dev/null
+++ b/rules/wuppertal/rights.json
@@ -0,0 +1,25 @@
+[
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "text",
+ "name": "dc:rights",
+ "columnName": "dc:rights",
+ "query": "creativecommons",
+ "mode": "text",
+ "caseSensitive": false,
+ "invert": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "dc:rights",
+ "expression": "grel:filter(value.ngram(1),v,v.contains('creativecommons.org'))[0].replace(/\\.$/,'').replace('(','').replace(')','').replace('http://','https://').replace('deed.de','')",
+ "onError": "set-to-blank",
+ "newColumnName": "cc",
+ "columnInsertIndex": 20,
+ "description": "Create column cc at index 20 based on column dc:rights using expression grel:filter(value.ngram(1),v,v.contains('creativecommons.org'))[0].replace(/\\.$/,'').replace('(','').replace(')','').replace('http://','https://').replace('deed.de','')"
+ }
+]
diff --git a/rules/wuppertal/subjects.json b/rules/wuppertal/subjects.json
new file mode 100644
index 0000000..d323182
--- /dev/null
+++ b/rules/wuppertal/subjects.json
@@ -0,0 +1,48 @@
+[
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "text",
+ "name": "dc:subject",
+ "columnName": "dc:subject",
+ "query": "Fakultät",
+ "mode": "text",
+ "caseSensitive": false,
+ "invert": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "dc:subject",
+ "expression": "grel:value.replace(' » Dissertationen','')",
+ "onError": "set-to-blank",
+ "newColumnName": "ioo",
+ "columnInsertIndex": 5,
+ "description": "Create column ioo at index 5 based on column dc:subject using expression grel:value.replace(' » Dissertationen','')"
+ },
+ {
+ "op": "core/column-addition",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "text",
+ "name": "dc:subject",
+ "columnName": "dc:subject",
+ "query": "Fakultät",
+ "mode": "text",
+ "caseSensitive": false,
+ "invert": true
+ }
+ ],
+ "mode": "row-based"
+ },
+ "baseColumnName": "dc:subject",
+ "expression": "grel:value",
+ "onError": "set-to-blank",
+ "newColumnName": "topic",
+ "columnInsertIndex": 5,
+ "description": "Create column topic at index 5 based on column dc:subject using expression grel:value"
+ }
+]
diff --git a/rules/wuppertal/subtitle.json b/rules/wuppertal/subtitle.json
new file mode 100644
index 0000000..e839ed3
--- /dev/null
+++ b/rules/wuppertal/subtitle.json
@@ -0,0 +1,161 @@
+[
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "dc:title/xml:lang",
+ "expression": "value",
+ "columnName": "dc:title/xml:lang",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": "de␞de",
+ "l": "de␞de"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title",
+ "expression": "grel:value.replace('␞','␟')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title using expression grel:value.replace('␞','␟')"
+ },
+ {
+ "op": "core/mass-edit",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title/xml:lang",
+ "expression": "value",
+ "edits": [
+ {
+ "from": [
+ "de␞de"
+ ],
+ "fromBlank": false,
+ "fromError": false,
+ "to": "de"
+ }
+ ],
+ "description": "Mass edit cells in column dc:title/xml:lang"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "dc:title/xml:lang",
+ "expression": "value",
+ "columnName": "dc:title/xml:lang",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": "en␞en",
+ "l": "en␞en"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title",
+ "expression": "grel:value.replace('␞','␟')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title using expression grel:value.replace('␞','␟')"
+ },
+ {
+ "op": "core/mass-edit",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title/xml:lang",
+ "expression": "value",
+ "edits": [
+ {
+ "from": [
+ "en␞en"
+ ],
+ "fromBlank": false,
+ "fromError": false,
+ "to": "en"
+ }
+ ],
+ "description": "Mass edit cells in column dc:title/xml:lang"
+ },
+ {
+ "op": "core/text-transform",
+ "engineConfig": {
+ "facets": [
+ {
+ "type": "list",
+ "name": "dc:title/xml:lang",
+ "expression": "value",
+ "columnName": "dc:title/xml:lang",
+ "invert": false,
+ "omitBlank": false,
+ "omitError": false,
+ "selection": [
+ {
+ "v": {
+ "v": "es␞es",
+ "l": "es␞es"
+ }
+ }
+ ],
+ "selectBlank": false,
+ "selectError": false
+ }
+ ],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title",
+ "expression": "grel:value.replace('␞','␟')",
+ "onError": "keep-original",
+ "repeat": false,
+ "repeatCount": 10,
+ "description": "Text transform on cells in column dc:title using expression grel:value.replace('␞','␟')"
+ },
+ {
+ "op": "core/mass-edit",
+ "engineConfig": {
+ "facets": [],
+ "mode": "row-based"
+ },
+ "columnName": "dc:title/xml:lang",
+ "expression": "value",
+ "edits": [
+ {
+ "from": [
+ "es␞es"
+ ],
+ "fromBlank": false,
+ "fromError": false,
+ "to": "es"
+ }
+ ],
+ "description": "Mass edit cells in column dc:title/xml:lang"
+ }
+]
diff --git a/rules/wuppertal/template.txt b/rules/wuppertal/template.txt
new file mode 100644
index 0000000..800f2b9
--- /dev/null
+++ b/rules/wuppertal/template.txt
@@ -0,0 +1,70 @@
+
+
+
+
+ {{forEachIndex(cells['dc:title'].value.split('␞'), i, v, '
+ ' + forNonBlank(cells['nonsort'].value, z,'
+ ' + z.escape('xml') + '', '') + '
+ '+v.split('␟')[0].escape('xml')+''+forNonBlank(v.split('␟')[1], v, '
+ ' + v.escape('xml') + '', '')+'
+ ').join('')}}
+
+ {{cells['dc:creator'].value.escape('xml')}}
+ {{cells['dc:creator'].value.split(',')[0].escape('xml')}}{{forNonBlank(cells['dc:creator'].value.split(',')[1].trim(),v,'
+ ' + v.escape('xml') + '','')}}
+
+ aut
+
+ {{forNonBlank(cells['dc:contributor'].value,x,forEach(x.split('␞'),v,'
+
+ '+ v.escape('xml') +'
+ ' + v.split(',')[0].escape('xml') + '
+ ' + v.split(',')[1].trim().escape('xml') + '
+
+ ctb
+
+ ').join(''),'')}}
+ text
+ doctoralThesis{{forNonBlank(cells['dc:date'].value,v,'
+
+ ' + v.escape('xml') + '
+ ','')}}{{forNonBlank(cells['dc:language'].value,v,'
+
+ ' + v.escape('xml') + '
+ ','')}}{{forNonBlank(cells['dc:description'].value, x, forEachIndex(x.split('␞'), i, v, '
+ ' + v.escape('xml') + '').join(''),'')}}{{forNonBlank(cells['topic'].value,x,'
+ ' + forEach(x.split('␞'),v,'
+ ' + v.escape('xml') + '').join('') + '
+ ','')}}{{forNonBlank(cells['setSpec'].value,x,forEach(x.split('␞'),v,'
+ ' + v.escape('xml') + '').join(''),'')}}{{forNonBlank(cells['ioo'].value,x,forEach(x.split('␞'),v,'
+ ').join(''),'')}}
+ {{cells['urn'].value.escape('xml')}}{{forNonBlank(cells['doi'].value,v,'
+ ' + v.escape('xml') + '','')}}{{forNonBlank(cells['hbz'].value,v,'
+ ' + v.escape('xml') + '','')}}{{forNonBlank(cells['cc'].value,v,'
+ ' + v.escape('xml') + '','')}}
+
+ wuppertal_elpub_{{cells['id'].value.split(':').reverse()[0].escape('xml')}}
+
+
+ oaDoctoralThesis
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/rules/wuppertal/vorverarbeitung.json b/rules/wuppertal/vorverarbeitung.json
new file mode 100644
index 0000000..26a3ab5
--- /dev/null
+++ b/rules/wuppertal/vorverarbeitung.json
@@ -0,0 +1,139 @@
+[
+ {
+ "op": "core/column-move",
+ "columnName": "Record - header - identifier",
+ "index": 0,
+ "description": "Move column Record - header - identifier to position 0"
+ },
+ {
+ "op": "core/column-removal",
+ "columnName": "Record - metadata - oai_dc:dc - xsi:schemaLocation",
+ "description": "Remove column Record - metadata - oai_dc:dc - xsi:schemaLocation"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - header - identifier",
+ "newColumnName": "id",
+ "description": "Rename column Record - header - identifier to id"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:identifier",
+ "newColumnName": "dc:identifier",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:identifier to dc:identifier"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:subject",
+ "newColumnName": "dc:subject",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:subject to dc:subject"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:type",
+ "newColumnName": "dc:type",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:type to dc:type"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:description",
+ "newColumnName": "dc:description",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:description to dc:description"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:description - xml:lang",
+ "newColumnName": "dc:description/xml:lang",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:description - xml:lang to dc:description/xml:lang"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:title",
+ "newColumnName": "dc:title",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:title to dc:title"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:title - xml:lang",
+ "newColumnName": "dc:title/xml:lang",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:title - xml:lang to dc:title/xml:lang"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:format",
+ "newColumnName": "dc:format",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:format to dc:format"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:language",
+ "newColumnName": "dc:language",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:language to dc:language"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:date",
+ "newColumnName": "dc:date",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:date to dc:date"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:creator",
+ "newColumnName": "dc:creator",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:creator to dc:creator"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:contributor",
+ "newColumnName": "dc:contributor",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:contributor to dc:contributor"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:source",
+ "newColumnName": "dc:source",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:source to dc:source"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:source - xml:lang",
+ "newColumnName": "dc:source/xml:lang",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:source - xml:lang to dc:source/xml:lang"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:rights",
+ "newColumnName": "dc:rights",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:rights to dc:rights"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:rights - xml:lang",
+ "newColumnName": "dc:rights/xml:lang",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:rights - xml:lang to dc:rights/xml:lang"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:coverage",
+ "newColumnName": "dc:coverage",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:coverage to dc:coverage"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - metadata - oai_dc:dc - dc:coverage - xml:lang",
+ "newColumnName": "dc:coverage/xml:lang",
+ "description": "Rename column Record - metadata - oai_dc:dc - dc:coverage - xml:lang to dc:coverage/xml:lang"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - header - setSpec",
+ "newColumnName": "setSpec",
+ "description": "Rename column Record - header - setSpec to setSpec"
+ },
+ {
+ "op": "core/column-rename",
+ "oldColumnName": "Record - header - datestamp",
+ "newColumnName": "datestamp",
+ "description": "Rename column Record - header - datestamp to datestamp"
+ }
+]
diff --git a/tasks/wuppertal.yml b/tasks/wuppertal.yml
new file mode 100644
index 0000000..3541290
--- /dev/null
+++ b/tasks/wuppertal.yml
@@ -0,0 +1,130 @@
+# https://taskfile.dev
+
+version: '3'
+
+tasks:
+ default: # full pipeline: harvest first (dependency), then run refine, check, split, validate and zip in that order
+ desc: harvesten und transformieren # German for "harvest and transform"
+ deps: [harvest]
+ cmds:
+ - task: refine
+ - task: check
+ - task: split
+ - task: validate
+ - task: zip
+
+ harvest: # fetch the repository's records in oai_dc format with metha and write them to wuppertal.xml
+ desc: nur harvesten # German for "harvest only"
+ dir: data/wuppertal/harvest
+ cmds:
+ - METHA_DIR=$PWD metha-sync --format oai_dc http://elpub.bib.uni-wuppertal.de/servlets/OAIDataProvider # METHA_DIR=$PWD: presumably keeps metha's harvest data inside this task's dir; confirm against the metha docs
+ - METHA_DIR=$PWD metha-cat --format oai_dc http://elpub.bib.uni-wuppertal.de/servlets/OAIDataProvider > wuppertal.xml
+
+ refine: # start OpenRefine, import the harvested XML, apply the rule files in sequence, export the results, then stop OpenRefine
+ dir: data/wuppertal/refine
+ ignore_error: true # stopgap: avoid leaving an orphaned Java process behind on exit, see https://github.com/go-task/task/issues/141
+ env:
+ PORT: 3335 # port handed to OpenRefine (-p) and to the client (-P)
+ RAM: 8G # handed to OpenRefine via -m
+ PROJECT: wuppertal # OpenRefine project name used by every client call
+ cmds:
+ # start OpenRefine in the background, logging to openrefine.log
+ - $OPENREFINE -v warn -p $PORT -m $RAM -d $PWD > openrefine.log 2>&1 &
+ - timeout 30s bash -c "until curl -s http://localhost:$PORT | cat | grep -q -o OpenRefine ; do sleep 1; done" # wait up to 30s until the server answers
+ # import (requires an absolute path to the XML file)
+ - $OPENREFINE_CLIENT -P $PORT --create "$(readlink -e ../harvest/wuppertal.xml)" --recordPath Records --recordPath Record --storeEmptyStrings false --trimStrings true --projectName $PROJECT
+ # preprocessing: move the identifier to the first column; delete unneeded columns (without distinguishing features); rename the remaining columns (strip the path)
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/vorverarbeitung.json $PROJECT
+ # remove HTML tags and convert subscript and superscript to Unicode (affects dc:description, dc:source and dc:title)
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/html.json $PROJECT
+ # normalize DDC to three digits throughout (affects dc:subject and oai:setSpec)
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/ddc.json $PROJECT
+ # set dc:publisher
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/publisher.json $PROJECT
+ # extract URNs, DOIs and PDF links from dc:identifier
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/identifier.json $PROJECT
+ # generate direct links by resolving the URNs via nbn-resolving and delete records without a direct link to a PDF
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/nbn.json $PROJECT
+ # split dc:subject into ioo and topic
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/subjects.json $PROJECT
+ # standardized rights statements, part 1 (links to CC licenses)
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/rights.json $PROJECT
+ # prepare the data structure for templating: one record per row, and delete empty rows
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/join.json $PROJECT
+ # merge title statements in the same language into title/subtitle
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/subtitle.json $PROJECT
+ # language codes according to ISO 639-2/B (affects dc:language and the xml:lang attributes of dc:coverage, dc:description and dc:title)
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/language.json $PROJECT
+ # standardized rights statements, part 2 (canonical names for CC licenses)
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/rights-cc.json $PROJECT
+ # enrich with the HT number via lobid-resources
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/hbz.json $PROJECT
+ # mods:nonSort handling for the first element in dc:title
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/nonsort.json $PROJECT
+ # check links: determine the HTTP status code (e.g. 200)
+ - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/linkcheck.json $PROJECT
+ # export as METS:MODS via templating; the quoted row separator below spans several lines, so keep it exactly as is
+ - |
+ $OPENREFINE_CLIENT -P $PORT --export --template "$(< ../../../rules/wuppertal/template.txt)" --rowSeparator "
+
+ " --output wuppertal.txt $PROJECT
+ # export for debugging
+ - $OPENREFINE_CLIENT -P $PORT --export --output wuppertal-debug.tsv $PROJECT
+ # shut down OpenRefine
+ - ps -o start,etime,%mem,%cpu,rss -p $(lsof -t -i:$PORT) # statistics
+ - kill -9 $(lsof -t -i:$PORT) # SIGKILL (-9) prevents an unnecessary save operation
+ - rm -rf ./*.project* && rm -f workspace.json # delete OpenRefine's temporary files
+ sources:
+ - ../harvest/wuppertal.xml
+ - ../../../rules/wuppertal/*.json
+ - ../../../rules/wuppertal/template.txt
+# - ../../../rules/common/*.json
+ generates:
+ - wuppertal.txt
+ - wuppertal-debug.tsv
+
+ check: # fail the run when openrefine.log contains "exception" or "error" (case-insensitive)
+ dir: data/wuppertal/refine
+ cmds:
+ # check the OpenRefine log file for warnings and error messages; matching lines are printed and the task exits with status 1
+ - if grep -i 'exception\|error' openrefine.log; then echo 1>&2 "log contains warnings!" && exit 1; fi
+
+ split: # cut ../refine/wuppertal.txt into pieces with csplit and rename each piece to <recordIdentifier>.xml
+ dir: data/wuppertal/split
+ cmds:
+ # split into individual files; NOTE(review): the csplit pattern '//' is empty as written, presumably meant to match the blank separator line of the export; confirm
+ - csplit -q ../refine/wuppertal.txt --suppress-matched '//' "{*}"
+ # delete any existing XML files
+ - rm -f *.xml
+ # use the identifier as the file name
+ - for f in xx*; do mv "$f" "$(xmllint --xpath "//*[local-name(.) = 'recordIdentifier']/text()" "$f").xml"; done
+ sources:
+ - ../refine/wuppertal.txt
+ generates:
+ - ./*.xml
+
+ validate: # run xmllint schema validation over split/*.xml and collect its output in validate.log
+ dir: data/wuppertal/
+ cmds:
+ # validate against the METS schema
+ - wget -q -nc https://www.loc.gov/standards/mets/mets.xsd # -nc: do not download again if mets.xsd already exists
+ - xmllint --schema mets.xsd --noout split/*.xml > validate.log 2>&1
+ sources:
+ - split/*.xml
+ generates:
+ - validate.log
+
+ zip: # bundle split/*.xml into a ZIP archive whose name carries the DATE variable
+ dir: data/wuppertal/
+ cmds:
+ # create a ZIP archive with a date stamp in its name
+ - zip -q -FS -j wuppertal_{{.DATE}}.zip split/*.xml # -FS: sync archive contents with the file set, -j: store files without directory paths
+ sources:
+ - split/*.xml
+ generates:
+ - wuppertal_{{.DATE}}.zip
+
+ delete: # remove the whole data/wuppertal directory, i.e. the working dirs of all tasks above
+ desc: cache löschen # German for "delete cache"
+ cmds:
+ - rm -rf data/wuppertal