Automatisierung Elpub Wuppertal
This commit is contained in:
parent
2b223349bc
commit
0e84484338
|
@ -2,12 +2,11 @@
|
|||
|
||||
version: '3'
|
||||
|
||||
#silent: true
|
||||
output: prefixed
|
||||
|
||||
includes:
|
||||
siegen: ./tasks/siegen.yml
|
||||
# wuppertal: ./tasks/wuppertal.yml
|
||||
wuppertal: ./tasks/wuppertal.yml
|
||||
|
||||
vars:
|
||||
DATE: '{{ now | date "2006-01-02"}}'
|
||||
|
@ -31,5 +30,5 @@ tasks:
|
|||
- sh: test -x "$OPENREFINE_CLIENT"
|
||||
msg: "requirement openrefine-client missing"
|
||||
deps:
|
||||
# - task: wuppertal:default
|
||||
- task: wuppertal:default
|
||||
- task: siegen:default
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "dc:subject",
|
||||
"columnName": "dc:subject",
|
||||
"query": "^\\d\\d\\d",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:subject",
|
||||
"expression": "grel:null",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:subject using expression grel:null"
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Blank Rows",
|
||||
"expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
|
||||
"columnName": "",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "true",
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"description": "Remove rows"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "setSpec",
|
||||
"expression": "grel:value.split(':').reverse()[0]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column setSpec using expression grel:value.split(':').reverse()[0]"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "setSpec",
|
||||
"expression": "grel:value + '000'[0,3-value.length()]",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column setSpec using expression grel:value + '000'[0,3-value.length()]"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,49 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition-by-fetching-urls",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "urn",
|
||||
"urlExpression": "grel:'https://lobid.org/resources/search?q=urn:\\\"' + value + '\\\"'",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "lobid",
|
||||
"columnInsertIndex": 5,
|
||||
"delay": 0,
|
||||
"cacheResponses": true,
|
||||
"httpHeadersJson": [
|
||||
{
|
||||
"name": "authorization",
|
||||
"value": ""
|
||||
},
|
||||
{
|
||||
"name": "user-agent",
|
||||
"value": "OpenRefine 3.4 [6443506]"
|
||||
},
|
||||
{
|
||||
"name": "accept",
|
||||
"value": "*/*"
|
||||
}
|
||||
],
|
||||
"description": "Create column lobid at index 5 by fetching URLs based on column urn using expression grel:'https://lobid.org/resources/search?q=urn:\\\"' + value + '\\\"'"
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "lobid",
|
||||
"expression": "grel:value.parseJson().member[0].hbzId",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "hbz",
|
||||
"columnInsertIndex": 6,
|
||||
"description": "Create column hbz at index 6 based on column lobid using expression grel:value.parseJson().member[0].hbzId"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "lobid",
|
||||
"description": "Remove column lobid"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,119 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:description",
|
||||
"expression": "grel:value.replace('<small>','').replace('</small>','').replace('<big>','').replace('</big>','').replace('<i>','').replace('</i>','').replace('<SUB>','<sub>').replace('</SUB>','</sub>').replace('<SUP>','<sup>').replace('</SUP>','</sup>')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:description using expression grel:value.replace('<small>','').replace('</small>','').replace('<big>','').replace('</big>','').replace('<i>','').replace('</i>','').replace('<SUB>','<sub>').replace('</SUB>','</sub>').replace('<SUP>','<sup>').replace('</SUP>','</sup>')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:value.replace('<small>','').replace('</small>','').replace('<big>','').replace('</big>','').replace('<i>','').replace('</i>','').replace('<SUB>','<sub>').replace('</SUB>','</sub>').replace('<SUP>','<sup>').replace('</SUP>','</sup>')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:value.replace('<small>','').replace('</small>','').replace('<big>','').replace('</big>','').replace('<i>','').replace('</i>','').replace('<SUB>','<sub>').replace('</SUB>','</sub>').replace('<SUP>','<sup>').replace('</SUP>','</sup>')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:source",
|
||||
"expression": "grel:value.replace('<small>','').replace('</small>','').replace('<big>','').replace('</big>','').replace('<i>','').replace('</i>','').replace('<SUB>','<sub>').replace('</SUB>','</sub>').replace('<SUP>','<sup>').replace('</SUP>','</sup>')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:source using expression grel:value.replace('<small>','').replace('</small>','').replace('<big>','').replace('</big>','').replace('<i>','').replace('</i>','').replace('<SUB>','<sub>').replace('</SUB>','</sub>').replace('<SUP>','<sup>').replace('</SUP>','</sup>')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:value.\nreplace('<sub>0</sub>','₀').\nreplace('<sub>1</sub>','₁').\nreplace('<sub>2</sub>','₂').\nreplace('<sub>3</sub>','₃').\nreplace('<sub>4</sub>','₄').\nreplace('<sub>5</sub>','₅').\nreplace('<sub>6</sub>','₆').\nreplace('<sub>7</sub>','₇').\nreplace('<sub>8</sub>','₈').\nreplace('<sub>9</sub>','₉').\nreplace('<sub>+</sub>','₊').\nreplace('<sub>-</sub>','₋').\nreplace('<sub>=</sub>','₌').\nreplace('<sub>(</sub>','₍').\nreplace('<sub>)</sub>','₎').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>ə</sub>','ₔ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>i</sub>','ᵢ').\nreplace('<sub>j</sub>','ⱼ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>r</sub>','ᵣ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>u</sub>','ᵤ').\nreplace('<sub>v</sub>','ᵥ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>β</sub>','ᵦ').\nreplace('<sub>γ</sub>','ᵧ').\nreplace('<sub>ρ</sub>','ᵨ').\nreplace('<sub>φ</sub>','ᵩ').\nreplace('<sub>χ</sub>','ᵪ').\nreplace('<sup>0</sup>','⁰').\nreplace('<sup>1</sup>','¹').\nreplace('<sup>2</sup>','²').\nreplace('<sup>3</sup>','³').\nreplace('<sup>4</sup>','⁴').\nreplace('<sup>5</sup>','⁵').\nreplace('<sup>6</sup>','⁶').\nreplace('<sup>7</sup>','⁷').\nreplace('<sup>8</sup>','⁸').\nreplace('<sup>9</sup>','⁹').\nreplace('<sup>+</sup>','⁺').\nreplace('<sup>-</sup>','⁻').\nreplace('<sup>=</sup>','⁼').\nreplace('<sup>(</sup>','⁽').\nreplace('<sup>)</sup>','⁾').\nreplace('<sup>n</sup>'
,'ⁿ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>A</sup>','ᴬ').\nreplace('<sup>B</sup>','ᴮ').\nreplace('<sup>D</sup>','ᴰ').\nreplace('<sup>E</sup>','ᴱ').\nreplace('<sup>G</sup>','ᴳ').\nreplace('<sup>H</sup>','ᴴ').\nreplace('<sup>I</sup>','ᴵ').\nreplace('<sup>J</sup>','ᴶ').\nreplace('<sup>K</sup>','ᴷ').\nreplace('<sup>L</sup>','ᴸ').\nreplace('<sup>M</sup>','ᴹ').\nreplace('<sup>N</sup>','ᴺ').\nreplace('<sup>O</sup>','ᴼ').\nreplace('<sup>P</sup>','ᴾ').\nreplace('<sup>R</sup>','ᴿ').\nreplace('<sup>T</sup>','ᵀ').\nreplace('<sup>U</sup>','ᵁ').\nreplace('<sup>V</sup>','ⱽ').\nreplace('<sup>W</sup>','ᵂ').\nreplace('<sup>a</sup>','ᵃ').\nreplace('<sup>b</sup>','ᵇ').\nreplace('<sup>c</sup>','ᶜ').\nreplace('<sup>d</sup>','ᵈ').\nreplace('<sup>e</sup>','ᵉ').\nreplace('<sup>f</sup>','ᶠ').\nreplace('<sup>g</sup>','ᵍ').\nreplace('<sup>h</sup>','ʰ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>j</sup>','ʲ').\nreplace('<sup>k</sup>','ᵏ').\nreplace('<sup>l</sup>','ˡ').\nreplace('<sup>m</sup>','ᵐ').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>o</sup>','ᵒ').\nreplace('<sup>p</sup>','ᵖ').\nreplace('<sup>r</sup>','ʳ').\nreplace('<sup>s</sup>','ˢ').\nreplace('<sup>t</sup>','ᵗ').\nreplace('<sup>u</sup>','ᵘ').\nreplace('<sup>v</sup>','ᵛ').\nreplace('<sup>w</sup>','ʷ').\nreplace('<sup>x</sup>','ˣ').\nreplace('<sup>y</sup>','ʸ').\nreplace('<sup>z</sup>','ᶻ').\nreplace('<sup>β</sup>','ᵝ').\nreplace('<sup>γ</sup>','ᵞ').\nreplace('<sup>δ</sup>','ᵟ').\nreplace('<sup>ε</sup>','ᵋ').\nreplace('<sup>θ</sup>','ᶿ').\nreplace('<sup>ι</sup>','ᶥ').\nreplace('<sup>υ</sup>','ᶹ').\nreplace('<sup>φ</sup>','ᵠ').\nreplace('<sup>χ</sup>','ᵡ')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:value.\nreplace('<sub>0</sub>','₀').\nreplace('<sub>1</sub>','₁').\nreplace('<sub>2</sub>','₂').\nreplace('<sub>3</sub>','₃').\nreplace('<sub>4</sub>','₄').\nreplace('<sub>5</sub>','₅').\nreplace('<sub>6</sub>','₆').\nreplace('<sub>7</sub>','₇').\nreplace('<sub>8</sub>','₈').\nreplace('<sub>9</sub>','₉').\nreplace('<sub>+</sub>','₊').\nreplace('<sub>-</sub>','₋').\nreplace('<sub>=</sub>','₌').\nreplace('<sub>(</sub>','₍').\nreplace('<sub>)</sub>','₎').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>ə</sub>','ₔ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>i</sub>','ᵢ').\nreplace('<sub>j</sub>','ⱼ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>r</sub>','ᵣ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>u</sub>','ᵤ').\nreplace('<sub>v</sub>','ᵥ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>β</sub>','ᵦ').\nreplace('<sub>γ</sub>','ᵧ').\nreplace('<sub>ρ</sub>','ᵨ').\nreplace('<sub>φ</sub>','ᵩ').\nreplace('<sub>χ</sub>','ᵪ').\nreplace('<sup>0</sup>','⁰').\nreplace('<sup>1</sup>','¹').\nreplace('<sup>2</sup>','²').\nreplace('<sup>3</sup>','³').\nreplace('<sup>4</sup>','⁴').\nreplace('<sup>5</sup>','⁵').\nreplace('<sup>6</sup>','⁶').\nreplace('<sup>7</sup>','⁷').\nreplace('<sup>8</sup>','⁸').\nreplace('<sup>9</sup>','⁹').\nreplace('<sup>+</sup>','⁺').\nreplace('<sup>-</sup>','⁻').\nreplace('<sup>=</sup>','⁼').\nreplace('<sup>(</sup>
','⁽').\nreplace('<sup>)</sup>','⁾').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>A</sup>','ᴬ').\nreplace('<sup>B</sup>','ᴮ').\nreplace('<sup>D</sup>','ᴰ').\nreplace('<sup>E</sup>','ᴱ').\nreplace('<sup>G</sup>','ᴳ').\nreplace('<sup>H</sup>','ᴴ').\nreplace('<sup>I</sup>','ᴵ').\nreplace('<sup>J</sup>','ᴶ').\nreplace('<sup>K</sup>','ᴷ').\nreplace('<sup>L</sup>','ᴸ').\nreplace('<sup>M</sup>','ᴹ').\nreplace('<sup>N</sup>','ᴺ').\nreplace('<sup>O</sup>','ᴼ').\nreplace('<sup>P</sup>','ᴾ').\nreplace('<sup>R</sup>','ᴿ').\nreplace('<sup>T</sup>','ᵀ').\nreplace('<sup>U</sup>','ᵁ').\nreplace('<sup>V</sup>','ⱽ').\nreplace('<sup>W</sup>','ᵂ').\nreplace('<sup>a</sup>','ᵃ').\nreplace('<sup>b</sup>','ᵇ').\nreplace('<sup>c</sup>','ᶜ').\nreplace('<sup>d</sup>','ᵈ').\nreplace('<sup>e</sup>','ᵉ').\nreplace('<sup>f</sup>','ᶠ').\nreplace('<sup>g</sup>','ᵍ').\nreplace('<sup>h</sup>','ʰ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>j</sup>','ʲ').\nreplace('<sup>k</sup>','ᵏ').\nreplace('<sup>l</sup>','ˡ').\nreplace('<sup>m</sup>','ᵐ').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>o</sup>','ᵒ').\nreplace('<sup>p</sup>','ᵖ').\nreplace('<sup>r</sup>','ʳ').\nreplace('<sup>s</sup>','ˢ').\nreplace('<sup>t</sup>','ᵗ').\nreplace('<sup>u</sup>','ᵘ').\nreplace('<sup>v</sup>','ᵛ').\nreplace('<sup>w</sup>','ʷ').\nreplace('<sup>x</sup>','ˣ').\nreplace('<sup>y</sup>','ʸ').\nreplace('<sup>z</sup>','ᶻ').\nreplace('<sup>β</sup>','ᵝ').\nreplace('<sup>γ</sup>','ᵞ').\nreplace('<sup>δ</sup>','ᵟ').\nreplace('<sup>ε</sup>','ᵋ').\nreplace('<sup>θ</sup>','ᶿ').\nreplace('<sup>ι</sup>','ᶥ').\nreplace('<sup>υ</sup>','ᶹ').\nreplace('<sup>φ</sup>','ᵠ').\nreplace('<sup>χ</sup>','ᵡ')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:description",
|
||||
"expression": "grel:value.\nreplace('<sub>0</sub>','₀').\nreplace('<sub>1</sub>','₁').\nreplace('<sub>2</sub>','₂').\nreplace('<sub>3</sub>','₃').\nreplace('<sub>4</sub>','₄').\nreplace('<sub>5</sub>','₅').\nreplace('<sub>6</sub>','₆').\nreplace('<sub>7</sub>','₇').\nreplace('<sub>8</sub>','₈').\nreplace('<sub>9</sub>','₉').\nreplace('<sub>+</sub>','₊').\nreplace('<sub>-</sub>','₋').\nreplace('<sub>=</sub>','₌').\nreplace('<sub>(</sub>','₍').\nreplace('<sub>)</sub>','₎').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>ə</sub>','ₔ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>i</sub>','ᵢ').\nreplace('<sub>j</sub>','ⱼ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>r</sub>','ᵣ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>u</sub>','ᵤ').\nreplace('<sub>v</sub>','ᵥ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>β</sub>','ᵦ').\nreplace('<sub>γ</sub>','ᵧ').\nreplace('<sub>ρ</sub>','ᵨ').\nreplace('<sub>φ</sub>','ᵩ').\nreplace('<sub>χ</sub>','ᵪ').\nreplace('<sup>0</sup>','⁰').\nreplace('<sup>1</sup>','¹').\nreplace('<sup>2</sup>','²').\nreplace('<sup>3</sup>','³').\nreplace('<sup>4</sup>','⁴').\nreplace('<sup>5</sup>','⁵').\nreplace('<sup>6</sup>','⁶').\nreplace('<sup>7</sup>','⁷').\nreplace('<sup>8</sup>','⁸').\nreplace('<sup>9</sup>','⁹').\nreplace('<sup>+</sup>','⁺').\nreplace('<sup>-</sup>','⁻').\nreplace('<sup>=</sup>','⁼').\nreplace('<sup>(</sup>','⁽').\nreplace('<sup>)</sup>','⁾').\nreplace('<sup>n</sup>'
,'ⁿ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>A</sup>','ᴬ').\nreplace('<sup>B</sup>','ᴮ').\nreplace('<sup>D</sup>','ᴰ').\nreplace('<sup>E</sup>','ᴱ').\nreplace('<sup>G</sup>','ᴳ').\nreplace('<sup>H</sup>','ᴴ').\nreplace('<sup>I</sup>','ᴵ').\nreplace('<sup>J</sup>','ᴶ').\nreplace('<sup>K</sup>','ᴷ').\nreplace('<sup>L</sup>','ᴸ').\nreplace('<sup>M</sup>','ᴹ').\nreplace('<sup>N</sup>','ᴺ').\nreplace('<sup>O</sup>','ᴼ').\nreplace('<sup>P</sup>','ᴾ').\nreplace('<sup>R</sup>','ᴿ').\nreplace('<sup>T</sup>','ᵀ').\nreplace('<sup>U</sup>','ᵁ').\nreplace('<sup>V</sup>','ⱽ').\nreplace('<sup>W</sup>','ᵂ').\nreplace('<sup>a</sup>','ᵃ').\nreplace('<sup>b</sup>','ᵇ').\nreplace('<sup>c</sup>','ᶜ').\nreplace('<sup>d</sup>','ᵈ').\nreplace('<sup>e</sup>','ᵉ').\nreplace('<sup>f</sup>','ᶠ').\nreplace('<sup>g</sup>','ᵍ').\nreplace('<sup>h</sup>','ʰ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>j</sup>','ʲ').\nreplace('<sup>k</sup>','ᵏ').\nreplace('<sup>l</sup>','ˡ').\nreplace('<sup>m</sup>','ᵐ').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>o</sup>','ᵒ').\nreplace('<sup>p</sup>','ᵖ').\nreplace('<sup>r</sup>','ʳ').\nreplace('<sup>s</sup>','ˢ').\nreplace('<sup>t</sup>','ᵗ').\nreplace('<sup>u</sup>','ᵘ').\nreplace('<sup>v</sup>','ᵛ').\nreplace('<sup>w</sup>','ʷ').\nreplace('<sup>x</sup>','ˣ').\nreplace('<sup>y</sup>','ʸ').\nreplace('<sup>z</sup>','ᶻ').\nreplace('<sup>β</sup>','ᵝ').\nreplace('<sup>γ</sup>','ᵞ').\nreplace('<sup>δ</sup>','ᵟ').\nreplace('<sup>ε</sup>','ᵋ').\nreplace('<sup>θ</sup>','ᶿ').\nreplace('<sup>ι</sup>','ᶥ').\nreplace('<sup>υ</sup>','ᶹ').\nreplace('<sup>φ</sup>','ᵠ').\nreplace('<sup>χ</sup>','ᵡ')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:description using expression grel:value.\nreplace('<sub>0</sub>','₀').\nreplace('<sub>1</sub>','₁').\nreplace('<sub>2</sub>','₂').\nreplace('<sub>3</sub>','₃').\nreplace('<sub>4</sub>','₄').\nreplace('<sub>5</sub>','₅').\nreplace('<sub>6</sub>','₆').\nreplace('<sub>7</sub>','₇').\nreplace('<sub>8</sub>','₈').\nreplace('<sub>9</sub>','₉').\nreplace('<sub>+</sub>','₊').\nreplace('<sub>-</sub>','₋').\nreplace('<sub>=</sub>','₌').\nreplace('<sub>(</sub>','₍').\nreplace('<sub>)</sub>','₎').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>ə</sub>','ₔ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>i</sub>','ᵢ').\nreplace('<sub>j</sub>','ⱼ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>r</sub>','ᵣ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>u</sub>','ᵤ').\nreplace('<sub>v</sub>','ᵥ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>β</sub>','ᵦ').\nreplace('<sub>γ</sub>','ᵧ').\nreplace('<sub>ρ</sub>','ᵨ').\nreplace('<sub>φ</sub>','ᵩ').\nreplace('<sub>χ</sub>','ᵪ').\nreplace('<sup>0</sup>','⁰').\nreplace('<sup>1</sup>','¹').\nreplace('<sup>2</sup>','²').\nreplace('<sup>3</sup>','³').\nreplace('<sup>4</sup>','⁴').\nreplace('<sup>5</sup>','⁵').\nreplace('<sup>6</sup>','⁶').\nreplace('<sup>7</sup>','⁷').\nreplace('<sup>8</sup>','⁸').\nreplace('<sup>9</sup>','⁹').\nreplace('<sup>+</sup>','⁺').\nreplace('<sup>-</sup>','⁻').\nreplace('<sup>=</sup>','⁼').\nreplace('<sup>(
</sup>','⁽').\nreplace('<sup>)</sup>','⁾').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>A</sup>','ᴬ').\nreplace('<sup>B</sup>','ᴮ').\nreplace('<sup>D</sup>','ᴰ').\nreplace('<sup>E</sup>','ᴱ').\nreplace('<sup>G</sup>','ᴳ').\nreplace('<sup>H</sup>','ᴴ').\nreplace('<sup>I</sup>','ᴵ').\nreplace('<sup>J</sup>','ᴶ').\nreplace('<sup>K</sup>','ᴷ').\nreplace('<sup>L</sup>','ᴸ').\nreplace('<sup>M</sup>','ᴹ').\nreplace('<sup>N</sup>','ᴺ').\nreplace('<sup>O</sup>','ᴼ').\nreplace('<sup>P</sup>','ᴾ').\nreplace('<sup>R</sup>','ᴿ').\nreplace('<sup>T</sup>','ᵀ').\nreplace('<sup>U</sup>','ᵁ').\nreplace('<sup>V</sup>','ⱽ').\nreplace('<sup>W</sup>','ᵂ').\nreplace('<sup>a</sup>','ᵃ').\nreplace('<sup>b</sup>','ᵇ').\nreplace('<sup>c</sup>','ᶜ').\nreplace('<sup>d</sup>','ᵈ').\nreplace('<sup>e</sup>','ᵉ').\nreplace('<sup>f</sup>','ᶠ').\nreplace('<sup>g</sup>','ᵍ').\nreplace('<sup>h</sup>','ʰ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>j</sup>','ʲ').\nreplace('<sup>k</sup>','ᵏ').\nreplace('<sup>l</sup>','ˡ').\nreplace('<sup>m</sup>','ᵐ').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>o</sup>','ᵒ').\nreplace('<sup>p</sup>','ᵖ').\nreplace('<sup>r</sup>','ʳ').\nreplace('<sup>s</sup>','ˢ').\nreplace('<sup>t</sup>','ᵗ').\nreplace('<sup>u</sup>','ᵘ').\nreplace('<sup>v</sup>','ᵛ').\nreplace('<sup>w</sup>','ʷ').\nreplace('<sup>x</sup>','ˣ').\nreplace('<sup>y</sup>','ʸ').\nreplace('<sup>z</sup>','ᶻ').\nreplace('<sup>β</sup>','ᵝ').\nreplace('<sup>γ</sup>','ᵞ').\nreplace('<sup>δ</sup>','ᵟ').\nreplace('<sup>ε</sup>','ᵋ').\nreplace('<sup>θ</sup>','ᶿ').\nreplace('<sup>ι</sup>','ᶥ').\nreplace('<sup>υ</sup>','ᶹ').\nreplace('<sup>φ</sup>','ᵠ').\nreplace('<sup>χ</sup>','ᵡ')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:source",
|
||||
"expression": "grel:value.\nreplace('<sub>0</sub>','₀').\nreplace('<sub>1</sub>','₁').\nreplace('<sub>2</sub>','₂').\nreplace('<sub>3</sub>','₃').\nreplace('<sub>4</sub>','₄').\nreplace('<sub>5</sub>','₅').\nreplace('<sub>6</sub>','₆').\nreplace('<sub>7</sub>','₇').\nreplace('<sub>8</sub>','₈').\nreplace('<sub>9</sub>','₉').\nreplace('<sub>+</sub>','₊').\nreplace('<sub>-</sub>','₋').\nreplace('<sub>=</sub>','₌').\nreplace('<sub>(</sub>','₍').\nreplace('<sub>)</sub>','₎').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>ə</sub>','ₔ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>i</sub>','ᵢ').\nreplace('<sub>j</sub>','ⱼ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>r</sub>','ᵣ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>u</sub>','ᵤ').\nreplace('<sub>v</sub>','ᵥ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>β</sub>','ᵦ').\nreplace('<sub>γ</sub>','ᵧ').\nreplace('<sub>ρ</sub>','ᵨ').\nreplace('<sub>φ</sub>','ᵩ').\nreplace('<sub>χ</sub>','ᵪ').\nreplace('<sup>0</sup>','⁰').\nreplace('<sup>1</sup>','¹').\nreplace('<sup>2</sup>','²').\nreplace('<sup>3</sup>','³').\nreplace('<sup>4</sup>','⁴').\nreplace('<sup>5</sup>','⁵').\nreplace('<sup>6</sup>','⁶').\nreplace('<sup>7</sup>','⁷').\nreplace('<sup>8</sup>','⁸').\nreplace('<sup>9</sup>','⁹').\nreplace('<sup>+</sup>','⁺').\nreplace('<sup>-</sup>','⁻').\nreplace('<sup>=</sup>','⁼').\nreplace('<sup>(</sup>','⁽').\nreplace('<sup>)</sup>','⁾').\nreplace('<sup>n</sup>'
,'ⁿ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>A</sup>','ᴬ').\nreplace('<sup>B</sup>','ᴮ').\nreplace('<sup>D</sup>','ᴰ').\nreplace('<sup>E</sup>','ᴱ').\nreplace('<sup>G</sup>','ᴳ').\nreplace('<sup>H</sup>','ᴴ').\nreplace('<sup>I</sup>','ᴵ').\nreplace('<sup>J</sup>','ᴶ').\nreplace('<sup>K</sup>','ᴷ').\nreplace('<sup>L</sup>','ᴸ').\nreplace('<sup>M</sup>','ᴹ').\nreplace('<sup>N</sup>','ᴺ').\nreplace('<sup>O</sup>','ᴼ').\nreplace('<sup>P</sup>','ᴾ').\nreplace('<sup>R</sup>','ᴿ').\nreplace('<sup>T</sup>','ᵀ').\nreplace('<sup>U</sup>','ᵁ').\nreplace('<sup>V</sup>','ⱽ').\nreplace('<sup>W</sup>','ᵂ').\nreplace('<sup>a</sup>','ᵃ').\nreplace('<sup>b</sup>','ᵇ').\nreplace('<sup>c</sup>','ᶜ').\nreplace('<sup>d</sup>','ᵈ').\nreplace('<sup>e</sup>','ᵉ').\nreplace('<sup>f</sup>','ᶠ').\nreplace('<sup>g</sup>','ᵍ').\nreplace('<sup>h</sup>','ʰ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>j</sup>','ʲ').\nreplace('<sup>k</sup>','ᵏ').\nreplace('<sup>l</sup>','ˡ').\nreplace('<sup>m</sup>','ᵐ').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>o</sup>','ᵒ').\nreplace('<sup>p</sup>','ᵖ').\nreplace('<sup>r</sup>','ʳ').\nreplace('<sup>s</sup>','ˢ').\nreplace('<sup>t</sup>','ᵗ').\nreplace('<sup>u</sup>','ᵘ').\nreplace('<sup>v</sup>','ᵛ').\nreplace('<sup>w</sup>','ʷ').\nreplace('<sup>x</sup>','ˣ').\nreplace('<sup>y</sup>','ʸ').\nreplace('<sup>z</sup>','ᶻ').\nreplace('<sup>β</sup>','ᵝ').\nreplace('<sup>γ</sup>','ᵞ').\nreplace('<sup>δ</sup>','ᵟ').\nreplace('<sup>ε</sup>','ᵋ').\nreplace('<sup>θ</sup>','ᶿ').\nreplace('<sup>ι</sup>','ᶥ').\nreplace('<sup>υ</sup>','ᶹ').\nreplace('<sup>φ</sup>','ᵠ').\nreplace('<sup>χ</sup>','ᵡ')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:source using expression grel:value.\nreplace('<sub>0</sub>','₀').\nreplace('<sub>1</sub>','₁').\nreplace('<sub>2</sub>','₂').\nreplace('<sub>3</sub>','₃').\nreplace('<sub>4</sub>','₄').\nreplace('<sub>5</sub>','₅').\nreplace('<sub>6</sub>','₆').\nreplace('<sub>7</sub>','₇').\nreplace('<sub>8</sub>','₈').\nreplace('<sub>9</sub>','₉').\nreplace('<sub>+</sub>','₊').\nreplace('<sub>-</sub>','₋').\nreplace('<sub>=</sub>','₌').\nreplace('<sub>(</sub>','₍').\nreplace('<sub>)</sub>','₎').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>ə</sub>','ₔ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>a</sub>','ₐ').\nreplace('<sub>e</sub>','ₑ').\nreplace('<sub>h</sub>','ₕ').\nreplace('<sub>i</sub>','ᵢ').\nreplace('<sub>j</sub>','ⱼ').\nreplace('<sub>k</sub>','ₖ').\nreplace('<sub>l</sub>','ₗ').\nreplace('<sub>m</sub>','ₘ').\nreplace('<sub>n</sub>','ₙ').\nreplace('<sub>o</sub>','ₒ').\nreplace('<sub>p</sub>','ₚ').\nreplace('<sub>r</sub>','ᵣ').\nreplace('<sub>s</sub>','ₛ').\nreplace('<sub>t</sub>','ₜ').\nreplace('<sub>u</sub>','ᵤ').\nreplace('<sub>v</sub>','ᵥ').\nreplace('<sub>x</sub>','ₓ').\nreplace('<sub>β</sub>','ᵦ').\nreplace('<sub>γ</sub>','ᵧ').\nreplace('<sub>ρ</sub>','ᵨ').\nreplace('<sub>φ</sub>','ᵩ').\nreplace('<sub>χ</sub>','ᵪ').\nreplace('<sup>0</sup>','⁰').\nreplace('<sup>1</sup>','¹').\nreplace('<sup>2</sup>','²').\nreplace('<sup>3</sup>','³').\nreplace('<sup>4</sup>','⁴').\nreplace('<sup>5</sup>','⁵').\nreplace('<sup>6</sup>','⁶').\nreplace('<sup>7</sup>','⁷').\nreplace('<sup>8</sup>','⁸').\nreplace('<sup>9</sup>','⁹').\nreplace('<sup>+</sup>','⁺').\nreplace('<sup>-</sup>','⁻').\nreplace('<sup>=</sup>','⁼').\nreplace('<sup>(</sup
>','⁽').\nreplace('<sup>)</sup>','⁾').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>A</sup>','ᴬ').\nreplace('<sup>B</sup>','ᴮ').\nreplace('<sup>D</sup>','ᴰ').\nreplace('<sup>E</sup>','ᴱ').\nreplace('<sup>G</sup>','ᴳ').\nreplace('<sup>H</sup>','ᴴ').\nreplace('<sup>I</sup>','ᴵ').\nreplace('<sup>J</sup>','ᴶ').\nreplace('<sup>K</sup>','ᴷ').\nreplace('<sup>L</sup>','ᴸ').\nreplace('<sup>M</sup>','ᴹ').\nreplace('<sup>N</sup>','ᴺ').\nreplace('<sup>O</sup>','ᴼ').\nreplace('<sup>P</sup>','ᴾ').\nreplace('<sup>R</sup>','ᴿ').\nreplace('<sup>T</sup>','ᵀ').\nreplace('<sup>U</sup>','ᵁ').\nreplace('<sup>V</sup>','ⱽ').\nreplace('<sup>W</sup>','ᵂ').\nreplace('<sup>a</sup>','ᵃ').\nreplace('<sup>b</sup>','ᵇ').\nreplace('<sup>c</sup>','ᶜ').\nreplace('<sup>d</sup>','ᵈ').\nreplace('<sup>e</sup>','ᵉ').\nreplace('<sup>f</sup>','ᶠ').\nreplace('<sup>g</sup>','ᵍ').\nreplace('<sup>h</sup>','ʰ').\nreplace('<sup>i</sup>','ⁱ').\nreplace('<sup>j</sup>','ʲ').\nreplace('<sup>k</sup>','ᵏ').\nreplace('<sup>l</sup>','ˡ').\nreplace('<sup>m</sup>','ᵐ').\nreplace('<sup>n</sup>','ⁿ').\nreplace('<sup>o</sup>','ᵒ').\nreplace('<sup>p</sup>','ᵖ').\nreplace('<sup>r</sup>','ʳ').\nreplace('<sup>s</sup>','ˢ').\nreplace('<sup>t</sup>','ᵗ').\nreplace('<sup>u</sup>','ᵘ').\nreplace('<sup>v</sup>','ᵛ').\nreplace('<sup>w</sup>','ʷ').\nreplace('<sup>x</sup>','ˣ').\nreplace('<sup>y</sup>','ʸ').\nreplace('<sup>z</sup>','ᶻ').\nreplace('<sup>β</sup>','ᵝ').\nreplace('<sup>γ</sup>','ᵞ').\nreplace('<sup>δ</sup>','ᵟ').\nreplace('<sup>ε</sup>','ᵋ').\nreplace('<sup>θ</sup>','ᶿ').\nreplace('<sup>ι</sup>','ᶥ').\nreplace('<sup>υ</sup>','ᶹ').\nreplace('<sup>φ</sup>','ᵠ').\nreplace('<sup>χ</sup>','ᵡ')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"columnName": "dc:description",
|
||||
"expression": "grel:value.replace('<br>','␟').replace('<p>','␟').replace('<li>','␟- ').parseHtml().htmlText().replace('␟','\n').trim()",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:description using expression grel:value.replace('<br>','␟').replace('<p>','␟').replace('<li>','␟- ').parseHtml().htmlText().replace('␟','\n').trim()"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:value.parseHtml().htmlText()",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:value.parseHtml().htmlText()"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"columnName": "dc:source",
|
||||
"expression": "grel:value.parseHtml().htmlText()",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:source using expression grel:value.parseHtml().htmlText()"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,71 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "dc:identifier",
|
||||
"columnName": "dc:identifier",
|
||||
"query": "^urn:nbn",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:identifier",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "urn",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column urn at index 2 based on column dc:identifier using expression grel:value"
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "dc:identifier",
|
||||
"columnName": "dc:identifier",
|
||||
"query": "doi.org/",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:identifier",
|
||||
"expression": "grel:value.replace('https://doi.org/','')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "doi",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column doi at index 2 based on column dc:identifier using expression grel:value"
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "dc:identifier",
|
||||
"columnName": "dc:identifier",
|
||||
"query": "\\.pdf$",
|
||||
"mode": "regex",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:identifier",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "pdf",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column pdf at index 2 based on column dc:identifier using expression grel:value"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,394 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column id using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "url",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column url using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:identifier",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:identifier using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "doi",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column doi using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "topic",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column topic using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "urn",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column urn using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:description",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:description using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:type",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:type using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:subject",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:subject using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:publisher",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:publisher using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ioo",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ioo using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:language",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:language using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:format",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:format using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title/xml:lang",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:description/xml:lang",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:description/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "cc",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column cc using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:source",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:source using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:contributor",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:contributor using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:creator",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:creator using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:date",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:date using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:coverage/xml:lang",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:coverage/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:coverage",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:coverage using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:rights/xml:lang",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:rights/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:rights",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:rights using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:source/xml:lang",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:source/xml:lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "datestamp",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column datestamp using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "setSpec",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column setSpec using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Blank Rows",
|
||||
"expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
|
||||
"columnName": "",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "true",
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"description": "Remove rows"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,54 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:language",
|
||||
"expression": "grel:forEach(value.split('␞'),v,v.replace(/^deu$/,'ger')).join('␞')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:language using expression grel:forEach(value.split('␞'),v,v.replace(/^deu$/,'ger')).join('␞')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:coverage/xml:lang",
|
||||
"expression": "grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:coverage/xml:lang using expression grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:description/xml:lang",
|
||||
"expression": "grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:description/xml:lang using expression grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title/xml:lang",
|
||||
"expression": "grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title/xml:lang using expression grel:forEach(value.split('␞'),v,v.replace(/^de$/,'ger').replace(/^en$/,'eng').replace(/^es$/,'spa').replace(/^fr$/,'fre').replace(/^it$/,'ita').replace(/^sk$/,'slo')).join('␞')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "url",
|
||||
"expression": "jython:import httplib\nimport urlparse\nstatus = []\nfor x in value.split(u'\\u241e'):\n url = urlparse.urlparse(x)\n conn = httplib.HTTPConnection(url[1])\n conn.request(\"HEAD\", url[2])\n res = conn.getresponse()\n status.append(str(res.status))\nreturn ','.join(status)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "linkcheck",
|
||||
"columnInsertIndex": 3,
|
||||
"description": "Create column linkcheck at index 3 based on column url using expression jython:import httplib\nimport urlparse\nstatus = []\nfor x in value.split(u'\\u241e'):\n url = urlparse.urlparse(x)\n conn = httplib.HTTPConnection(url[1])\n conn.request(\"HEAD\", url[2])\n res = conn.getresponse()\n status.append(str(res.status))\nreturn ','.join(status)"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,181 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition-by-fetching-urls",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "urn",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "urn",
|
||||
"urlExpression": "grel:'https://nbn-resolving.org/process-urn-form?identifier=' + value + '&verb=FULL&xml=on'",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "nbn-resolving",
|
||||
"columnInsertIndex": 2,
|
||||
"delay": 0,
|
||||
"cacheResponses": true,
|
||||
"httpHeadersJson": [
|
||||
{
|
||||
"name": "authorization",
|
||||
"value": ""
|
||||
},
|
||||
{
|
||||
"name": "user-agent",
|
||||
"value": "OpenRefine 3.4 [6443506]"
|
||||
},
|
||||
{
|
||||
"name": "accept",
|
||||
"value": "*/*"
|
||||
}
|
||||
],
|
||||
"description": "Create column nbn-resolving at index 2 by fetching URLs based on column urn using expression grel:'https://nbn-resolving.org/process-urn-form?identifier=' + value + '&verb=FULL&xml=on'"
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "urn",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "urn",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "nbn-resolving",
|
||||
"expression": "grel:value.parseXml().select('pidef|pidef pidef|data pidef|resolving_information pidef|url_info pidef|url')[0].htmlText()",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "url",
|
||||
"columnInsertIndex": 3,
|
||||
"description": "Create column url at index 3 based on column nbn-resolving using expression grel:value.parseXml().select('pidef|pidef pidef|data pidef|resolving_information pidef|url_info pidef|url')[0].htmlText()"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "nbn-resolving",
|
||||
"description": "Remove column nbn-resolving"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "url",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column url using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "pdf",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column pdf using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "url",
|
||||
"expression": "grel:if(value.contains('.pdf'),value,cells['pdf'].value)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column url using expression grel:if(value.contains('.pdf'),value,cells['pdf'].value)"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "pdf",
|
||||
"description": "Remove column pdf"
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "url",
|
||||
"expression": "grel:row.record.cells['url'].value.join('').contains('.pdf')",
|
||||
"columnName": "url",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"description": "Remove rows"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,48 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:title",
|
||||
"expression": "grel:with(['a', 'das', 'dem', 'den', 'der', 'des', 'die', 'ein', 'eine', 'einem', 'einen', 'einer', 'eines', 'the'],x,if(inArray(x,value.split(' ')[0].toLowercase()),value.split(' ')[0] + ' ',''))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "nonsort",
|
||||
"columnInsertIndex": 14,
|
||||
"description": "Create column nonsort at index 14 based on column dc:title using expression grel:with(['a', 'das', 'dem', 'den', 'der', 'des', 'die', 'ein', 'eine', 'einem', 'einen', 'einer', 'eines', 'the'],x,if(inArray(x,value.split(' ')[0].toLowercase()),value.split(' ')[0] + ' ',''))"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "nonsort",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "nonsort",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:value.split(' ').slice(1).join(' ')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:value.split(' ').slice(1).join(' ')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,35 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:identifier",
|
||||
"expression": "grel:\"Bergische Universität Wuppertal\"",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "dc:publisher",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column dc:publisher at index 2 based on column dc:identifier using expression grel:\"Bergische Universität Wuppertal\""
|
||||
}
|
||||
]
|
|
@ -0,0 +1,15 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "cc",
|
||||
"expression": "grel:value.replace('https://creativecommons.org/licenses/','CC ').replace('/',' ').trim().toUppercase()",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column cc using expression grel:value.replace('https://creativecommons.org/licenses/','CC ').replace('/',' ').trim().toUppercase()"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,25 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "dc:rights",
|
||||
"columnName": "dc:rights",
|
||||
"query": "creativecommons",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:rights",
|
||||
"expression": "grel:filter(value.ngram(1),v,v.contains('creativecommons.org'))[0].replace(/\\.$/,'').replace('(','').replace(')','').replace('http://','https://').replace('deed.de','')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "cc",
|
||||
"columnInsertIndex": 20,
|
||||
"description": "Create column cc at index 20 based on column dc:rights using expression grel:filter(value.ngram(1),v,v.contains('creativecommons.org'))[0].replace(/\\.$/,'').replace('(','').replace(')','').replace('http://','https://').replace('deed.de','')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,48 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "dc:subject",
|
||||
"columnName": "dc:subject",
|
||||
"query": "Fakultät",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:subject",
|
||||
"expression": "grel:value.replace(' » Dissertationen','')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "ioo",
|
||||
"columnInsertIndex": 5,
|
||||
"description": "Create column ioo at index 5 based on column dc:subject using expression grel:value.replace(' » Dissertationen','')"
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "dc:subject",
|
||||
"columnName": "dc:subject",
|
||||
"query": "Fakultät",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": true
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:subject",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "topic",
|
||||
"columnInsertIndex": 5,
|
||||
"description": "Create column topic at index 5 based on column dc:subject using expression grel:value"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,161 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:title/xml:lang",
|
||||
"expression": "value",
|
||||
"columnName": "dc:title/xml:lang",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "de␞de",
|
||||
"l": "de␞de"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:value.replace('␞','␟')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:value.replace('␞','␟')"
|
||||
},
|
||||
{
|
||||
"op": "core/mass-edit",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title/xml:lang",
|
||||
"expression": "value",
|
||||
"edits": [
|
||||
{
|
||||
"from": [
|
||||
"de␞de"
|
||||
],
|
||||
"fromBlank": false,
|
||||
"fromError": false,
|
||||
"to": "de"
|
||||
}
|
||||
],
|
||||
"description": "Mass edit cells in column dc:title/xml:lang"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:title/xml:lang",
|
||||
"expression": "value",
|
||||
"columnName": "dc:title/xml:lang",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "en␞en",
|
||||
"l": "en␞en"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:value.replace('␞','␟')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:value.replace('␞','␟')"
|
||||
},
|
||||
{
|
||||
"op": "core/mass-edit",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title/xml:lang",
|
||||
"expression": "value",
|
||||
"edits": [
|
||||
{
|
||||
"from": [
|
||||
"en␞en"
|
||||
],
|
||||
"fromBlank": false,
|
||||
"fromError": false,
|
||||
"to": "en"
|
||||
}
|
||||
],
|
||||
"description": "Mass edit cells in column dc:title/xml:lang"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:title/xml:lang",
|
||||
"expression": "value",
|
||||
"columnName": "dc:title/xml:lang",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "es␞es",
|
||||
"l": "es␞es"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:value.replace('␞','␟')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:value.replace('␞','␟')"
|
||||
},
|
||||
{
|
||||
"op": "core/mass-edit",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title/xml:lang",
|
||||
"expression": "value",
|
||||
"edits": [
|
||||
{
|
||||
"from": [
|
||||
"es␞es"
|
||||
],
|
||||
"fromBlank": false,
|
||||
"fromError": false,
|
||||
"to": "es"
|
||||
}
|
||||
],
|
||||
"description": "Mass edit cells in column dc:title/xml:lang"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,70 @@
|
|||
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:mods="http://www.loc.gov/mods/v3"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||
<mets:dmdSec ID="DMD_wuppertal_elpub_{{cells['id'].value.split(':').reverse()[0].escape('xml')}}">
|
||||
<mets:mdWrap MIMETYPE="text/xml" MDTYPE="MODS">
|
||||
<mets:xmlData>
|
||||
<mods xmlns="http://www.loc.gov/mods/v3" version="3.7"
|
||||
xmlns:vl="http://visuallibrary.net/vl">{{forEachIndex(cells['dc:title'].value.split('␞'), i, v, '
|
||||
<titleInfo' + forNonBlank(cells['dc:title/xml:lang'].value.split('␞')[i], v, ' lang="' + v.escape('xml') + '"', '') + if(i != 0, ' type="translated"', '') + '>' + forNonBlank(cells['nonsort'].value, z,'
|
||||
<nonSort>' + z.escape('xml') + '</nonSort>', '') + '
|
||||
<title>'+v.split('␟')[0].escape('xml')+'</title>'+forNonBlank(v.split('␟')[1], v, '
|
||||
<subTitle>' + v.escape('xml') + '</subTitle>', '')+'
|
||||
</titleInfo>').join('')}}
|
||||
<name type="personal">
<displayForm>{{cells['dc:creator'].value.escape('xml')}}</displayForm>
<namePart type="family">{{cells['dc:creator'].value.split(',')[0].escape('xml')}}</namePart>{{forNonBlank(cells['dc:creator'].value.split(',')[1].trim(),v,'
<namePart type="given">' + v.escape('xml') + '</namePart>','')}}
<role>
<roleTerm type="code" authority="marcrelator">aut</roleTerm>
</role>
</name>{{forNonBlank(cells['dc:contributor'].value,x,forEach(x.split('␞'),v,'
<name type="personal">
<displayForm>'+ v.escape('xml') +'</displayForm>
<namePart type="family">' + v.split(',')[0].escape('xml') + '</namePart>' + forNonBlank(v.split(',')[1].trim(), g, '
<namePart type="given">' + g.escape('xml') + '</namePart>', '') + '
<role>
<roleTerm type="code" authority="marcrelator">ctb</roleTerm>
</role>
</name>').join(''),'')}}
|
||||
<typeOfResource>text</typeOfResource>
|
||||
<genre authority="dini">doctoralThesis</genre>{{forNonBlank(cells['dc:date'].value,v,'
|
||||
<originInfo>
|
||||
<dateIssued encoding="w3cdtf" keyDate="yes">' + v.escape('xml') + '</dateIssued>
|
||||
</originInfo>','')}}{{forNonBlank(cells['dc:language'].value,v,'
|
||||
<language>
|
||||
<languageTerm type="code" authority="iso639-2b">' + v.escape('xml') + '</languageTerm>
|
||||
</language>','')}}{{forNonBlank(cells['dc:description'].value, x, forEachIndex(x.split('␞'), i, v, '
|
||||
<abstract type="content"' + forNonBlank(cells['dc:description/xml:lang'].value.split('␞')[i], z, ' lang="' + z.escape('xml') + '"','') + '>' + v.escape('xml') + '</abstract>').join(''),'')}}{{forNonBlank(cells['topic'].value,x,'
|
||||
<subject>' + forEach(x.split('␞'),v,'
|
||||
<topic>' + v.escape('xml') + '</topic>').join('') + '
|
||||
</subject>','')}}{{forNonBlank(cells['setSpec'].value,x,forEach(x.split('␞'),v,'
|
||||
<classification authority="ddc">' + v.escape('xml') + '</classification>').join(''),'')}}{{forNonBlank(cells['ioo'].value,x,forEach(x.split('␞'),v,'
|
||||
<classification authority="ioo" displayLabel="' + v.escape('xml') + '"></classification>').join(''),'')}}
|
||||
<identifier type="urn">{{cells['urn'].value.escape('xml')}}</identifier>{{forNonBlank(cells['doi'].value,v,'
|
||||
<identifier type="doi">' + v.escape('xml') + '</identifier>','')}}{{forNonBlank(cells['hbz'].value,v,'
|
||||
<identifier type="sys">' + v.escape('xml') + '</identifier>','')}}{{forNonBlank(cells['cc'].value,v,'
|
||||
<accessCondition type="use and reproduction">' + v.escape('xml') + '</accessCondition>','')}}
|
||||
<recordInfo>
|
||||
<recordIdentifier>wuppertal_elpub_{{cells['id'].value.split(':').reverse()[0].escape('xml')}}</recordIdentifier>
|
||||
</recordInfo>
|
||||
<extension>
|
||||
<vl:doctype>oaDoctoralThesis</vl:doctype>
|
||||
</extension>
|
||||
</mods>
|
||||
</mets:xmlData>
|
||||
</mets:mdWrap>
|
||||
</mets:dmdSec>
|
||||
<mets:fileSec>
|
||||
<mets:fileGrp USE="pdf upload">
|
||||
<mets:file MIMETYPE="application/pdf" ID="FILE_wuppertal_elpub_{{cells['id'].value.split(':').reverse()[0].escape('xml')}}">
|
||||
<mets:FLocat xlink:href="{{cells['url'].value.escape('xml')}}" LOCTYPE="URL"/>
|
||||
</mets:file>
|
||||
</mets:fileGrp>
|
||||
</mets:fileSec>
|
||||
<mets:structMap TYPE="LOGICAL">
|
||||
<mets:div TYPE="document" ID="wuppertal_elpub_{{cells['id'].value.split(':').reverse()[0].escape('xml')}}"
|
||||
DMDID="DMD_wuppertal_elpub_{{cells['id'].value.split(':').reverse()[0].escape('xml')}}">
|
||||
<mets:fptr FILEID="FILE_wuppertal_elpub_{{cells['id'].value.split(':').reverse()[0].escape('xml')}}"/>
|
||||
</mets:div>
|
||||
</mets:structMap>
|
||||
</mets:mets>
|
|
@ -0,0 +1,139 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-move",
|
||||
"columnName": "Record - header - identifier",
|
||||
"index": 0,
|
||||
"description": "Move column Record - header - identifier to position 0"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "Record - metadata - oai_dc:dc - xsi:schemaLocation",
|
||||
"description": "Remove column Record - metadata - oai_dc:dc - xsi:schemaLocation"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - header - identifier",
|
||||
"newColumnName": "id",
|
||||
"description": "Rename column Record - header - identifier to id"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:identifier",
|
||||
"newColumnName": "dc:identifier",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:identifier to dc:identifier"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:subject",
|
||||
"newColumnName": "dc:subject",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:subject to dc:subject"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:type",
|
||||
"newColumnName": "dc:type",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:type to dc:type"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:description",
|
||||
"newColumnName": "dc:description",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:description to dc:description"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:description - xml:lang",
|
||||
"newColumnName": "dc:description/xml:lang",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:description - xml:lang to dc:description/xml:lang"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:title",
|
||||
"newColumnName": "dc:title",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:title to dc:title"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:title - xml:lang",
|
||||
"newColumnName": "dc:title/xml:lang",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:title - xml:lang to dc:title/xml:lang"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:format",
|
||||
"newColumnName": "dc:format",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:format to dc:format"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:language",
|
||||
"newColumnName": "dc:language",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:language to dc:language"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:date",
|
||||
"newColumnName": "dc:date",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:date to dc:date"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:creator",
|
||||
"newColumnName": "dc:creator",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:creator to dc:creator"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:contributor",
|
||||
"newColumnName": "dc:contributor",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:contributor to dc:contributor"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:source",
|
||||
"newColumnName": "dc:source",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:source to dc:source"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:source - xml:lang",
|
||||
"newColumnName": "dc:source/xml:lang",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:source - xml:lang to dc:source/xml:lang"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:rights",
|
||||
"newColumnName": "dc:rights",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:rights to dc:rights"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:rights - xml:lang",
|
||||
"newColumnName": "dc:rights/xml:lang",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:rights - xml:lang to dc:rights/xml:lang"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:coverage",
|
||||
"newColumnName": "dc:coverage",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:coverage to dc:coverage"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - metadata - oai_dc:dc - dc:coverage - xml:lang",
|
||||
"newColumnName": "dc:coverage/xml:lang",
|
||||
"description": "Rename column Record - metadata - oai_dc:dc - dc:coverage - xml:lang to dc:coverage/xml:lang"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - header - setSpec",
|
||||
"newColumnName": "setSpec",
|
||||
"description": "Rename column Record - header - setSpec to setSpec"
|
||||
},
|
||||
{
|
||||
"op": "core/column-rename",
|
||||
"oldColumnName": "Record - header - datestamp",
|
||||
"newColumnName": "datestamp",
|
||||
"description": "Rename column Record - header - datestamp to datestamp"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,130 @@
|
|||
# https://taskfile.dev
---
version: '3'

tasks:
  # Full pipeline: harvest first (dependency), then run every processing step in order.
  default:
    desc: harvesten und transformieren
    deps:
      - harvest
    cmds:
      - task: refine
      - task: check
      - task: split
      - task: validate
      - task: zip

  # Harvest only: sync the OAI-PMH endpoint with metha and dump the records into one XML file.
  harvest:
    desc: nur harvesten
    dir: data/wuppertal/harvest
    cmds:
      - METHA_DIR=$PWD metha-sync --format oai_dc http://elpub.bib.uni-wuppertal.de/servlets/OAIDataProvider
      - METHA_DIR=$PWD metha-cat --format oai_dc http://elpub.bib.uni-wuppertal.de/servlets/OAIDataProvider > wuppertal.xml

  # Transform the harvested XML with OpenRefine and export METS/MODS via templating.
  refine:
    dir: data/wuppertal/refine
    # Provisional: avoid an orphaned Java process on exit,
    # see https://github.com/go-task/task/issues/141
    ignore_error: true
    env:
      PORT: 3335
      RAM: 8G
      PROJECT: wuppertal
    sources:
      - ../harvest/wuppertal.xml
      - ../../../rules/wuppertal/*.json
      - ../../../rules/wuppertal/template.txt
      # - ../../../rules/common/*.json
    generates:
      - wuppertal.txt
      - wuppertal-debug.tsv
    cmds:
      # Start OpenRefine in the background and wait until it responds
      - $OPENREFINE -v warn -p $PORT -m $RAM -d $PWD > openrefine.log 2>&1 &
      - timeout 30s bash -c "until curl -s http://localhost:$PORT | cat | grep -q -o OpenRefine ; do sleep 1; done"
      # Import (requires an absolute path to the XML file)
      - $OPENREFINE_CLIENT -P $PORT --create "$(readlink -e ../harvest/wuppertal.xml)" --recordPath Records --recordPath Record --storeEmptyStrings false --trimStrings true --projectName $PROJECT
      # Preprocessing: move the identifier to the first column; drop columns that are
      # not needed (no distinguishing features); rename the remaining columns (strip path)
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/vorverarbeitung.json $PROJECT
      # Remove HTML tags and convert subscript/superscript to Unicode
      # (affects dc:description, dc:source and dc:title)
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/html.json $PROJECT
      # Normalize DDC to three digits (affects dc:subjects and oai:setSpec)
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/ddc.json $PROJECT
      # Set dc:publisher
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/publisher.json $PROJECT
      # Extract URNs, DOIs and PDF links from dc:identifier
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/identifier.json $PROJECT
      # Generate direct links by matching the URNs against nbn-resolving and
      # delete records without a direct link to a PDF
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/nbn.json $PROJECT
      # Split dc:subject into ioo and topic
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/subjects.json $PROJECT
      # Standardized rights statements, part 1 (links to CC licenses)
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/rights.json $PROJECT
      # Prepare the data structure for templating: one record per row, delete empty rows
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/join.json $PROJECT
      # Merge title statements of the same language into title/subtitle
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/subtitle.json $PROJECT
      # Language codes according to ISO-639-2b (affects dc:language and the xml:lang
      # attributes of dc:coverage, dc:description and dc:title)
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/language.json $PROJECT
      # Standardized rights statements, part 2 (canonical name for CC licenses)
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/rights-cc.json $PROJECT
      # Enrich with the HT number via lobid-resources
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/hbz.json $PROJECT
      # Sorting: mods:nonSort for the first element in dc:title
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/nonsort.json $PROJECT
      # Check links: determine the HTTP status code (e.g. 200)
      - $OPENREFINE_CLIENT -P $PORT --apply ../../../rules/wuppertal/linkcheck.json $PROJECT
      # Export to METS:MODS with templating; the row separator is the marker
      # the split task cuts on
      - |
        $OPENREFINE_CLIENT -P $PORT --export --template "$(< ../../../rules/wuppertal/template.txt)" --rowSeparator "
        <!-- SPLIT -->
        " --output wuppertal.txt $PROJECT
      # Export for debugging
      - $OPENREFINE_CLIENT -P $PORT --export --output wuppertal-debug.tsv $PROJECT
      # Shut down OpenRefine: print process statistics first
      - ps -o start,etime,%mem,%cpu,rss -p $(lsof -t -i:$PORT)
      # SIGKILL (-9) avoids an unnecessary save operation
      - kill -9 $(lsof -t -i:$PORT)
      # Delete OpenRefine's temporary files
      - rm -rf ./*.project* && rm -f workspace.json

  check:
    dir: data/wuppertal/refine
    cmds:
      # Check the OpenRefine log file for warnings and error messages
      - if grep -i 'exception\|error' openrefine.log; then echo 1>&2 "log contains warnings!" && exit 1; fi

  split:
    dir: data/wuppertal/split
    sources:
      - ../refine/wuppertal.txt
    generates:
      - ./*.xml
    cmds:
      # Split into individual files
      - csplit -q ../refine/wuppertal.txt --suppress-matched '/<!-- SPLIT -->/' "{*}"
      # Delete any existing XML files
      - rm -f *.xml
      # Use the record identifier as the file name
      - for f in xx*; do mv "$f" "$(xmllint --xpath "//*[local-name(.) = 'recordIdentifier']/text()" "$f").xml"; done

  validate:
    dir: data/wuppertal/
    sources:
      - split/*.xml
    generates:
      - validate.log
    cmds:
      # Validate against the METS schema
      - wget -q -nc https://www.loc.gov/standards/mets/mets.xsd
      - xmllint --schema mets.xsd --noout split/*.xml > validate.log 2>&1

  zip:
    dir: data/wuppertal/
    sources:
      - split/*.xml
    generates:
      - wuppertal_{{.DATE}}.zip
    cmds:
      # Create a ZIP archive with a timestamp in its name
      - zip -q -FS -j wuppertal_{{.DATE}}.zip split/*.xml

  delete:
    desc: cache löschen
    cmds:
      - rm -rf data/wuppertal
|
Loading…
Reference in New Issue