Automatisierung OPUS Siegen
This commit is contained in:
parent
bd4f9c182a
commit
a71e86eba8
|
@ -0,0 +1,3 @@
|
|||
data
|
||||
openrefine
|
||||
.task
|
125
README.md
125
README.md
|
@ -1,2 +1,123 @@
|
|||
# noah
|
||||
Harvesting von OAI-PMH-Schnittstellen und Transformation in METS/MODS für das Portal noah.nrw
|
||||
# Datenintegration für noah.nrw
|
||||
Harvesting von OAI-PMH-Schnittstellen und Transformation in METS/MODS für das Portal [noah.nrw](https://noah.nrw/)
|
||||
|
||||
## Datenfluss
|
||||
|
||||
![Datenflussdiagramm](flowchart.svg)
|
||||
|
||||
## Verwendete Tools
|
||||
|
||||
* Harvesting (mit Cache): [metha](https://github.com/miku/metha/)
|
||||
* Transformation: [OpenRefine](https://github.com/OpenRefine/OpenRefine) und [openrefine-client](https://github.com/opencultureconsulting/openrefine-client)
|
||||
* Task Runner: [Task](https://github.com/go-task/task)
|
||||
|
||||
## Systemvoraussetzungen
|
||||
|
||||
* GNU/Linux (getestet mit Fedora 32)
|
||||
* JAVA 8+
|
||||
|
||||
## Installation
|
||||
|
||||
1. Git Repository klonen
|
||||
|
||||
```sh
|
||||
git clone https://github.com/opencultureconsulting/noah.git
|
||||
cd noah
|
||||
```
|
||||
|
||||
2. [OpenRefine 3.4.1](https://github.com/OpenRefine/OpenRefine/releases/tag/3.4.1) (benötigt JAVA 8+)
|
||||
|
||||
```sh
|
||||
# download OpenRefine
|
||||
wget -O openrefine.tar.gz https://github.com/OpenRefine/OpenRefine/releases/download/3.4.1/openrefine-linux-3.4.1.tar.gz
|
||||
# install OpenRefine in subdirectory openrefine
|
||||
mkdir -p openrefine
|
||||
tar -xzf openrefine.tar.gz -C openrefine --strip 1 && rm openrefine.tar.gz
|
||||
# do not try to open OpenRefine in browser
|
||||
sed -i '$ a JAVA_OPTIONS=-Drefine.headless=true' "openrefine/refine.ini"
|
||||
# set autosave period from 5 minutes to 24 hours
|
||||
sed -i 's/#REFINE_AUTOSAVE_PERIOD=60/REFINE_AUTOSAVE_PERIOD=1440/' "openrefine/refine.ini"
|
||||
```
|
||||
|
||||
3. [openrefine-client 0.3.10](https://github.com/opencultureconsulting/openrefine-client/releases/tag/v0.3.10)
|
||||
|
||||
```sh
|
||||
# install openrefine-client in subdirectory openrefine
|
||||
mkdir -p openrefine
|
||||
wget -O openrefine/openrefine-client https://github.com/opencultureconsulting/openrefine-client/releases/download/v0.3.10/openrefine-client_0-3-10_linux
|
||||
chmod +x openrefine/openrefine-client
|
||||
```
|
||||
|
||||
4. [metha 0.2.20](https://github.com/miku/metha/releases/tag/v0.2.20)
|
||||
|
||||
a) RPM-basiert (Fedora, CentOS, SLES etc.)
|
||||
|
||||
```sh
|
||||
# download and install rpm package
|
||||
wget https://github.com/miku/metha/releases/download/v0.2.20/metha-0.2.20-0.x86_64.rpm
|
||||
sudo dnf install ./metha-0.2.20-0.x86_64.rpm && rm metha-0.2.20-0.x86_64.rpm
|
||||
```
|
||||
|
||||
b) DEB-basiert (Debian, Ubuntu etc.)
|
||||
|
||||
```sh
|
||||
# download and install deb package
|
||||
wget https://github.com/miku/metha/releases/download/v0.2.20/metha_0.2.20_amd64.deb
|
||||
sudo apt install ./metha_0.2.20_amd64.deb && rm metha_0.2.20_amd64.deb
|
||||
```
|
||||
|
||||
5. [Task 3.2.2](https://github.com/go-task/task/releases/tag/v3.2.2)
|
||||
|
||||
a) RPM-basiert (Fedora, CentOS, SLES etc.)
|
||||
|
||||
```sh
|
||||
# download and install rpm package
|
||||
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.rpm
|
||||
sudo dnf install ./task_linux_amd64.rpm && rm task_linux_amd64.rpm
|
||||
```
|
||||
|
||||
b) DEB-basiert (Debian, Ubuntu etc.)
|
||||
|
||||
```sh
|
||||
# download and install deb package
|
||||
wget https://github.com/go-task/task/releases/download/v3.2.2/task_linux_amd64.deb
|
||||
sudo apt install ./task_linux_amd64.deb && rm task_linux_amd64.deb
|
||||
```
|
||||
|
||||
## Nutzung
|
||||
|
||||
* Alle Datenquellen harvesten und transformieren (parallelisiert)
|
||||
|
||||
```
|
||||
task default
|
||||
```
|
||||
|
||||
* Eine Datenquelle harvesten und transformieren
|
||||
|
||||
```
|
||||
task siegen:default
|
||||
```
|
||||
|
||||
* Cache einer Datenquelle löschen
|
||||
|
||||
```
|
||||
task siegen:delete
|
||||
```
|
||||
|
||||
* Verfügbare Tasks auflisten
|
||||
|
||||
```
|
||||
task --list
|
||||
```
|
||||
|
||||
## Konfiguration
|
||||
|
||||
* Umgebungsvariablen in [Taskfile.yml](Taskfile.yml)
|
||||
* Workflow für die Datenquellen in [tasks](tasks)
|
||||
* Beispiel: [tasks/siegen.yml](tasks/siegen.yml)
|
||||
* Transformationsregeln in [rules](rules)
|
||||
* Beispiel: [rules/siegen/hbz.json](rules/siegen/hbz.json)
|
||||
|
||||
## OAI-PMH Data Provider
|
||||
|
||||
Für die Bereitstellung der transformierten Daten wird der dateibasierte OAI-PMH Data Provider [oai_pmh](https://github.com/opencultureconsulting/oai_pmh) genutzt. Installations- und Nutzungshinweise sind dort zu finden.
|
|
@ -0,0 +1,35 @@
|
|||
# https://taskfile.dev
|
||||
|
||||
version: '3'
|
||||
|
||||
#silent: true
|
||||
output: prefixed
|
||||
|
||||
includes:
|
||||
siegen: ./tasks/siegen.yml
|
||||
# wuppertal: ./tasks/wuppertal.yml
|
||||
|
||||
vars:
|
||||
DATE: '{{ now | date "2006-01-02"}}'
|
||||
|
||||
env:
|
||||
OPENREFINE:
|
||||
sh: readlink -e openrefine/refine
|
||||
OPENREFINE_CLIENT:
|
||||
sh: readlink -e openrefine/openrefine-client
|
||||
|
||||
tasks:
|
||||
default:
|
||||
desc: alle Datenquellen harvesten und transformieren (parallel)
|
||||
preconditions:
|
||||
- sh: test -n "$(command -v metha-sync)"
|
||||
msg: "requirement metha missing"
|
||||
- sh: test -n "$(command -v java)"
|
||||
msg: "requirement JAVA runtime environment (jre) missing"
|
||||
- sh: test -x "$OPENREFINE"
|
||||
msg: "requirement OpenRefine missing"
|
||||
- sh: test -x "$OPENREFINE_CLIENT"
|
||||
msg: "requirement openrefine-client missing"
|
||||
deps:
|
||||
# - task: wuppertal:default
|
||||
- task: siegen:default
|
|
@ -0,0 +1,19 @@
|
|||
graph LR
|
||||
wuppertal[elpub.bib.uni-wuppertal.de] --- metha_wuppertal
|
||||
click wuppertal "http://elpub.bib.uni-wuppertal.de/servlets/OAIDataProvider?verb=ListRecords&metadataPrefix=oai_dc" _blank
|
||||
siegen[dspace.ub.uni-siegen.de] --- metha_siegen
|
||||
click siegen "https://dspace.ub.uni-siegen.de/oai/request?verb=ListRecords&metadataPrefix=xMetaDissPlus" _blank
|
||||
subgraph Harvesting
|
||||
metha_wuppertal["fa:fa-cogs metha"]
|
||||
metha_siegen["fa:fa-cogs metha"]
|
||||
end
|
||||
subgraph Transformation
|
||||
metha_wuppertal -->|Dublin Core| refine_wuppertal[fa:fa-cogs OpenRefine]
|
||||
metha_siegen -->|xMetaDissPlus| refine_siegen[fa:fa-cogs OpenRefine]
|
||||
end
|
||||
subgraph OAI-PMH Data Provider
|
||||
refine_wuppertal -->|METS/MODS| oai_wuppertal["noah.opencultureconsulting.com/ubw/"]
|
||||
click oai_wuppertal "https://noah.opencultureconsulting.com/ubw/?verb=ListRecords&metadataPrefix=mets" _blank
|
||||
refine_siegen -->|METS/MODS| oai_siegen["noah.opencultureconsulting.com/ubs/"]
|
||||
click oai_siegen "https://noah.opencultureconsulting.com/ubs/?verb=ListRecords&metadataPrefix=mets" _blank
|
||||
end
|
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 15 KiB |
|
@ -0,0 +1,25 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "text",
|
||||
"name": "dc:rights",
|
||||
"columnName": "dc:rights",
|
||||
"query": "creativecommons.org",
|
||||
"mode": "text",
|
||||
"caseSensitive": false,
|
||||
"invert": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:rights",
|
||||
"expression": "grel:value.replace('https://','').replace('http://','').replace('creativecommons.org/licenses/','CC ').replace('/',' ').trim().toUppercase()",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "cc",
|
||||
"columnInsertIndex": 23,
|
||||
"description": "Create column cc at index 23 based on column dc:rights using expression grel:value.replace('https://','').replace('http://','').replace('creativecommons.org/licenses/','CC ').replace('/',' ').trim().toUppercase()"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,74 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:subject/xsi:type",
|
||||
"expression": "value",
|
||||
"columnName": "dc:subject/xsi:type",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "xMetaDiss:DDC-SG",
|
||||
"l": "xMetaDiss:DDC-SG"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:subject",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "ddc",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column ddc at index 2 based on column dc:subject using expression grel:value"
|
||||
},
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:subject/xsi:type",
|
||||
"expression": "value",
|
||||
"columnName": "dc:subject/xsi:type",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "xMetaDiss:noScheme",
|
||||
"l": "xMetaDiss:noScheme"
|
||||
}
|
||||
},
|
||||
{
|
||||
"v": {
|
||||
"v": "xMetaDiss:SWD",
|
||||
"l": "xMetaDiss:SWD"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:subject",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "topic",
|
||||
"columnInsertIndex": 2,
|
||||
"description": "Create column topic at index 2 based on column dc:subject using expression grel:value"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,35 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:type/xsi:type",
|
||||
"expression": "value",
|
||||
"columnName": "dc:type/xsi:type",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "dini:PublType",
|
||||
"l": "dini:PublType"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:type",
|
||||
"expression": "grel:value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "dini",
|
||||
"columnInsertIndex": 7,
|
||||
"description": "Create column dini at index 7 based on column dc:type using expression grel:value"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,127 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition-by-fetching-urls",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "ddb:transfer",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "ddb:transfer",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"urlExpression": "grel:'https://dspace.ub.uni-siegen.de/oai/request?verb=GetRecord&metadataPrefix=mets&identifier=' + value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "mets",
|
||||
"columnInsertIndex": 1,
|
||||
"delay": 0,
|
||||
"cacheResponses": true,
|
||||
"httpHeadersJson": [
|
||||
{
|
||||
"name": "authorization",
|
||||
"value": ""
|
||||
},
|
||||
{
|
||||
"name": "user-agent",
|
||||
"value": "OpenRefine 3.4.1 [437dc4d]"
|
||||
},
|
||||
{
|
||||
"name": "accept",
|
||||
"value": "*/*"
|
||||
}
|
||||
],
|
||||
"description": "Create column mets at index 1 by fetching URLs based on column id using expression grel:'https://dspace.ub.uni-siegen.de/oai/request?verb=GetRecord&metadataPrefix=mets&identifier=' + value"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "ddb:transfer",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "ddb:transfer",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": true,
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ddb:transfer",
|
||||
"expression": "grel:forEach(cells['mets'].value.parseXml().select('FLocat'),v,v.xmlAttr('xlink:href')).join('␞')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ddb:transfer using expression grel:forEach(cells['mets'].value.parseXml().select('FLocat'),v,v.xmlAttr('xlink:href')).join('␞')"
|
||||
},
|
||||
{
|
||||
"op": "core/column-removal",
|
||||
"columnName": "mets",
|
||||
"description": "Remove column mets"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,68 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:type/xsi:type",
|
||||
"expression": "value",
|
||||
"columnName": "dc:type/xsi:type",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "dini:PublType",
|
||||
"l": "dini:PublType"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:type",
|
||||
"expression": "grel:with([ ['article','oaArticle'], ['bachelorThesis','oaBachelorThesis'], ['book','oaBook'], ['bookPart','oaBookPart'], ['conferenceObject','conferenceObject'], ['doctoralThesis','oaDoctoralThesis'], ['masterThesis','oaMasterThesis'], ['PeriodicalPart','journal issue'], ['StudyThesis','oaStudyThesis'], ['Other','oaBdOther'] ], x, forEach(x, v, if(value == v[0], v[1], null)).join(''))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "doctype",
|
||||
"columnInsertIndex": 7,
|
||||
"description": "Create column doctype at index 7 based on column dc:type using expression grel:with([ ['article','oaArticle'], ['bachelorThesis','oaBachelorThesis'], ['book','oaBook'], ['bookPart','oaBookPart'], ['conferenceObject','conferenceObject'], ['doctoralThesis','oaDoctoralThesis'], ['masterThesis','oaMasterThesis'], ['StudyThesis','oaStudyThesis'], ['Other','oaBdOther'] ], x, forEach(x, v, if(value == v[0], v[1], null)).join(''))"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "thesis:level",
|
||||
"expression": "value",
|
||||
"columnName": "thesis:level",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "thesis.habilitation",
|
||||
"l": "thesis.habilitation"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "doctype",
|
||||
"expression": "grel:'oaHabil'",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column doctype using expression grel:'oaHabil'"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,84 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition-by-fetching-urls",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "id",
|
||||
"urlExpression": "grel:'https://dspace.ub.uni-siegen.de/oai/request?verb=GetRecord&metadataPrefix=oai_dc&identifier=' + value",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "doi",
|
||||
"columnInsertIndex": 1,
|
||||
"delay": 0,
|
||||
"cacheResponses": true,
|
||||
"httpHeadersJson": [
|
||||
{
|
||||
"name": "authorization",
|
||||
"value": ""
|
||||
},
|
||||
{
|
||||
"name": "user-agent",
|
||||
"value": "OpenRefine 3.4.1 [437dc4d]"
|
||||
},
|
||||
{
|
||||
"name": "accept",
|
||||
"value": "*/*"
|
||||
}
|
||||
],
|
||||
"description": "Create column doi at index 1 by fetching URLs based on column id using expression grel:'https://dspace.ub.uni-siegen.de/oai/request?verb=GetRecord&metadataPrefix=oai_dc&identifier=' + value"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "doi",
|
||||
"expression": "grel:forNonBlank(filter(value.parseXml().select('dc|identifier'),v,v.xmlAttr('xsi:type') == 'doi:doi')[0].ownText(),v,v,null)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc using expression grel:forNonBlank(filter(value.parseXml().select('dc|identifier'),v,v.xmlAttr('xsi:type') == 'doi:doi')[0].ownText(),v,v,null)"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,84 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition-by-fetching-urls",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:identifier",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "dc:identifier",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:identifier",
|
||||
"urlExpression": "grel:'https://lobid.org/resources/search?q=' + forEach(value.split('␞'),v,'urn:\"'+v+'\"').join('+OR+')",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "hbz",
|
||||
"columnInsertIndex": 11,
|
||||
"delay": 0,
|
||||
"cacheResponses": true,
|
||||
"httpHeadersJson": [
|
||||
{
|
||||
"name": "authorization",
|
||||
"value": ""
|
||||
},
|
||||
{
|
||||
"name": "user-agent",
|
||||
"value": "OpenRefine 3.4.1 [437dc4d]"
|
||||
},
|
||||
{
|
||||
"name": "accept",
|
||||
"value": "*/*"
|
||||
}
|
||||
],
|
||||
"description": "Create column hbz at index 11 by fetching URLs based on column dc:identifier using expression grel:'https://lobid.org/resources/search?q=' + forEach(value.split('␞'),v,'urn:\"'+v+'\"').join('+OR+')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "dc:identifier",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "dc:identifier",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "hbz",
|
||||
"expression": "grel:forNonBlank(value.parseJson().member[0].hbzId,v,v,null)",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column lobid using expression grel:forNonBlank(value.parseJson().member[0].hbzId,v,v,null)"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,693 @@
|
|||
[
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "doi",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column doi using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "id",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column id using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:subject",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:subject using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "topic",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column topic using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:subject/xsi:type",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:subject/xsi:type using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ddc",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ddc using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:identifier",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:identifier using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:creator/pc:foreName",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:creator/pc:foreName using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:creator/pc:surName",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:creator/pc:surName using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:type/xsi:type",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:type/xsi:type using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:type",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:type using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "nonsort",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column nonsort using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:abstract/lang",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:abstract/lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:abstract",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:abstract using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "hbz",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column hbz using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:publisher/cc:address",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:publisher/cc:address using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ddb:fileSize",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ddb:fileSize using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ddb:fileName",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ddb:fileName using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title/ddb:type",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title/ddb:type using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title/lang",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title/lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:rights",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:rights using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:language",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:language using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:publisher/cc:place",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:publisher/cc:place using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:publisher/cc:name",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:publisher/cc:name using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:publisher/cc:GKD-Nr",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:publisher/cc:GKD-Nr using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ddb:checksum",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ddb:checksum using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:issued",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:issued using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ddb:identifier",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ddb:identifier using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ddb:fileNumber",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ddb:fileNumber using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "cc",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column cc using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:dateAccepted",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:dateAccepted using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "thesis:grantor",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column thesis:grantor using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "thesis:level",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column thesis:level using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "mime",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column mime using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "ddb:transfer",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column ddb:transfer using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:contributor/thesis:role",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:contributor/thesis:role using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:source/xsi:type",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:source/xsi:type using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:source",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:source using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:isPartOf",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:isPartOf using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:isPartOf/xsi:type",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:isPartOf/xsi:type using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:hasVersion",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:hasVersion using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:alternative/lang",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:alternative/lang using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dcterms:alternative",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dcterms:alternative using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:contributor/pc:foreName",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:contributor/pc:foreName using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:contributor/pc:surName",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:contributor/pc:surName using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "datestamp",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column datestamp using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "setSpec",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column setSpec using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dini",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dini using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "doctype",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column doctype using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "linkcheck",
|
||||
"expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column linkcheck using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),'')"
|
||||
},
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "Blank Rows",
|
||||
"expression": "(filter(row.columnNames,cn,isNonBlank(cells[cn].value)).length()==0).toString()",
|
||||
"columnName": "",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": "true",
|
||||
"l": "true"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"description": "Remove rows"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,35 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "ddb:transfer",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "ddb:transfer",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "ddb:transfer",
|
||||
"expression": "jython:import httplib\nimport urlparse\nstatus = []\nfor x in value.split(u'\\u241e'):\n url = urlparse.urlparse(x)\n conn = httplib.HTTPConnection(url[1])\n conn.request(\"HEAD\", url[2])\n res = conn.getresponse()\n status.append(str(res.status))\nreturn ','.join(status)",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "linkcheck",
|
||||
"columnInsertIndex": 34,
|
||||
"description": "Create column linkcheck at index 34 based on column ddb:transfer using expression jython:import httplib\nimport urlparse\nstatus = []\nfor x in value.split(u'\\u241e'):\n url = urlparse.urlparse(x)\n conn = httplib.HTTPConnection(url[1])\n conn.request(\"HEAD\", url[2])\n res = conn.getresponse()\n status.append(str(res.status))\nreturn ','.join(status)"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,35 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "ddb:transfer",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "ddb:transfer",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "ddb:transfer",
|
||||
"expression": "grel:with([ ['pdf','application/pdf'], ['exe','application/x-msdownload'], ['zip','application/zip'] ], x, forEach(value.split('␞'), v, forEach(x, z, if(v.endsWith(z[0]), z[1], null)).join('')).join('␞'))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "mime",
|
||||
"columnInsertIndex": 29,
|
||||
"description": "Create column mime at index 29 based on column ddb:transfer using expression grel:with([ ['pdf','application/pdf'], ['exe','application/x-msdownload'], ['zip','application/zip'] ], x, forEach(value.split('␞'), v, forEach(x, z, if(v.endsWith(z[0]), z[1], null)).join('')).join('␞'))"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,87 @@
|
|||
[
|
||||
{
|
||||
"op": "core/column-addition",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"baseColumnName": "dc:title",
|
||||
"expression": "grel:with(['a', 'das', 'dem', 'den', 'der', 'des', 'die', 'ein', 'eine', 'einem', 'einen', 'einer', 'eines', 'the'],x,if(inArray(x,value.split(' ')[0].toLowercase()),value.split(' ')[0] + ' ',''))",
|
||||
"onError": "set-to-blank",
|
||||
"newColumnName": "nonsort",
|
||||
"columnInsertIndex": 15,
|
||||
"description": "Create column nonsort at index 15 based on column dc:title using expression grel:with(['a', 'das', 'dem', 'den', 'der', 'des', 'die', 'ein', 'eine', 'einem', 'einen', 'einer', 'eines', 'the'],x,if(inArray(x,value.split(' ')[0].toLowercase()),value.split(' ')[0] + ' ',''))"
|
||||
},
|
||||
{
|
||||
"op": "core/text-transform",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "id",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "id",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
},
|
||||
{
|
||||
"type": "list",
|
||||
"name": "nonsort",
|
||||
"expression": "isBlank(value)",
|
||||
"columnName": "nonsort",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "row-based"
|
||||
},
|
||||
"columnName": "dc:title",
|
||||
"expression": "grel:value.split(' ').slice(1).join(' ')",
|
||||
"onError": "keep-original",
|
||||
"repeat": false,
|
||||
"repeatCount": 10,
|
||||
"description": "Text transform on cells in column dc:title using expression grel:value.split(' ').slice(1).join(' ')"
|
||||
}
|
||||
]
|
|
@ -0,0 +1,30 @@
|
|||
[
|
||||
{
|
||||
"op": "core/row-removal",
|
||||
"engineConfig": {
|
||||
"facets": [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "ddb:transfer",
|
||||
"expression": "grel:row.record.cells['ddb:transfer'].value.join('').contains('.pdf')",
|
||||
"columnName": "ddb:transfer",
|
||||
"invert": false,
|
||||
"omitBlank": false,
|
||||
"omitError": false,
|
||||
"selection": [
|
||||
{
|
||||
"v": {
|
||||
"v": false,
|
||||
"l": "false"
|
||||
}
|
||||
}
|
||||
],
|
||||
"selectBlank": false,
|
||||
"selectError": false
|
||||
}
|
||||
],
|
||||
"mode": "record-based"
|
||||
},
|
||||
"description": "Remove rows"
|
||||
}
|
||||