[
  {
    "op": "core/column-addition-by-fetching-urls",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "id",
          "expression": "isBlank(value)",
          "columnName": "urn",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "row-based"
    },
    "baseColumnName": "urn",
    "urlExpression": "grel:'https://nbn-resolving.org/process-urn-form?identifier=' + value + '&verb=FULL&xml=on'",
    "onError": "set-to-blank",
    "newColumnName": "nbn-resolving",
    "columnInsertIndex": 2,
    "delay": 0,
    "cacheResponses": true,
    "httpHeadersJson": [
      {
        "name": "authorization",
        "value": ""
      },
      {
        "name": "user-agent",
        "value": "OpenRefine 3.4 [6443506]"
      },
      {
        "name": "accept",
        "value": "*/*"
      }
    ],
    "description": "Create column nbn-resolving at index 2 by fetching URLs based on column urn using expression grel:'https://nbn-resolving.org/process-urn-form?identifier=' + value + '&verb=FULL&xml=on'"
  },
  {
    "op": "core/column-addition",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "urn",
          "expression": "isBlank(value)",
          "columnName": "urn",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "row-based"
    },
    "baseColumnName": "nbn-resolving",
    "expression": "grel:value.parseXml().select('pidef|pidef pidef|data pidef|resolving_information pidef|url_info pidef|url')[0].htmlText()",
    "onError": "set-to-blank",
    "newColumnName": "url",
    "columnInsertIndex": 3,
    "description": "Create column url at index 3 based on column nbn-resolving using expression grel:value.parseXml().select('pidef|pidef pidef|data pidef|resolving_information pidef|url_info pidef|url')[0].htmlText()"
  },
  {
    "op": "core/column-removal",
    "columnName": "nbn-resolving",
    "description": "Remove column nbn-resolving"
  },
  {
    "op": "core/text-transform",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "url",
    "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10,
    "description": "Text transform on cells in column url using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)"
  },
  {
    "op": "core/text-transform",
    "engineConfig": {
      "facets": [],
      "mode": "row-based"
    },
    "columnName": "pdf",
    "expression": "grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10,
    "description": "Text transform on cells in column pdf using expression grel:if(isNonBlank(cells['id'].value),row.record.cells[columnName].value.join('␞'),null)"
  },
  {
    "op": "core/text-transform",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "id",
          "expression": "isBlank(value)",
          "columnName": "id",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "row-based"
    },
    "columnName": "url",
    "expression": "grel:if(value.contains('.pdf'),value,cells['pdf'].value)",
    "onError": "keep-original",
    "repeat": false,
    "repeatCount": 10,
    "description": "Text transform on cells in column url using expression grel:if(value.contains('.pdf'),value,cells['pdf'].value)"
  },
  {
    "op": "core/column-removal",
    "columnName": "pdf",
    "description": "Remove column pdf"
  },
  {
    "op": "core/row-removal",
    "engineConfig": {
      "facets": [
        {
          "type": "list",
          "name": "url",
          "expression": "grel:row.record.cells['url'].value.join('').toLowercase().contains('.pdf')",
          "columnName": "url",
          "invert": false,
          "omitBlank": false,
          "omitError": false,
          "selection": [
            {
              "v": {
                "v": false,
                "l": "false"
              }
            }
          ],
          "selectBlank": false,
          "selectError": false
        }
      ],
      "mode": "record-based"
    },
    "description": "Remove rows"
  }
]