Alephino Basis

This commit is contained in:
Felix Lohmeier 2022-02-04 00:33:19 +01:00
parent c44a328c9d
commit 35998bc0f9
10 changed files with 316 additions and 465 deletions

View File

@ -79,10 +79,10 @@ tasks:
- task: :check # check OpenRefine log for any warnings and exit on error
vars: {DIR: '{{.DIR}}'}
sources:
- input/{{.PROJECT}}.imp
- input/{{.PROJECT}}*.txt
- config/pre/**
generates:
- output/{{.PROJECT}}.tsv
- output/{{.PROJECT}}*.tsv
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
refine-main:
@ -113,15 +113,20 @@ tasks:
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/sortieren.json > {{.LOG}}
- > # Bibliothekskürzel aus Import-Dateiname
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
- > # spec_A_E_01: Signatur 7100a
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
# - > # TODO: Spalte 2199 muss vorne stehen, weil für Sortierung benötigt
- > # spec_Z_04: PPN anreichern über ISBN
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/ppn.json > {{.LOG}}
- > # spec_Z_05: Exemplare clustern
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/clustern.json > {{.LOG}}
- | # Exemplardaten
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/E00X.json > {{.LOG}}
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100.json > {{.LOG}}
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8200.json > {{.LOG}}
- | # Titeldaten
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/2199.json > {{.LOG}}
- > # Titel ohne Exemplare löschen
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/abschluss.json > {{.LOG}}
# - > # spec_Z_04: PPN anreichern über ISBN
# "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/ppn.json > {{.LOG}}
# - > # spec_Z_05: Exemplare clustern
# "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/clustern.json > {{.LOG}}
- mkdir -p output
- > # Export dubletter Barcodes; golang requires strange escaping https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427
- > # Export der Barcodes; golang requires strange escaping https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/barcodes.txt)"
--template "{{"{{"}}forNonBlank(cells['8200'].value, v, v + '\n', ''){{"}}"}}"
@ -148,7 +153,8 @@ tasks:
- config/main/**
generates:
- log/{{.PROJECT}}.openrefine.tar.gz
# - output/{{.PROJECT}}.csv
- output/alephino.txt
- output/barcodes.txt
ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141
default: # enable standalone execution (running `task` in project directory)

View File

@ -0,0 +1,15 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "M|IDN",
"expression": "grel:'BA' + cells['File'].value + value",
"onError": "set-to-blank",
"newColumnName": "2199",
"columnInsertIndex": 1,
"description": "Create column 2199 at index 1 based on column M|IDN using expression grel:'BA' + cells['File'].value + value"
}
]

View File

@ -0,0 +1,152 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|100",
"expression": "grel:value.split('\u001f')[0].slice(1).trim()",
"onError": "set-to-blank",
"newColumnName": "7100a",
"columnInsertIndex": 3,
"description": "Create column 7100a at index 3 based on column E|100 using expression grel:value.split('\u001f')[0].slice(1).trim()"
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|001",
"expression": "isBlank(value)",
"columnName": "E|001",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "File",
"expression": "grel:if(value == 'LE', '0005', '0006')",
"onError": "set-to-blank",
"newColumnName": "7100j",
"columnInsertIndex": 3,
"description": "Create column 7100j at index 3 based on column File using expression grel:if(value == 'LE', '0005', '0006')"
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|001",
"expression": "isBlank(value)",
"columnName": "E|001",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "LE",
"l": "LE"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "File",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "7100f",
"columnInsertIndex": 3,
"description": "Create column 7100f at index 3 based on column File using expression grel:value"
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|001",
"expression": "isBlank(value)",
"columnName": "E|001",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "File",
"expression": "value",
"columnName": "File",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": "RS",
"l": "RS"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "7100f",
"expression": "grel:if(cells['E|107'].value.contains('Beuth'),'RS-BD', if(cells['E|105'].value == '00002','RS-MAG', if(cells['E|105'].value == '00003','RS-TH', if(cells['E|105'].value == '00004','RS-ZS','RS'))))",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column 7100f using expression grel:if(cells['E|107'].value.contains('Beuth'),'RS-BD', if(cells['E|105'].value == '00002','RS-MAG', if(cells['E|105'].value == '00003','RS-TH', if(cells['E|105'].value == '00004','RS-ZS','RS'))))"
}
]

View File

@ -1,14 +0,0 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|100",
"expression": "grel:value.split('\u001f')[0].slice(1)",
"onError": "set-to-blank",
"newColumnName": "7100a",
"columnInsertIndex": 5
}
]

View File

@ -0,0 +1,15 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "E|120",
"expression": "grel:value",
"onError": "set-to-blank",
"newColumnName": "8200",
"columnInsertIndex": 3,
"description": "Create column 8200 at index 40 based on column E|120 using expression grel:value"
}
]

View File

@ -0,0 +1,68 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|001",
"expression": "isBlank(value)",
"columnName": "E|001",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "File",
"expression": "grel:'n' + value.toLowercase()",
"onError": "set-to-blank",
"newColumnName": "E0XXb",
"columnInsertIndex": 3,
"description": "Create column E0XXb at index 3 based on column File using expression grel:'n' + value.toLowercase()"
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "E|001",
"expression": "isBlank(value)",
"columnName": "E|001",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "E|002a",
"expression": "grel:value[6,8] + '-' + value[4,6] + '-' + value[2,4]",
"onError": "set-to-blank",
"newColumnName": "E0XX",
"columnInsertIndex": 3,
"description": "Create column E0XX at index 41 based on column E|002a using expression grel:value[6,8] + '-' + value[4,6] + '-' + value[2,4]"
}
]

View File

@ -0,0 +1,49 @@
[
{
"op": "core/row-removal",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "isBlank(value)",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
},
{
"type": "list",
"name": "E0XX",
"expression": "isBlank(value)",
"columnName": "E0XX",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"description": "Remove rows"
}
]

View File

@ -1,138 +0,0 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "isBlank(value)",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "2199",
"expression": "grel:forNonBlank(cells['0100'].value,v,v,cells['0110'].value)",
"onError": "set-to-blank",
"newColumnName": "ppn",
"columnInsertIndex": 1
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "ppn",
"expression": "isBlank(value)",
"columnName": "ppn",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "ppn",
"expression": "grel:row.record.cells[columnName].value[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-reorder",
"mode": "record-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "ppn",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
}
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "ppn",
"expression": "grel:forNonBlank(cells['ppn'].value,v,v,forNonBlank(cells['2199'].value,v,v,''))",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 0
},
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id"
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "id",
"expression": "isBlank(value)",
"columnName": "id",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
},
"columnName": "2199",
"expression": "grel:if(rowIndex - row.record.fromRowIndex == 0,row.record.cells[columnName].value.join('␟'),null)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "ppn"
}
]

View File

@ -1,292 +0,0 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "2000",
"expression": "grel:with(value.replace('-',''),x,forEach(x.split('␟'),v,if(v.length()==10,with('978'+v[0,9],z,z+((10-(sum(forRange(0,12,1,i,toNumber(z[i])*(1+(i%2*2)) )) %10)) %10).toString()[0] ),v))).uniques().join('␟')",
"onError": "set-to-blank",
"newColumnName": "tmp",
"columnInsertIndex": 3
},
{
"op": "core/column-split",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "tmp",
"guessCellType": false,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "␟",
"regex": false,
"maxColumns": 0
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "tmp 1"
},
{
"op": "core/column-removal",
"columnName": "tmp 2"
}
]

View File

@ -9,16 +9,6 @@
"columnName": "E|001",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|029",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|026f",
"index": 0
},
{
"op": "core/column-move",
"columnName": "M|IDN",