Export aus Task Alephino direkt in PICA+

This commit is contained in:
Felix Lohmeier 2021-05-03 12:54:14 +02:00
parent c00d0d5e6e
commit 101b41fa7b
6 changed files with 540 additions and 7 deletions

View File

@ -55,6 +55,10 @@ Analyse dubletter Barcodes
task barcodes:main
```
## Hinweise
* Ursprünglich war eine Zusammenführung der Daten aus Bibliotheca und Alephino bei der Datenmigration geplant. Der Task "pica+" ist dafür ausgelegt, aber wurde letztlich nur für Bibliotheca genutzt. Für Alephino erfolgt der Export in pica+ direkt im Job "Alephino" ohne Zwischenschritt.
## Systemvoraussetzungen
* GNU/Linux (getestet auf Fedora 32)

View File

@ -116,13 +116,26 @@ tasks:
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
- > # spec_A_E_01: Signatur 7100a
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
# - > # Export der PICA3-Spalten als CSV; Spalte 2199 muss vorne stehen, weil später für Sortierung benötigt
# mkdir -p output &&
# "$CLIENT" -P {{.PORT}} {{.PROJECT}}
# --output "$(readlink -m output/{{.PROJECT}}.csv)"
# --template "$(< config/main/template.txt)"
# --rowSeparator ""
# > {{.LOG}}
# - > # TODO: Spalte 2199 muss vorne stehen, weil für Sortierung benötigt
- > # spec_Z_04: PPN anreichern über ISBN
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/ppn.json > {{.LOG}}
- > # spec_Z_05: Exemplare clustern
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/clustern.json > {{.LOG}}
- mkdir -p output
- > # Export dubletter Barcodes; golang requires strange escaping https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/barcodes.txt)"
--template "{{"{{"}}forNonBlank(cells['8200'].value, v, v + '\n', ''){{"}}"}}"
--rowSeparator ""
> {{.LOG}}
- > # spec_Z_06: Dublette Barcodes löschen
"$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/dedup.json > {{.LOG}}
- > # Export als PICA+
"$CLIENT" -P {{.PORT}} {{.PROJECT}}
--output "$(readlink -m output/{{.PROJECT}}.txt)"
--template "$(< config/template.txt)"
--rowSeparator ""
> {{.LOG}}
- | # print allocated system resources
PID="$(lsof -t -i:{{.PORT}})"
echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}

View File

@ -0,0 +1,138 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "isBlank(value)",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": false,
"l": "false"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"baseColumnName": "2199",
"expression": "grel:forNonBlank(cells['0100'].value,v,v,cells['0110'].value)",
"onError": "set-to-blank",
"newColumnName": "ppn",
"columnInsertIndex": 1
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "ppn",
"expression": "isBlank(value)",
"columnName": "ppn",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "ppn",
"expression": "grel:row.record.cells[columnName].value[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/row-reorder",
"mode": "record-based",
"sorting": {
"criteria": [
{
"valueType": "string",
"column": "ppn",
"blankPosition": 2,
"errorPosition": 1,
"reverse": false,
"caseSensitive": false
}
]
}
},
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "ppn",
"expression": "grel:forNonBlank(cells['ppn'].value,v,v,forNonBlank(cells['2199'].value,v,v,''))",
"onError": "set-to-blank",
"newColumnName": "id",
"columnInsertIndex": 0
},
{
"op": "core/blank-down",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "id"
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "id",
"expression": "isBlank(value)",
"columnName": "id",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "record-based"
},
"columnName": "2199",
"expression": "grel:if(rowIndex - row.record.fromRowIndex == 0,row.record.cells[columnName].value.join('␟'),null)",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "ppn"
}
]

View File

@ -0,0 +1,35 @@
[
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "8200",
"expression": "facetCount(value, 'value', '8200') > 1",
"columnName": "8200",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "8200",
"expression": "null",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10,
"description": "Text transform on cells in column 8200 using expression null"
}
]

View File

@ -0,0 +1,292 @@
[
{
"op": "core/column-addition",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"baseColumnName": "2000",
"expression": "grel:with(value.replace('-',''),x,forEach(x.split('␟'),v,if(v.length()==10,with('978'+v[0,9],z,z+((10-(sum(forRange(0,12,1,i,toNumber(z[i])*(1+(i%2*2)) )) %10)) %10).toString()[0] ),v))).uniques().join('␟')",
"onError": "set-to-blank",
"newColumnName": "tmp",
"columnInsertIndex": 3
},
{
"op": "core/column-split",
"engineConfig": {
"facets": [],
"mode": "row-based"
},
"columnName": "tmp",
"guessCellType": false,
"removeOriginalColumn": true,
"mode": "separator",
"separator": "␟",
"regex": false,
"maxColumns": 0
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0100",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/text-transform",
"engineConfig": {
"facets": [
{
"type": "list",
"name": "2199",
"expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
"columnName": "2199",
"invert": false,
"omitBlank": false,
"omitError": false,
"selection": [
{
"v": {
"v": true,
"l": "true"
}
}
],
"selectBlank": false,
"selectError": false
}
],
"mode": "row-based"
},
"columnName": "0110",
"expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
"onError": "keep-original",
"repeat": false,
"repeatCount": 10
},
{
"op": "core/column-removal",
"columnName": "tmp 1"
},
{
"op": "core/column-removal",
"columnName": "tmp 2"
}
]

View File

@ -0,0 +1,51 @@
{{
if(row.index - row.record.fromRowIndex == 0,
'' + '\n'
+ forNonBlank(cells['0500'].value, v, '002@ ' + '0' + v + '\n', '')
+ forNonBlank(cells['0501a'].value, v, '002C ' + 'a' + v + forNonBlank(cells['0501b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0502a'].value, v, '002D ' + 'a' + v + forNonBlank(cells['0502b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0503a'].value, v, '002E ' + 'a' + v + forNonBlank(cells['0503b'].value, v, 'b' + v, '') + '\n', '')
+ forNonBlank(cells['0100'].value, v, '003@ ' + '0' + v + '\n', '')
+ forNonBlank(cells['0110'].value, v, '003S ' + '0' + v + '\n', '')
+ forNonBlank(cells['2000'].value, v, forEach(v.split('␟'),x,'004A ' + '0' + x + '\n').join(''), '')
+ forNonBlank(cells['2009'].value, v, forEach(v.split('␟'),x,'004D ' + '0' + x + '\n').join(''), '')
+ forNonBlank(cells['2020'].value, v, '004F ' + '0' + v + '\n', '')
+ forNonBlank(cells['2201'].value, v, '004L ' + '0' + v + '\n', '')
+ forNonBlank(cells['2199'].value, v, forEach(v.split('␟'),x,'006Y ' + '0' + x + '\n').join(''), '')
+ forNonBlank(cells['2240'].value, v, '007G ' + 'iZDB' + '0' + v + '\n', '')
+ forNonBlank(cells['1500'].value, v, '010@ ' + forEach(v.split('␟'),x,'a' + x).join('') + '\n', '')
+ forNonBlank(cells['1100a'].value, v, '011@ ' + 'a' + v + forNonBlank(cells['1100b'].value, v, 'b' + v, '') + forNonBlank(cells['1100n'].value, v, 'n' + v, '') + '\n', '')
+ forNonBlank(cells['1131'].value, v, '013D ' + 'a' + v + '\n', '')
+ forNonBlank(cells['1140'].value, v, '013H ' + 'a' + v + '\n', '')
+ forNonBlank(cells['4000a'].value, v, '021A ' + 'a' + v + forNonBlank(cells['4000d'].value, v, 'd' + v, '') + forNonBlank(cells['4000h'].value, v, 'h' + v, '') + '\n', '')
+ forNonBlank(cells['4002'].value, v, '021G ' + 'a' + v + '\n', '')
+ forNonBlank(cells['3210'].value, v, '022A/00 ' + 'a' + v + '\n', '')
+ forNonBlank(cells['3000'].value, v, '028A ' + v + '\n', '')
+ forNonBlank(cells['3010'].value, v, forEach(v.split('␟'),x,'028C ' + x + '\n').join(''), '')
+ forNonBlank(cells['3110'].value, v, forEach(v.split('␟'),x,'029F ' + x + '\n').join(''), '')
+ forNonBlank(cells['4020a'].value, v, '032@ ' + 'a' + v + '\n', '')
+ if(or(isNonBlank(cells['4030n'].value),isNonBlank(cells['4030p'].value)),'033A ' + forNonBlank(cells['4030p'].value, v, 'p' + v, '') + forNonBlank(cells['4030n'].value, v, 'n' + v,'') + '\n', '')
+ forNonBlank(cells['4060'].value, v, '034D ' + 'a' + v + '\n', '')
+ forNonBlank(cells['4062'].value, v, '034I ' + 'a' + v + '\n', '')
+ forNonBlank(cells['4061'].value, v, '034M ' + 'a' + v + '\n', '')
+ forNonBlank(cells['4150'].value, v, '036C/00 ' + v + '\n', '')
+ forNonBlank(cells['4160'].value, v, '036D ' + v + '\n', '')
+ forNonBlank(cells['4170_1'].value, v, '036E/00 ' + v + '\n', '')
+ forNonBlank(cells['4170_2'].value, v, '036E/01 ' + v + '\n', '')
+ forNonBlank(cells['4204'].value, v, '037C ' + 'a' + v + '\n', '')
+ forNonBlank(cells['0999'].value, v, '046W ' + 'a' + v + '\n', '')
,'')
}}{{
if(isNonBlank(cells['E0XXb'].value),
with(with(rowIndex - row.record.fromRowIndex + 1, i, '00'[0,2-i.length()] + i),exnr,
'208@/' + exnr + ' a' + cells['E0XX'].value + 'b' + cells['E0XXb'].value + '\n'
+ '209A/' + exnr + ' b4736' + 'j' + cells['7100j'].value + 'f' + cells['7100f'].value + forNonBlank(cells['7100a'].value, v, 'a' + v, '') + forNonBlank(cells['7100d'].value, v, 'd' + v, '') + 'x00' + '\n'
+ forNonBlank(cells['8011'].value, v, '209B/' + exnr + ' a' + v + 'x11' + '\n', '')
+ forNonBlank(cells['8100'].value, v, '209C/' + exnr + ' a' + v + 'x00' + '\n', '')
+ forNonBlank(cells['8200'].value, v, '209G/' + exnr + ' a' + v + '\n', '')
+ forNonBlank(cells['8600'].value, v, '209O/' + exnr + ' a' + v + 'x00' + '\n', '')
+ forNonBlank(cells['8515'].value, v, '220B/' + exnr + ' a' + v + '\n', '')
+ forNonBlank(cells['6800'].value, v, forEachIndex(v.split('␟'), i, x, '244Z/' + exnr + ' a' + x.trim() + 'x' + '00'[0,2-i.split('␟').length().length()] + i + '\n').join(''), '')
+ forNonBlank(cells['67XX'].value, v, forEachIndex(v.split('||'), i, x, '245Z/' + exnr + ' a' + x.trim() + 'x' + '00'[0,2-i.split('␟').length().length()] + i + '\n').join(''), '')
), '')
}}