diff --git a/README.md b/README.md
index 96caf31..11fbd8d 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,10 @@ Analyse dubletter Barcodes
 task barcodes:main
 ```
 
+## Hinweise
+
+* Ursprünglich war eine Zusammenführung der Daten aus Bibliotheca und Alephino bei der Datenmigration geplant. Der Task "pica+" ist dafür ausgelegt, aber wurde letztlich nur für Bibliotheca genutzt. Für Alephino erfolgt der Export in pica+ direkt im Job "Alephino" ohne Zwischenschritt.
+
 ## Systemvoraussetzungen
 
 * GNU/Linux (getestet auf Fedora 32)
diff --git a/alephino/Taskfile.yml b/alephino/Taskfile.yml
index 0915697..98750c2 100644
--- a/alephino/Taskfile.yml
+++ b/alephino/Taskfile.yml
@@ -116,13 +116,26 @@ tasks:
         "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}}
       - > # spec_A_E_01: Signatur 7100a
         "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}}
-#      - > # Export der PICA3-Spalten als CSV; Spalte 2199 muss vorne stehen, weil später für Sortierung benötigt
-#        mkdir -p output &&
-#        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
-#        --output "$(readlink -m output/{{.PROJECT}}.csv)"
-#        --template "$(< config/main/template.txt)"
-#        --rowSeparator ""
-#        > {{.LOG}}
+#      - > # TODO: column 2199 must come first because it is needed for sorting
+      - > # spec_Z_04: enrich PPN via ISBN
+        "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/ppn.json > {{.LOG}}
+      - > # spec_Z_05: cluster items (Exemplare)
+        "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/clustern.json > {{.LOG}}
+      - mkdir -p output
+      - > # export of duplicate barcodes; golang requires strange escaping https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427
+        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
+        --output "$(readlink -m output/barcodes.txt)"
+        --template "{{"{{"}}forNonBlank(cells['8200'].value, v, v + '\n', ''){{"}}"}}"
+        --rowSeparator ""
+        > {{.LOG}}
+      - > # spec_Z_06: delete duplicate barcodes
+        "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/dedup.json > {{.LOG}}
+      - > # export as PICA+
+        "$CLIENT" -P {{.PORT}} {{.PROJECT}}
+        --output "$(readlink -m output/{{.PROJECT}}.txt)"
+        --template "$(< config/main/template.txt)"
+        --rowSeparator ""
+        > {{.LOG}}
       - | # print allocated system resources
         PID="$(lsof -t -i:{{.PORT}})"
         echo "used $(($(ps --no-headers -o rss -p "$PID") / 1024)) MB RAM" > {{.LOG}}
diff --git a/alephino/config/main/clustern.json b/alephino/config/main/clustern.json
new file mode 100644
index 0000000..17b9284
--- /dev/null
+++ b/alephino/config/main/clustern.json
@@ -0,0 +1,138 @@
+[
+  {
+    "op": "core/column-addition",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "isBlank(value)",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": false,
+                "l": "false"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "baseColumnName": "2199",
+    "expression": "grel:forNonBlank(cells['0100'].value,v,v,cells['0110'].value)",
+    "onError": "set-to-blank",
+    "newColumnName": "ppn",
+    "columnInsertIndex": 1
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "ppn",
+          "expression": "isBlank(value)",
+          "columnName": "ppn",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "ppn",
+    "expression": "grel:row.record.cells[columnName].value[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/row-reorder",
+    "mode": "record-based",
+    "sorting": {
+      "criteria": [
+        {
+          "valueType": "string",
+          "column": "ppn",
+          "blankPosition": 2,
+          "errorPosition": 1,
+          "reverse": false,
+          "caseSensitive": false
+        }
+      ]
+    }
+  },
+  {
+    "op": "core/column-addition",
+    "engineConfig": {
+      "facets": [],
+      "mode": "row-based"
+    },
+    "baseColumnName": "ppn",
+    "expression": "grel:forNonBlank(cells['ppn'].value,v,v,forNonBlank(cells['2199'].value,v,v,''))",
+    "onError": "set-to-blank",
+    "newColumnName": "id",
+    "columnInsertIndex": 0
+  },
+  {
+    "op": "core/blank-down",
+    "engineConfig": {
+      "facets": [],
+      "mode": "row-based"
+    },
+    "columnName": "id"
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "id",
+          "expression": "isBlank(value)",
+          "columnName": "id",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "record-based"
+    },
+    "columnName": "2199",
+    "expression": "grel:if(rowIndex - row.record.fromRowIndex == 0,row.record.cells[columnName].value.join('␟'),null)",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/column-removal",
+    "columnName": "ppn"
+  }
+]
diff --git a/alephino/config/main/dedup.json b/alephino/config/main/dedup.json
new file mode 100644
index 0000000..012f5ef
--- /dev/null
+++ b/alephino/config/main/dedup.json
@@ -0,0 +1,35 @@
+[
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "8200",
+          "expression": "facetCount(value, 'value', '8200') > 1",
+          "columnName": "8200",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "8200",
+    "expression": "null",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10,
+    "description": "Text transform on cells in column 8200 using expression null"
+  }
+]
diff --git a/alephino/config/main/ppn.json b/alephino/config/main/ppn.json
new file mode 100644
index 0000000..4f4da49
--- /dev/null
+++ b/alephino/config/main/ppn.json
@@ -0,0 +1,292 @@
+[
+  {
+    "op": "core/column-addition",
+    "engineConfig": {
+      "facets": [],
+      "mode": "row-based"
+    },
+    "baseColumnName": "2000",
+    "expression": "grel:with(value.replace('-',''),x,forEach(x.split('␟'),v,if(v.length()==10,with('978'+v[0,9],z,z+((10-(sum(forRange(0,12,1,i,toNumber(z[i])*(1+(i%2*2)) )) %10)) %10).toString()[0] ),v))).uniques().join('␟')",
+    "onError": "set-to-blank",
+    "newColumnName": "tmp",
+    "columnInsertIndex": 3
+  },
+  {
+    "op": "core/column-split",
+    "engineConfig": {
+      "facets": [],
+      "mode": "row-based"
+    },
+    "columnName": "tmp",
+    "guessCellType": false,
+    "removeOriginalColumn": true,
+    "mode": "separator",
+    "separator": "␟",
+    "regex": false,
+    "maxColumns": 0
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "0100",
+    "expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "0100",
+    "expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "0100",
+    "expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "0100",
+    "expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "0110",
+    "expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "0110",
+    "expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "0110",
+    "expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/text-transform",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "2199",
+          "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))",
+          "columnName": "2199",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": true,
+                "l": "true"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "row-based"
+    },
+    "columnName": "0110",
+    "expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]",
+    "onError": "keep-original",
+    "repeat": false,
+    "repeatCount": 10
+  },
+  {
+    "op": "core/column-removal",
+    "columnName": "tmp 1"
+  },
+  {
+    "op": "core/column-removal",
+    "columnName": "tmp 2"
+  }
+]
diff --git a/alephino/config/main/template.txt b/alephino/config/main/template.txt
new file mode 100644
index 0000000..2408315
--- /dev/null
+++ b/alephino/config/main/template.txt
@@ -0,0 +1,51 @@
+{{
+if(row.index - row.record.fromRowIndex == 0,
+'' + '\n'
++ forNonBlank(cells['0500'].value, v, '002@ ' + '0' + v + '\n', '')
++ forNonBlank(cells['0501a'].value, v, '002C ' + 'a' + v + forNonBlank(cells['0501b'].value, v, 'b' + v, '') + '\n', '')
++ forNonBlank(cells['0502a'].value, v, '002D ' + 'a' + v + forNonBlank(cells['0502b'].value, v, 'b' + v, '') + '\n', '')
++ forNonBlank(cells['0503a'].value, v, '002E ' + 'a' + v + forNonBlank(cells['0503b'].value, v, 'b' + v, '') + '\n', '')
++ forNonBlank(cells['0100'].value, v, '003@ ' + '0' + v + '\n', '')
++ forNonBlank(cells['0110'].value, v, '003S ' + '0' + v + '\n', '')
++ forNonBlank(cells['2000'].value, v, forEach(v.split('␟'),x,'004A ' + '0' + x + '\n').join(''), '')
++ forNonBlank(cells['2009'].value, v, forEach(v.split('␟'),x,'004D ' + '0' + x + '\n').join(''), '')
++ forNonBlank(cells['2020'].value, v, '004F ' + '0' + v + '\n', '')
++ forNonBlank(cells['2201'].value, v, '004L ' + '0' + v + '\n', '')
++ forNonBlank(cells['2199'].value, v, forEach(v.split('␟'),x,'006Y ' + '0' + x + '\n').join(''), '')
++ forNonBlank(cells['2240'].value, v, '007G ' + 'iZDB' + '0' + v + '\n', '')
++ forNonBlank(cells['1500'].value, v, '010@ ' + forEach(v.split('␟'),x,'a' + x).join('') + '\n', '')
++ forNonBlank(cells['1100a'].value, v, '011@ ' + 'a' + v + forNonBlank(cells['1100b'].value, v, 'b' + v, '') + forNonBlank(cells['1100n'].value, v, 'n' + v, '') + '\n', '')
++ forNonBlank(cells['1131'].value, v, '013D ' + 'a' + v + '\n', '')
++ forNonBlank(cells['1140'].value, v, '013H ' + 'a' + v + '\n', '')
++ forNonBlank(cells['4000a'].value, v, '021A ' + 'a' + v + forNonBlank(cells['4000d'].value, v, 'd' + v, '') + forNonBlank(cells['4000h'].value, v, 'h' + v, '') + '\n', '')
++ forNonBlank(cells['4002'].value, v, '021G ' + 'a' + v + '\n', '')
++ forNonBlank(cells['3210'].value, v, '022A/00 ' + 'a' + v + '\n', '')
++ forNonBlank(cells['3000'].value, v, '028A ' + v + '\n', '')
++ forNonBlank(cells['3010'].value, v, forEach(v.split('␟'),x,'028C ' + x + '\n').join(''), '')
++ forNonBlank(cells['3110'].value, v, forEach(v.split('␟'),x,'029F ' + x + '\n').join(''), '')
++ forNonBlank(cells['4020a'].value, v, '032@ ' + 'a' + v + '\n', '')
++ if(or(isNonBlank(cells['4030n'].value),isNonBlank(cells['4030p'].value)),'033A ' + forNonBlank(cells['4030p'].value, v, 'p' + v, '') + forNonBlank(cells['4030n'].value, v, 'n' + v,'') + '\n', '')
++ forNonBlank(cells['4060'].value, v, '034D ' + 'a' + v + '\n', '')
++ forNonBlank(cells['4062'].value, v, '034I ' + 'a' + v + '\n', '')
++ forNonBlank(cells['4061'].value, v, '034M ' + 'a' + v + '\n', '')
++ forNonBlank(cells['4150'].value, v, '036C/00 ' + v + '\n', '')
++ forNonBlank(cells['4160'].value, v, '036D ' + v + '\n', '')
++ forNonBlank(cells['4170_1'].value, v, '036E/00 ' + v + '\n', '')
++ forNonBlank(cells['4170_2'].value, v, '036E/01 ' + v + '\n', '')
++ forNonBlank(cells['4204'].value, v, '037C ' + 'a' + v + '\n', '')
++ forNonBlank(cells['0999'].value, v, '046W ' + 'a' + v + '\n', '')
+,'')
+}}{{
+if(isNonBlank(cells['E0XXb'].value),
+with(with(rowIndex - row.record.fromRowIndex + 1, i, '00'[0,2-i.length()] + i),exnr,
+'208@/' + exnr + ' a' + cells['E0XX'].value + 'b' + cells['E0XXb'].value + '\n'
++ '209A/' + exnr + ' b4736' + 'j' + cells['7100j'].value + 'f' + cells['7100f'].value + forNonBlank(cells['7100a'].value, v, 'a' + v, '') + forNonBlank(cells['7100d'].value, v, 'd' + v, '') + 'x00' + '\n'
++ forNonBlank(cells['8011'].value, v, '209B/' + exnr + ' a' + v + 'x11' + '\n', '')
++ forNonBlank(cells['8100'].value, v, '209C/' + exnr + ' a' + v + 'x00' + '\n', '')
++ forNonBlank(cells['8200'].value, v, '209G/' + exnr + ' a' + v + '\n', '')
++ forNonBlank(cells['8600'].value, v, '209O/' + exnr + ' a' + v + 'x00' + '\n', '')
++ forNonBlank(cells['8515'].value, v, '220B/' + exnr + ' a' + v + '\n', '')
++ forNonBlank(cells['6800'].value, v, forEachIndex(v.split('␟'), i, x, '244Z/' + exnr + ' a' + x.trim() + 'x' + '00'[0,2-i.length()] + i + '\n').join(''), '')
++ forNonBlank(cells['67XX'].value, v, forEachIndex(v.split('||'), i, x, '245Z/' + exnr + ' a' + x.trim() + 'x' + '00'[0,2-i.length()] + i + '\n').join(''), '')
+), '')
+}}