diff --git a/alephino/Taskfile.yml b/alephino/Taskfile.yml index 2d2630a..6164fe2 100644 --- a/alephino/Taskfile.yml +++ b/alephino/Taskfile.yml @@ -79,10 +79,10 @@ tasks: - task: :check # check OpenRefine log for any warnings and exit on error vars: {DIR: '{{.DIR}}'} sources: - - input/{{.PROJECT}}.imp + - input/{{.PROJECT}}*.txt - config/pre/** generates: - - output/{{.PROJECT}}.tsv + - output/{{.PROJECT}}*.tsv ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141 refine-main: @@ -113,15 +113,20 @@ tasks: "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/sortieren.json > {{.LOG}} - > # Bibliothekskürzel aus Import-Dateiname "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/file.json > {{.LOG}} - - > # spec_A_E_01: Signatur 7100a - "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100a.json > {{.LOG}} -# - > # TODO: Spalte 2199 muss vorne stehen, weil für Sortierung benötigt - - > # spec_Z_04: PPN anreichern über ISBN - "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/ppn.json > {{.LOG}} - - > # spec_Z_05: Exemplare clustern - "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/clustern.json > {{.LOG}} + - | # Exemplardaten + "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/E00X.json > {{.LOG}} + "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/7100.json > {{.LOG}} + "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/8200.json > {{.LOG}} + - | # Titeldaten + "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/2199.json > {{.LOG}} + - > # Titel ohne Exemplare löschen + "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/abschluss.json > {{.LOG}} +# - > # spec_Z_04: PPN anreichern über ISBN +# "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/ppn.json > {{.LOG}} +# - > # spec_Z_05: Exemplare clustern +# "$CLIENT" -P {{.PORT}} {{.PROJECT}} --apply config/main/clustern.json > {{.LOG}} - mkdir -p output - - > # Export dubletter Barcodes; golang 
requires strange escaping https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427 + - > # Export der Barcodes; golang requires strange escaping https://stackoverflow.com/questions/17641887/how-do-i-escape-and-delimiters-in-go-templates/17642427#17642427 "$CLIENT" -P {{.PORT}} {{.PROJECT}} --output "$(readlink -m output/barcodes.txt)" --template "{{"{{"}}forNonBlank(cells['8200'].value, v, v + '\n', ''){{"}}"}}" @@ -148,7 +153,8 @@ tasks: - config/main/** generates: - log/{{.PROJECT}}.openrefine.tar.gz -# - output/{{.PROJECT}}.csv + - output/alephino.txt + - output/barcodes.txt ignore_error: true # workaround to avoid an orphaned Java process on error https://github.com/go-task/task/issues/141 default: # enable standalone execution (running `task` in project directory) diff --git a/alephino/config/main/2199.json b/alephino/config/main/2199.json new file mode 100644 index 0000000..c07c1cd --- /dev/null +++ b/alephino/config/main/2199.json @@ -0,0 +1,15 @@ +[ + { + "op": "core/column-addition", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "baseColumnName": "M|IDN", + "expression": "grel:'BA' + cells['File'].value + value", + "onError": "set-to-blank", + "newColumnName": "2199", + "columnInsertIndex": 1, + "description": "Create column 2199 at index 1 based on column M|IDN using expression grel:'BA' + cells['File'].value + value" + } +] diff --git a/alephino/config/main/7100.json b/alephino/config/main/7100.json new file mode 100644 index 0000000..f59e484 --- /dev/null +++ b/alephino/config/main/7100.json @@ -0,0 +1,152 @@ +[ + { + "op": "core/column-addition", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "baseColumnName": "E|100", + "expression": "grel:value.split('\u001f')[0].slice(1).trim()", + "onError": "set-to-blank", + "newColumnName": "7100a", + "columnInsertIndex": 3, + "description": "Create column 7100a at index 3 based on column E|100 using expression 
grel:value.split('\u001f')[0].slice(1).trim()" + }, + { + "op": "core/column-addition", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "E|001", + "expression": "isBlank(value)", + "columnName": "E|001", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "row-based" + }, + "baseColumnName": "File", + "expression": "grel:if(value == 'LE', '0005', '0006')", + "onError": "set-to-blank", + "newColumnName": "7100j", + "columnInsertIndex": 3, + "description": "Create column 7100j at index 3 based on column File using expression grel:if(value == 'LE', '0005', '0006')" + }, + { + "op": "core/column-addition", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "E|001", + "expression": "isBlank(value)", + "columnName": "E|001", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectBlank": false, + "selectError": false + }, + { + "type": "list", + "name": "File", + "expression": "value", + "columnName": "File", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": "LE", + "l": "LE" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "row-based" + }, + "baseColumnName": "File", + "expression": "grel:value", + "onError": "set-to-blank", + "newColumnName": "7100f", + "columnInsertIndex": 3, + "description": "Create column 7100f at index 3 based on column File using expression grel:value" + }, + { + "op": "core/text-transform", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "E|001", + "expression": "isBlank(value)", + "columnName": "E|001", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectBlank": false, + "selectError": false + 
}, + { + "type": "list", + "name": "File", + "expression": "value", + "columnName": "File", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": "RS", + "l": "RS" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "row-based" + }, + "columnName": "7100f", + "expression": "grel:if(cells['E|107'].value.contains('Beuth'),'RS-BD', if(cells['E|105'].value == '00002','RS-MAG', if(cells['E|105'].value == '00003','RS-TH', if(cells['E|105'].value == '00004','RS-ZS','RS'))))", + "onError": "keep-original", + "repeat": false, + "repeatCount": 10, + "description": "Text transform on cells in column 7100f using expression grel:if(cells['E|107'].value.contains('Beuth'),'RS-BD', if(cells['E|105'].value == '00002','RS-MAG', if(cells['E|105'].value == '00003','RS-TH', if(cells['E|105'].value == '00004','RS-ZS','RS'))))" + } +] diff --git a/alephino/config/main/7100a.json b/alephino/config/main/7100a.json deleted file mode 100644 index 439d358..0000000 --- a/alephino/config/main/7100a.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "op": "core/column-addition", - "engineConfig": { - "facets": [], - "mode": "row-based" - }, - "baseColumnName": "E|100", - "expression": "grel:value.split('\u001f')[0].slice(1)", - "onError": "set-to-blank", - "newColumnName": "7100a", - "columnInsertIndex": 5 - } -] diff --git a/alephino/config/main/8200.json b/alephino/config/main/8200.json new file mode 100644 index 0000000..353ec3a --- /dev/null +++ b/alephino/config/main/8200.json @@ -0,0 +1,15 @@ +[ + { + "op": "core/column-addition", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "baseColumnName": "E|120", + "expression": "grel:value", + "onError": "set-to-blank", + "newColumnName": "8200", + "columnInsertIndex": 3, + "description": "Create column 8200 at index 3 based on column E|120 using expression grel:value" + } +] diff --git a/alephino/config/main/E00X.json b/alephino/config/main/E00X.json new file 
mode 100644 index 0000000..073e73e --- /dev/null +++ b/alephino/config/main/E00X.json @@ -0,0 +1,68 @@ +[ + { + "op": "core/column-addition", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "E|001", + "expression": "isBlank(value)", + "columnName": "E|001", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "row-based" + }, + "baseColumnName": "File", + "expression": "grel:'n' + value.toLowercase()", + "onError": "set-to-blank", + "newColumnName": "E0XXb", + "columnInsertIndex": 3, + "description": "Create column E0XXb at index 3 based on column File using expression grel:'n' + value.toLowercase()" + }, + { + "op": "core/column-addition", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "E|001", + "expression": "isBlank(value)", + "columnName": "E|001", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "row-based" + }, + "baseColumnName": "E|002a", + "expression": "grel:value[6,8] + '-' + value[4,6] + '-' + value[2,4]", + "onError": "set-to-blank", + "newColumnName": "E0XX", + "columnInsertIndex": 3, + "description": "Create column E0XX at index 3 based on column E|002a using expression grel:value[6,8] + '-' + value[4,6] + '-' + value[2,4]" + } +] diff --git a/alephino/config/main/abschluss.json b/alephino/config/main/abschluss.json new file mode 100644 index 0000000..028ca9f --- /dev/null +++ b/alephino/config/main/abschluss.json @@ -0,0 +1,49 @@ +[ + { + "op": "core/row-removal", + "engineConfig": { + "facets": [ + { + "type": "list", + "name": "2199", + "expression": "isBlank(value)", + "columnName": "2199", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": false, + "l": "false" + } 
+ } + ], + "selectBlank": false, + "selectError": false + }, + { + "type": "list", + "name": "E0XX", + "expression": "isBlank(value)", + "columnName": "E0XX", + "invert": false, + "omitBlank": false, + "omitError": false, + "selection": [ + { + "v": { + "v": true, + "l": "true" + } + } + ], + "selectBlank": false, + "selectError": false + } + ], + "mode": "row-based" + }, + "description": "Remove rows" + } +] diff --git a/alephino/config/main/clustern.json b/alephino/config/main/clustern.json deleted file mode 100644 index 17b9284..0000000 --- a/alephino/config/main/clustern.json +++ /dev/null @@ -1,138 +0,0 @@ -[ - { - "op": "core/column-addition", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "isBlank(value)", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": false, - "l": "false" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "baseColumnName": "2199", - "expression": "grel:forNonBlank(cells['0100'].value,v,v,cells['0110'].value)", - "onError": "set-to-blank", - "newColumnName": "ppn", - "columnInsertIndex": 1 - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "ppn", - "expression": "isBlank(value)", - "columnName": "ppn", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "ppn", - "expression": "grel:row.record.cells[columnName].value[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/row-reorder", - "mode": "record-based", - "sorting": { - "criteria": [ - { - "valueType": "string", - "column": "ppn", - "blankPosition": 2, - "errorPosition": 1, - "reverse": false, - "caseSensitive": false - } - ] - } - }, - { - "op": 
"core/column-addition", - "engineConfig": { - "facets": [], - "mode": "row-based" - }, - "baseColumnName": "ppn", - "expression": "grel:forNonBlank(cells['ppn'].value,v,v,forNonBlank(cells['2199'].value,v,v,''))", - "onError": "set-to-blank", - "newColumnName": "id", - "columnInsertIndex": 0 - }, - { - "op": "core/blank-down", - "engineConfig": { - "facets": [], - "mode": "row-based" - }, - "columnName": "id" - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "id", - "expression": "isBlank(value)", - "columnName": "id", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "record-based" - }, - "columnName": "2199", - "expression": "grel:if(rowIndex - row.record.fromRowIndex == 0,row.record.cells[columnName].value.join('␟'),null)", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/column-removal", - "columnName": "ppn" - } -] diff --git a/alephino/config/main/ppn.json b/alephino/config/main/ppn.json deleted file mode 100644 index 4f4da49..0000000 --- a/alephino/config/main/ppn.json +++ /dev/null @@ -1,292 +0,0 @@ -[ - { - "op": "core/column-addition", - "engineConfig": { - "facets": [], - "mode": "row-based" - }, - "baseColumnName": "2000", - "expression": "grel:with(value.replace('-',''),x,forEach(x.split('␟'),v,if(v.length()==10,with('978'+v[0,9],z,z+((10-(sum(forRange(0,12,1,i,toNumber(z[i])*(1+(i%2*2)) )) %10)) %10).toString()[0] ),v))).uniques().join('␟')", - "onError": "set-to-blank", - "newColumnName": "tmp", - "columnInsertIndex": 3 - }, - { - "op": "core/column-split", - "engineConfig": { - "facets": [], - "mode": "row-based" - }, - "columnName": "tmp", - "guessCellType": false, - "removeOriginalColumn": true, - "mode": "separator", - "separator": "␟", - "regex": false, - "maxColumns": 0 - }, - { - "op": 
"core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "0100", - "expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "0100", - "expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "0100", - "expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 
1'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "0100", - "expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0100'].value,v,v,null)).join('␟').split('␟')[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "0110", - "expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - 
"selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "0110", - "expression": "grel:forEach(cells['tmp 1'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "0110", - "expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 1'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/text-transform", - "engineConfig": { - "facets": [ - { - "type": "list", - "name": "2199", - "expression": "grel:and(isNonBlank(cells['2199'].value),isBlank(cells['0100'].value),isBlank(cells['0110'].value))", - "columnName": "2199", - "invert": false, - "omitBlank": false, - "omitError": false, - "selection": [ - { - "v": { - "v": true, - "l": "true" - } - } - ], - "selectBlank": false, - "selectError": false - } - ], - "mode": "row-based" - }, - "columnName": "0110", - "expression": "grel:forEach(cells['tmp 2'].value.cross('pica+','tmp 2'),r,forNonBlank(r.cells['0110'].value,v,v,null)).join('␟').split('␟')[0]", - "onError": "keep-original", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/column-removal", - "columnName": "tmp 1" - }, - { - "op": "core/column-removal", - "columnName": "tmp 2" - } -] diff --git a/alephino/config/main/sortieren.json b/alephino/config/main/sortieren.json index 5fefd5d..2873bb7 100644 --- 
a/alephino/config/main/sortieren.json +++ b/alephino/config/main/sortieren.json @@ -9,16 +9,6 @@ "columnName": "E|001", "index": 0 }, - { - "op": "core/column-move", - "columnName": "M|029", - "index": 0 - }, - { - "op": "core/column-move", - "columnName": "M|026f", - "index": 0 - }, { "op": "core/column-move", "columnName": "M|IDN",