# Summary of this patch (text before the first "diff --git" header is ignored by patch tools):
#
# muenster/Taskfile.yml
#   * Lowers MINIMUM (comment: minimum number of records to expect) from 7300 to 6600.
#   * Moves the config/flocat.json step (comment: remove the separate download link
#     when only one file is present) from after config/abstract.json to directly
#     after config/nur-mit-pdf.json.
#   * Adds a config/nur-ein-direktlink.json step (comment: temporarily delete records
#     that contain more than one direct link, see noah issue 25).
#   * Adds a config/keine-zeitschriftenhefte.json step (comment: temporarily delete
#     journal issues, see noah issue 31).
#
# muenster/config/keine-zeitschriftenhefte.json (new)
#   * core/row-removal in record-based mode, list facet on the mods:genre column
#     with the value "PeriodicalPart" selected.
#
# muenster/config/nur-ein-direktlink.json (new)
#   * core/row-removal in record-based mode; the facet expression tests whether the
#     record's xlink:href values have length 1 and the first one, lowercased,
#     contains '.pdf'. The facet selects the value false, i.e. the records for
#     which that test does not hold.
#
# NOTE(review): line breaks and indentation were reconstructed from the hunk line
# counts, assuming 2-space YAML/JSON indentation — verify the context lines against
# the target files before applying.
#
diff --git a/muenster/Taskfile.yml b/muenster/Taskfile.yml
index b72fa5d..261631c 100644
--- a/muenster/Taskfile.yml
+++ b/muenster/Taskfile.yml
@@ -4,7 +4,7 @@ tasks:
   main:
     desc: miami ULB Münster
     vars:
-      MINIMUM: 7300 # Mindestanzahl der zu erwartenden Datensätze
+      MINIMUM: 6600 # Mindestanzahl der zu erwartenden Datensätze
       PROJECT: '{{splitList ":" .TASK | first}}' # results in the task namespace, which is identical to the directory name
     cmds:
       - task: harvest
@@ -65,6 +65,18 @@ tasks:
         "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --apply config/nur-mit-pdf.json
         > {{.LOG}}
+      - > # Separaten Download-Link entfernen, wenn nur eine Datei vorhanden ist
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
+        --apply config/flocat.json
+        > {{.LOG}}
+      - > # Vorläufig Datensätze löschen, die mehr als einen Direktlink beinhalten https://github.com/opencultureconsulting/noah/issues/25
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
+        --apply config/nur-ein-direktlink.json
+        > {{.LOG}}
+      - > # Vorläufig Zeitschriftenhefte löschen https://github.com/opencultureconsulting/noah/issues/31
+        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
+        --apply config/keine-zeitschriftenhefte.json
+        > {{.LOG}}
       - > # Datensätze mit "restriction on access" löschen
         "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --apply config/restriction.json
@@ -85,10 +97,6 @@ tasks:
         "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --apply config/abstract.json
         > {{.LOG}}
-      - > # Separaten Download-Link entfernen, wenn nur eine Datei vorhanden ist
-        "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
-        --apply config/flocat.json
-        > {{.LOG}}
       - > # mets:file - ID eindeutig machen, um Validierungsfehler zu vermeiden
         "$CLIENT" -P {{.PORT}} "{{.PROJECT}}"
         --apply config/file-id.json
diff --git a/muenster/config/keine-zeitschriftenhefte.json b/muenster/config/keine-zeitschriftenhefte.json
new file mode 100644
index 0000000..bdca456
--- /dev/null
+++ b/muenster/config/keine-zeitschriftenhefte.json
@@ -0,0 +1,30 @@
+[
+  {
+    "op": "core/row-removal",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "mets:mets - mets:dmdSec - mets:mdWrap - mets:xmlData - mods:mods - mods:genre",
+          "expression": "value",
+          "columnName": "mets:mets - mets:dmdSec - mets:mdWrap - mets:xmlData - mods:mods - mods:genre",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": "PeriodicalPart",
+                "l": "PeriodicalPart"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "record-based"
+    },
+    "description": "Remove rows"
+  }
+]
diff --git a/muenster/config/nur-ein-direktlink.json b/muenster/config/nur-ein-direktlink.json
new file mode 100644
index 0000000..609493e
--- /dev/null
+++ b/muenster/config/nur-ein-direktlink.json
@@ -0,0 +1,30 @@
+[
+  {
+    "op": "core/row-removal",
+    "engineConfig": {
+      "facets": [
+        {
+          "type": "list",
+          "name": "mets:mets - mets:fileSec - mets:fileGrp - mets:file - mets:FLocat - xlink:href",
+          "expression": "grel:with(row.record.cells[columnName].value, x, and(x.length() == 1, x[0].toLowercase().contains('.pdf')))",
+          "columnName": "mets:mets - mets:fileSec - mets:fileGrp - mets:file - mets:FLocat - xlink:href",
+          "invert": false,
+          "omitBlank": false,
+          "omitError": false,
+          "selection": [
+            {
+              "v": {
+                "v": false,
+                "l": "false"
+              }
+            }
+          ],
+          "selectBlank": false,
+          "selectError": false
+        }
+      ],
+      "mode": "record-based"
+    },
+    "description": "Remove rows"
+  }
+]