From 29544f7eaa4543d9a987fea2c769548c4743157d Mon Sep 17 00:00:00 2001 From: Sebastian Meyer Date: Thu, 4 Jan 2024 08:56:11 +0100 Subject: [PATCH] Add command to prune deleted records --- bin/cli | 2 + config/config.dist.yml | 23 +++++ src/Configuration.php | 8 +- src/Console/PruneRecordsCommand.php | 92 ++++++++++++++++++++ src/Console/PruneResumptionTokensCommand.php | 2 +- src/Console/UpdateFormatsCommand.php | 4 +- src/Database.php | 58 +++++++++--- src/Database/Record.php | 36 ++++---- src/Database/Set.php | 63 ++++++++++++-- src/Middleware/GetRecord.php | 4 +- src/Middleware/ListIdentifiers.php | 4 +- 11 files changed, 257 insertions(+), 39 deletions(-) create mode 100644 src/Console/PruneRecordsCommand.php diff --git a/bin/cli b/bin/cli index 8571e92..badc792 100644 --- a/bin/cli +++ b/bin/cli @@ -29,6 +29,7 @@ use Exception; use OCC\OaiPmh2\Console\AddRecordCommand; use OCC\OaiPmh2\Console\BulkUpdateCommand; use OCC\OaiPmh2\Console\DeleteRecordCommand; +use OCC\OaiPmh2\Console\PruneRecordsCommand; use OCC\OaiPmh2\Console\PruneResumptionTokensCommand; use OCC\OaiPmh2\Console\UpdateFormatsCommand; @@ -38,6 +39,7 @@ $commands = [ new AddRecordCommand(), new BulkUpdateCommand(), new DeleteRecordCommand(), + new PruneRecordsCommand(), new PruneResumptionTokensCommand(), new UpdateFormatsCommand() ]; diff --git a/config/config.dist.yml b/config/config.dist.yml index 99fb8e8..579699b 100644 --- a/config/config.dist.yml +++ b/config/config.dist.yml @@ -72,6 +72,29 @@ metadataPrefix: { } } +# +# Deleted records policy +# +# This states if and how the repository keeps track of deleted records. You can +# delete records by importing empty records with the same identifiers and +# metadata prefixes. Depending on the deleted records policy those records will +# be either marked as deleted or completely removed from the database. +# "no" means the repository does not provide any information about deletions. 
+# "persistent" means the repository consistently provides information about +# deletions. +# "transient" means the repository may provide information about deletions. +# This is handled exactly the same as "persistent", but you are allowed to +# manually prune deleted records from the database (see below). +# +# ["no"|"persistent"|"transient"] +# +# Run "bin/cli oai:records:prune" after changing the deleted records policy to +# "no" to remove all deleted records from the database. +# If your policy is "transient" and you want to clean up deleted records from +# the database anyway, run the command with the "--force" flag. +# +deletedRecords: 'transient' + # # Maximum number of records to return per request # diff --git a/src/Configuration.php b/src/Configuration.php index e8ec706..59c7854 100644 --- a/src/Configuration.php +++ b/src/Configuration.php @@ -40,11 +40,12 @@ use Symfony\Component\Yaml\Yaml; * @property-read string $adminEmail * @property-read string $database * @property-read array $metadataPrefix + * @property-read string $deletedRecords * @property-read int $maxRecords * @property-read int $tokenValid * * @template TKey of string - * @template TValue + * @template TValue of array|int|string */ class Configuration { @@ -102,6 +103,11 @@ class Configuration ]) ]) ], + 'deletedRecords' => [ + new Assert\Type('string'), + new Assert\Choice(['no', 'persistent', 'transient']), + new Assert\NotBlank() + ], 'maxRecords' => [ new Assert\Type('int'), new Assert\Range([ diff --git a/src/Console/PruneRecordsCommand.php b/src/Console/PruneRecordsCommand.php new file mode 100644 index 0000000..826b598 --- /dev/null +++ b/src/Console/PruneRecordsCommand.php @@ -0,0 +1,92 @@ + + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +declare(strict_types=1); + +namespace OCC\OaiPmh2\Console; + +use OCC\OaiPmh2\Configuration; +use OCC\OaiPmh2\Database; +use Symfony\Component\Console\Attribute\AsCommand; +use Symfony\Component\Console\Command\Command; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Input\InputOption; +use Symfony\Component\Console\Output\OutputInterface; + +/** + * Prune deleted records from database. + * + * @author Sebastian Meyer + * @package opencultureconsulting/oai-pmh2 + */ +#[AsCommand( + name: 'oai:records:prune', + description: 'Prune deleted records from database' +)] +class PruneRecordsCommand extends Command +{ + protected function configure(): void + { + $this->addOption( + 'force', + null, + InputOption::VALUE_NONE, + 'Deletes records even under "transient" policy.' + ); + parent::configure(); + } + + protected function execute(InputInterface $input, OutputInterface $output): int + { + $policy = Configuration::getInstance()->deletedRecords; + $forced = (bool) $input->getOption('force'); + if ( + $policy === 'no' + or ($policy === 'transient' && $forced) + ) { + $deleted = Database::getInstance()->pruneDeletedRecords(); + $output->writeln([ + '', + sprintf( + ' [OK] %d records are deleted and were successfully removed! ', + $deleted + ), + '' + ]); + return Command::SUCCESS; + } else { + if ($policy === 'persistent') { + $output->writeln([ + '', + ' [ERROR] Under "persistent" policy removal of deleted records is not allowed. 
', + '' + ]); + return Command::FAILURE; + } else { + $output->writeln([ + '', + ' [INFO] Use the "--force" option to remove deleted records under "transient" policy. ', + '' + ]); + return Command::INVALID; + } + } + } +} diff --git a/src/Console/PruneResumptionTokensCommand.php b/src/Console/PruneResumptionTokensCommand.php index ea86359..24a9c4e 100644 --- a/src/Console/PruneResumptionTokensCommand.php +++ b/src/Console/PruneResumptionTokensCommand.php @@ -46,7 +46,7 @@ class PruneResumptionTokensCommand extends Command $output->writeln([ '', sprintf( - ' [OK] %d resumption tokens are expired and were successfully deleted. ', + ' [OK] %d resumption tokens are expired and were successfully deleted! ', $expired ), '' diff --git a/src/Console/UpdateFormatsCommand.php b/src/Console/UpdateFormatsCommand.php index 50e5332..a484102 100644 --- a/src/Console/UpdateFormatsCommand.php +++ b/src/Console/UpdateFormatsCommand.php @@ -66,7 +66,7 @@ class UpdateFormatsCommand extends Command ++$added; $output->writeln([ sprintf( - ' [OK] Metadata format "%s" added or updated successfully. ', + ' [OK] Metadata format "%s" added or updated successfully! ', $prefix ) ]); @@ -87,7 +87,7 @@ class UpdateFormatsCommand extends Command ++$deleted; $output->writeln([ sprintf( - ' [OK] Metadata format "%s" and all associated records deleted successfully. ', + ' [OK] Metadata format "%s" and all associated records deleted successfully! 
', $prefix ) ]); diff --git a/src/Database.php b/src/Database.php index c1cf636..fba13b2 100644 --- a/src/Database.php +++ b/src/Database.php @@ -23,6 +23,7 @@ declare(strict_types=1); namespace OCC\OaiPmh2; use DateTime; +use Doctrine\Common\Collections\Criteria; use Doctrine\DBAL\DriverManager; use Doctrine\DBAL\Schema\AbstractAsset; use Doctrine\DBAL\Tools\DsnParser; @@ -149,11 +150,11 @@ class Database if (isset($identifier)) { $dql->innerJoin( 'format.records', - 'records', + 'record', 'WITH', $dql->expr()->andX( - $dql->expr()->eq('records.identifier', ':identifier'), - $dql->expr()->neq('records.data', '') + $dql->expr()->eq('record.identifier', ':identifier'), + $dql->expr()->isNotNull('record.content') ) ) ->setParameter('identifier', $identifier); @@ -328,6 +329,41 @@ class Database return (bool) $query->getOneOrNullResult(AbstractQuery::HYDRATE_SINGLE_SCALAR); } + /** + * Prune deleted records. + * + * @return int The number of removed records + */ + public function pruneDeletedRecords(): int + { + $repository = $this->entityManager->getRepository(Record::class); + $criteria = Criteria::create()->where(Criteria::expr()->isNull('content')); + $records = $repository->matching($criteria); + foreach ($records as $record) { + $this->entityManager->remove($record); + } + $this->entityManager->flush(); + $this->pruneOrphanSets(); + return count($records); + } + + /** + * Prune orphan sets. + * + * @return void + */ + public function pruneOrphanSets(): void + { + $repository = $this->entityManager->getRepository(Set::class); + $sets = $repository->findAll(); + foreach ($sets as $set) { + if ($set->isEmpty()) { + $this->entityManager->remove($set); + } + } + $this->entityManager->flush(); + } + /** * Prune expired resumption tokens. 
* @@ -335,13 +371,14 @@ class Database */ public function pruneResumptionTokens(): int { - $dql = $this->entityManager->createQueryBuilder(); - $dql->delete(Token::class, 'token') - ->where($dql->expr()->lt('token.validUntil', ':now')) - ->setParameter('now', new DateTime()); - $query = $dql->getQuery(); - /** @var int */ - return $query->execute(); + $repository = $this->entityManager->getRepository(Token::class); + $criteria = Criteria::create()->where(Criteria::expr()->lt('validUntil', new DateTime())); + $tokens = $repository->matching($criteria); + foreach ($tokens as $token) { + $this->entityManager->remove($token); + } + $this->entityManager->flush(); + return count($tokens); } /** @@ -357,6 +394,7 @@ class Database if (isset($format)) { $this->entityManager->remove($format); $this->entityManager->flush(); + $this->pruneOrphanSets(); return true; } else { return false; diff --git a/src/Database/Record.php b/src/Database/Record.php index 28f62e6..7121d3e 100644 --- a/src/Database/Record.php +++ b/src/Database/Record.php @@ -64,8 +64,8 @@ class Record /** * The record's content. */ - #[ORM\Column(type: 'text')] - private string $content = ''; + #[ORM\Column(type: 'text', nullable: true)] + private ?string $content = null; /** * Collection of associated sets. @@ -97,9 +97,9 @@ class Record /** * Get the record's content. * - * @return string The record's content + * @return ?string The record's content or NULL if deleted */ - public function getContent(): string + public function getContent(): ?string { return $this->content; } @@ -174,21 +174,23 @@ class Record /** * Set record's content. * - * @param string $data The record's content + * @param ?string $data The record's content or NULL to mark as deleted * @param bool $validate Should the input be validated? 
* * @return void * * @throws ValidationFailedException */ - public function setContent(string $data, bool $validate = true): void + public function setContent(?string $data = null, bool $validate = true): void { - $data = trim($data); - if ($validate && $data !== '') { - try { - $data = $this->validate($data); - } catch (ValidationFailedException $exception) { - throw $exception; + if (isset($data)) { + $data = trim($data); + if ($validate && $data !== '') { + try { + $data = $this->validate($data); + } catch (ValidationFailedException $exception) { + throw $exception; + } } } $this->content = $data; @@ -246,16 +248,18 @@ class Record * * @param string $identifier The record identifier * @param Format $format The format - * @param string $data The record's content + * @param ?string $data The record's content * * @throws ValidationFailedException */ - public function __construct(string $identifier, Format $format, string $data = '') + public function __construct(string $identifier, Format $format, ?string $data = null) { try { $this->identifier = $identifier; $this->setFormat($format); - $this->setContent($data); + if (isset($data)) { + $this->setContent($data); + } $this->setLastChanged(); $this->sets = new ArrayCollection(); } catch (ValidationFailedException $exception) { @@ -270,6 +274,6 @@ class Record */ public function __toString(): string { - return $this->content; + return $this->content ?? ''; } } diff --git a/src/Database/Set.php b/src/Database/Set.php index 666c046..64a88fa 100644 --- a/src/Database/Set.php +++ b/src/Database/Set.php @@ -25,6 +25,9 @@ namespace OCC\OaiPmh2\Database; use Doctrine\Common\Collections\ArrayCollection; use Doctrine\Common\Collections\Collection; use Doctrine\ORM\Mapping as ORM; +use Symfony\Component\Validator\Constraints as Assert; +use Symfony\Component\Validator\Exception\ValidationFailedException; +use Symfony\Component\Validator\Validation; /** * Doctrine/ORM Entity for sets. 
@@ -74,6 +77,7 @@ class Set { if (!$this->records->contains($record)) { $this->records->add($record); + $record->addSet($this); } } @@ -117,6 +121,16 @@ class Set return $this->records->toArray(); } + /** + * Whether this set contains any records. + * + * @return bool TRUE if empty or FALSE otherwise + */ + public function isEmpty(): bool + { + return count($this->records) === 0; + } + /** * Update bi-directional association with records. * @@ -126,7 +140,10 @@ class Set */ public function removeRecord(Record $record): void { - $this->records->removeElement($record); + if ($this->records->contains($record)) { + $this->records->removeElement($record); + $record->removeSet($this); + } } /** @@ -141,18 +158,54 @@ class Set $this->description = trim($description); } + /** + * Validate set spec. + * + * @param string $spec The set spec + * + * @return string The validated spec + * + * @throws ValidationFailedException + */ + protected function validate(string $spec): string + { + $spec = trim($spec); + $validator = Validation::createValidator(); + $violations = $validator->validate( + $spec, + [ + new Assert\Regex([ + 'pattern' => '/\s/', + 'match' => false, + 'message' => 'This value contains whitespaces.' + ]), + new Assert\NotBlank() + ] + ); + if ($violations->count() > 0) { + throw new ValidationFailedException(null, $violations); + } + return $spec; + } + /** * Get new entity of set. 
* * @param string $spec The set spec * @param string $name The name of the set * @param string $description The description of the set + * + * @throws ValidationFailedException */ public function __construct(string $spec, string $name, string $description = '') { - $this->spec = $spec; - $this->name = $name; - $this->setDescription($description); - $this->records = new ArrayCollection(); + try { + $this->spec = $this->validate($spec); + $this->name = trim($name); + $this->setDescription($description); + $this->records = new ArrayCollection(); + } catch (ValidationFailedException $exception) { + throw $exception; + } } } diff --git a/src/Middleware/GetRecord.php b/src/Middleware/GetRecord.php index 3d08597..607670b 100644 --- a/src/Middleware/GetRecord.php +++ b/src/Middleware/GetRecord.php @@ -64,7 +64,7 @@ class GetRecord extends Middleware $getRecord->appendChild($record); $header = $document->createElement('header'); - if ($oaiRecord->getContent() === '') { + if ($oaiRecord->getContent() === null) { $header->setAttribute('status', 'deleted'); } $record->appendChild($header); @@ -80,7 +80,7 @@ class GetRecord extends Middleware $header->appendChild($setSpec); } - if ($oaiRecord->getContent() !== '') { + if ($oaiRecord->getContent() !== null) { $metadata = $document->createElement('metadata'); $record->appendChild($metadata); diff --git a/src/Middleware/ListIdentifiers.php b/src/Middleware/ListIdentifiers.php index a3133d6..904f122 100644 --- a/src/Middleware/ListIdentifiers.php +++ b/src/Middleware/ListIdentifiers.php @@ -106,7 +106,7 @@ class ListIdentifiers extends Middleware } $header = $document->createElement('header'); - if ($oaiRecord->getContent() === '') { + if ($oaiRecord->getContent() === null) { $header->setAttribute('status', 'deleted'); } $baseNode->appendChild($header); @@ -122,7 +122,7 @@ class ListIdentifiers extends Middleware $header->appendChild($setSpec); } - if ($verb === 'ListRecords' && $oaiRecord->getContent() !== '') { + if ($verb === 
'ListRecords' && $oaiRecord->getContent() !== null) { $metadata = $document->createElement('metadata'); $baseNode->appendChild($metadata);