Add command to prune deleted records

This commit is contained in:
Sebastian Meyer 2024-01-04 08:56:11 +01:00
parent b7ddb19cfd
commit 29544f7eaa
11 changed files with 257 additions and 39 deletions

View File

@ -29,6 +29,7 @@ use Exception;
use OCC\OaiPmh2\Console\AddRecordCommand; use OCC\OaiPmh2\Console\AddRecordCommand;
use OCC\OaiPmh2\Console\BulkUpdateCommand; use OCC\OaiPmh2\Console\BulkUpdateCommand;
use OCC\OaiPmh2\Console\DeleteRecordCommand; use OCC\OaiPmh2\Console\DeleteRecordCommand;
use OCC\OaiPmh2\Console\PruneRecordsCommand;
use OCC\OaiPmh2\Console\PruneResumptionTokensCommand; use OCC\OaiPmh2\Console\PruneResumptionTokensCommand;
use OCC\OaiPmh2\Console\UpdateFormatsCommand; use OCC\OaiPmh2\Console\UpdateFormatsCommand;
@ -38,6 +39,7 @@ $commands = [
new AddRecordCommand(), new AddRecordCommand(),
new BulkUpdateCommand(), new BulkUpdateCommand(),
new DeleteRecordCommand(), new DeleteRecordCommand(),
new PruneRecordsCommand(),
new PruneResumptionTokensCommand(), new PruneResumptionTokensCommand(),
new UpdateFormatsCommand() new UpdateFormatsCommand()
]; ];

View File

@ -72,6 +72,29 @@ metadataPrefix: {
} }
} }
#
# Deleted records policy
#
# This states if and how the repository keeps track of deleted records. You can
# delete records by importing empty records with the same identifiers and
# metadata prefixes. Depending on the deleted records policy those records will
# be either marked as deleted or completely removed from the database.
# "no" means the repository does not provide any information about deletions.
# "persistent" means the repository consistently provides information about
# deletions.
# "transient" means the repository may provide information about deletions.
# This is handled exactly the same as "persistent", but you are allowed to
# manually prune deleted records from the database (see below).
#
# ["no"|"persistent"|"transient"]
#
# Run "bin/cli oai:records:prune" after changing the deleted records policy to
# "no" to remove all deleted records from the database.
# If your policy is "transient" and you want to clean up deleted records from
# the database anyway, run the command with the "--force" flag.
#
deletedRecords: 'transient'
# #
# Maximum number of records to return per request # Maximum number of records to return per request
# #

View File

@ -40,11 +40,12 @@ use Symfony\Component\Yaml\Yaml;
* @property-read string $adminEmail * @property-read string $adminEmail
* @property-read string $database * @property-read string $database
* @property-read array $metadataPrefix * @property-read array $metadataPrefix
* @property-read string $deletedRecords
* @property-read int $maxRecords * @property-read int $maxRecords
* @property-read int $tokenValid * @property-read int $tokenValid
* *
* @template TKey of string * @template TKey of string
* @template TValue * @template TValue of array|int|string
*/ */
class Configuration class Configuration
{ {
@ -102,6 +103,11 @@ class Configuration
]) ])
]) ])
], ],
'deletedRecords' => [
new Assert\Type('string'),
new Assert\Choice(['no', 'persistent', 'transient']),
new Assert\NotBlank()
],
'maxRecords' => [ 'maxRecords' => [
new Assert\Type('int'), new Assert\Type('int'),
new Assert\Range([ new Assert\Range([

View File

@ -0,0 +1,92 @@
<?php
/**
* OAI-PMH 2.0 Data Provider
* Copyright (C) 2023 Sebastian Meyer <sebastian.meyer@opencultureconsulting.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace OCC\OaiPmh2\Console;
use OCC\OaiPmh2\Configuration;
use OCC\OaiPmh2\Database;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
/**
* Prune deleted records from database.
*
* @author Sebastian Meyer <sebastian.meyer@opencultureconsulting.com>
* @package opencultureconsulting/oai-pmh2
*/
#[AsCommand(
    name: 'oai:records:prune',
    description: 'Prune deleted records from database'
)]
class PruneRecordsCommand extends Command
{
    /**
     * Registers the "--force" flag before delegating to the parent
     * configuration.
     *
     * @return void
     */
    protected function configure(): void
    {
        $this->addOption(
            'force',
            null,
            InputOption::VALUE_NONE,
            'Deletes records even under "transient" policy.'
        );
        parent::configure();
    }

    /**
     * Removes deleted records if the configured policy permits it.
     *
     * Pruning happens when the deleted records policy is "no", or when it is
     * "transient" and the "--force" flag was given. Under "persistent" policy
     * the command refuses with an error; in every remaining case it only
     * prints a hint about the "--force" flag.
     *
     * @param InputInterface $input The console input
     * @param OutputInterface $output The console output
     *
     * @return int Command::SUCCESS after pruning, Command::FAILURE under
     *             "persistent" policy, Command::INVALID otherwise
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $policy = Configuration::getInstance()->deletedRecords;
        $isForced = (bool) $input->getOption('force');

        // "persistent" never allows pruning, regardless of "--force".
        if ($policy === 'persistent') {
            $output->writeln([
                '',
                ' [ERROR] Under "persistent" policy removal of deleted records is not allowed. ',
                ''
            ]);
            return Command::FAILURE;
        }

        // Anything other than "no" needs to be "transient" with "--force".
        if ($policy !== 'no' && !($policy === 'transient' && $isForced)) {
            $output->writeln([
                '',
                ' [INFO] Use the "--force" option to remove deleted records under "transient" policy. ',
                ''
            ]);
            return Command::INVALID;
        }

        $removed = Database::getInstance()->pruneDeletedRecords();
        $message = sprintf(
            ' [OK] %d records are deleted and were successfully removed! ',
            $removed
        );
        $output->writeln(['', $message, '']);
        return Command::SUCCESS;
    }
}

View File

@ -46,7 +46,7 @@ class PruneResumptionTokensCommand extends Command
$output->writeln([ $output->writeln([
'', '',
sprintf( sprintf(
' [OK] %d resumption tokens are expired and were successfully deleted. ', ' [OK] %d resumption tokens are expired and were successfully deleted! ',
$expired $expired
), ),
'' ''

View File

@ -66,7 +66,7 @@ class UpdateFormatsCommand extends Command
++$added; ++$added;
$output->writeln([ $output->writeln([
sprintf( sprintf(
' [OK] Metadata format "%s" added or updated successfully. ', ' [OK] Metadata format "%s" added or updated successfully! ',
$prefix $prefix
) )
]); ]);
@ -87,7 +87,7 @@ class UpdateFormatsCommand extends Command
++$deleted; ++$deleted;
$output->writeln([ $output->writeln([
sprintf( sprintf(
' [OK] Metadata format "%s" and all associated records deleted successfully. ', ' [OK] Metadata format "%s" and all associated records deleted successfully! ',
$prefix $prefix
) )
]); ]);

View File

@ -23,6 +23,7 @@ declare(strict_types=1);
namespace OCC\OaiPmh2; namespace OCC\OaiPmh2;
use DateTime; use DateTime;
use Doctrine\Common\Collections\Criteria;
use Doctrine\DBAL\DriverManager; use Doctrine\DBAL\DriverManager;
use Doctrine\DBAL\Schema\AbstractAsset; use Doctrine\DBAL\Schema\AbstractAsset;
use Doctrine\DBAL\Tools\DsnParser; use Doctrine\DBAL\Tools\DsnParser;
@ -149,11 +150,11 @@ class Database
if (isset($identifier)) { if (isset($identifier)) {
$dql->innerJoin( $dql->innerJoin(
'format.records', 'format.records',
'records', 'record',
'WITH', 'WITH',
$dql->expr()->andX( $dql->expr()->andX(
$dql->expr()->eq('records.identifier', ':identifier'), $dql->expr()->eq('record.identifier', ':identifier'),
$dql->expr()->neq('records.data', '') $dql->expr()->isNotNull('record.content')
) )
) )
->setParameter('identifier', $identifier); ->setParameter('identifier', $identifier);
@ -328,6 +329,41 @@ class Database
return (bool) $query->getOneOrNullResult(AbstractQuery::HYDRATE_SINGLE_SCALAR); return (bool) $query->getOneOrNullResult(AbstractQuery::HYDRATE_SINGLE_SCALAR);
} }
/**
 * Remove all records marked as deleted from the database.
 *
 * A record counts as deleted when its content is NULL. After the records
 * are removed, sets that no longer contain any records are pruned as well.
 *
 * @return int The number of removed records
 */
public function pruneDeletedRecords(): int
{
    $withoutContent = Criteria::create()->where(Criteria::expr()->isNull('content'));
    $deletedRecords = $this->entityManager
        ->getRepository(Record::class)
        ->matching($withoutContent);
    foreach ($deletedRecords as $deletedRecord) {
        $this->entityManager->remove($deletedRecord);
    }
    $this->entityManager->flush();
    $this->pruneOrphanSets();
    return count($deletedRecords);
}
/**
 * Remove every set that does not contain any records.
 *
 * All sets are fetched from the repository; those reporting themselves as
 * empty are scheduled for removal and the changes are flushed at the end.
 *
 * @return void
 */
public function pruneOrphanSets(): void
{
    $allSets = $this->entityManager->getRepository(Set::class)->findAll();
    foreach ($allSets as $candidate) {
        if (!$candidate->isEmpty()) {
            continue;
        }
        $this->entityManager->remove($candidate);
    }
    $this->entityManager->flush();
}
/** /**
* Prune expired resumption tokens. * Prune expired resumption tokens.
* *
@ -335,13 +371,14 @@ class Database
*/ */
public function pruneResumptionTokens(): int public function pruneResumptionTokens(): int
{ {
$dql = $this->entityManager->createQueryBuilder(); $repository = $this->entityManager->getRepository(Token::class);
$dql->delete(Token::class, 'token') $criteria = Criteria::create()->where(Criteria::expr()->lt('validUntil', new DateTime()));
->where($dql->expr()->lt('token.validUntil', ':now')) $tokens = $repository->matching($criteria);
->setParameter('now', new DateTime()); foreach ($tokens as $token) {
$query = $dql->getQuery(); $this->entityManager->remove($token);
/** @var int */ }
return $query->execute(); $this->entityManager->flush();
return count($tokens);
} }
/** /**
@ -357,6 +394,7 @@ class Database
if (isset($format)) { if (isset($format)) {
$this->entityManager->remove($format); $this->entityManager->remove($format);
$this->entityManager->flush(); $this->entityManager->flush();
$this->pruneOrphanSets();
return true; return true;
} else { } else {
return false; return false;

View File

@ -64,8 +64,8 @@ class Record
/** /**
* The record's content. * The record's content.
*/ */
#[ORM\Column(type: 'text')] #[ORM\Column(type: 'text', nullable: true)]
private string $content = ''; private ?string $content = null;
/** /**
* Collection of associated sets. * Collection of associated sets.
@ -97,9 +97,9 @@ class Record
/** /**
* Get the record's content. * Get the record's content.
* *
* @return string The record's content * @return ?string The record's content or NULL if deleted
*/ */
public function getContent(): string public function getContent(): ?string
{ {
return $this->content; return $this->content;
} }
@ -174,21 +174,23 @@ class Record
/** /**
* Set record's content. * Set record's content.
* *
* @param string $data The record's content * @param ?string $data The record's content or NULL to mark as deleted
* @param bool $validate Should the input be validated? * @param bool $validate Should the input be validated?
* *
* @return void * @return void
* *
* @throws ValidationFailedException * @throws ValidationFailedException
*/ */
public function setContent(string $data, bool $validate = true): void public function setContent(?string $data = null, bool $validate = true): void
{ {
$data = trim($data); if (isset($data)) {
if ($validate && $data !== '') { $data = trim($data);
try { if ($validate && $data !== '') {
$data = $this->validate($data); try {
} catch (ValidationFailedException $exception) { $data = $this->validate($data);
throw $exception; } catch (ValidationFailedException $exception) {
throw $exception;
}
} }
} }
$this->content = $data; $this->content = $data;
@ -246,16 +248,18 @@ class Record
* *
* @param string $identifier The record identifier * @param string $identifier The record identifier
* @param Format $format The format * @param Format $format The format
* @param string $data The record's content * @param ?string $data The record's content
* *
* @throws ValidationFailedException * @throws ValidationFailedException
*/ */
public function __construct(string $identifier, Format $format, string $data = '') public function __construct(string $identifier, Format $format, ?string $data = null)
{ {
try { try {
$this->identifier = $identifier; $this->identifier = $identifier;
$this->setFormat($format); $this->setFormat($format);
$this->setContent($data); if (isset($data)) {
$this->setContent($data);
}
$this->setLastChanged(); $this->setLastChanged();
$this->sets = new ArrayCollection(); $this->sets = new ArrayCollection();
} catch (ValidationFailedException $exception) { } catch (ValidationFailedException $exception) {
@ -270,6 +274,6 @@ class Record
*/ */
public function __toString(): string public function __toString(): string
{ {
return $this->content; return $this->content ?? '';
} }
} }

View File

@ -25,6 +25,9 @@ namespace OCC\OaiPmh2\Database;
use Doctrine\Common\Collections\ArrayCollection; use Doctrine\Common\Collections\ArrayCollection;
use Doctrine\Common\Collections\Collection; use Doctrine\Common\Collections\Collection;
use Doctrine\ORM\Mapping as ORM; use Doctrine\ORM\Mapping as ORM;
use Symfony\Component\Validator\Constraints as Assert;
use Symfony\Component\Validator\Exception\ValidationFailedException;
use Symfony\Component\Validator\Validation;
/** /**
* Doctrine/ORM Entity for sets. * Doctrine/ORM Entity for sets.
@ -74,6 +77,7 @@ class Set
{ {
if (!$this->records->contains($record)) { if (!$this->records->contains($record)) {
$this->records->add($record); $this->records->add($record);
$record->addSet($this);
} }
} }
@ -117,6 +121,16 @@ class Set
return $this->records->toArray(); return $this->records->toArray();
} }
/**
 * Check if no records are associated with this set.
 *
 * @return bool TRUE if the set has no records, FALSE otherwise
 */
public function isEmpty(): bool
{
    return $this->records->count() === 0;
}
/** /**
* Update bi-directional association with records. * Update bi-directional association with records.
* *
@ -126,7 +140,10 @@ class Set
*/ */
public function removeRecord(Record $record): void public function removeRecord(Record $record): void
{ {
$this->records->removeElement($record); if ($this->records->contains($record)) {
$this->records->removeElement($record);
$record->removeSet($this);
}
} }
/** /**
@ -141,18 +158,54 @@ class Set
$this->description = trim($description); $this->description = trim($description);
} }
/**
 * Validate a set spec.
 *
 * The spec is trimmed first. It is rejected if it contains any whitespace
 * or is blank.
 *
 * @param string $spec The set spec
 *
 * @return string The trimmed and validated spec
 *
 * @throws ValidationFailedException
 */
protected function validate(string $spec): string
{
    $candidate = trim($spec);
    $constraints = [
        // 'match' => false inverts the check: the pattern must NOT match.
        new Assert\Regex([
            'pattern' => '/\s/',
            'match' => false,
            'message' => 'This value contains whitespaces.'
        ]),
        new Assert\NotBlank()
    ];
    $violations = Validation::createValidator()->validate($candidate, $constraints);
    if (count($violations) > 0) {
        throw new ValidationFailedException(null, $violations);
    }
    return $candidate;
}
/** /**
* Get new entity of set. * Get new entity of set.
* *
* @param string $spec The set spec * @param string $spec The set spec
* @param string $name The name of the set * @param string $name The name of the set
* @param string $description The description of the set * @param string $description The description of the set
*
* @throws ValidationFailedException
*/ */
public function __construct(string $spec, string $name, string $description = '') public function __construct(string $spec, string $name, string $description = '')
{ {
$this->spec = $spec; try {
$this->name = $name; $this->spec = $this->validate($spec);
$this->setDescription($description); $this->name = trim($name);
$this->records = new ArrayCollection(); $this->setDescription($description);
$this->records = new ArrayCollection();
} catch (ValidationFailedException $exception) {
throw $exception;
}
} }
} }

View File

@ -64,7 +64,7 @@ class GetRecord extends Middleware
$getRecord->appendChild($record); $getRecord->appendChild($record);
$header = $document->createElement('header'); $header = $document->createElement('header');
if ($oaiRecord->getContent() === '') { if ($oaiRecord->getContent() === null) {
$header->setAttribute('status', 'deleted'); $header->setAttribute('status', 'deleted');
} }
$record->appendChild($header); $record->appendChild($header);
@ -80,7 +80,7 @@ class GetRecord extends Middleware
$header->appendChild($setSpec); $header->appendChild($setSpec);
} }
if ($oaiRecord->getContent() !== '') { if ($oaiRecord->getContent() !== null) {
$metadata = $document->createElement('metadata'); $metadata = $document->createElement('metadata');
$record->appendChild($metadata); $record->appendChild($metadata);

View File

@ -106,7 +106,7 @@ class ListIdentifiers extends Middleware
} }
$header = $document->createElement('header'); $header = $document->createElement('header');
if ($oaiRecord->getContent() === '') { if ($oaiRecord->getContent() === null) {
$header->setAttribute('status', 'deleted'); $header->setAttribute('status', 'deleted');
} }
$baseNode->appendChild($header); $baseNode->appendChild($header);
@ -122,7 +122,7 @@ class ListIdentifiers extends Middleware
$header->appendChild($setSpec); $header->appendChild($setSpec);
} }
if ($verb === 'ListRecords' && $oaiRecord->getContent() !== '') { if ($verb === 'ListRecords' && $oaiRecord->getContent() !== null) {
$metadata = $document->createElement('metadata'); $metadata = $document->createElement('metadata');
$baseNode->appendChild($metadata); $baseNode->appendChild($metadata);