Add command to prune deleted records

This commit is contained in:
Sebastian Meyer 2024-01-04 08:56:11 +01:00
parent b7ddb19cfd
commit 29544f7eaa
11 changed files with 257 additions and 39 deletions

View File

@ -29,6 +29,7 @@ use Exception;
use OCC\OaiPmh2\Console\AddRecordCommand;
use OCC\OaiPmh2\Console\BulkUpdateCommand;
use OCC\OaiPmh2\Console\DeleteRecordCommand;
use OCC\OaiPmh2\Console\PruneRecordsCommand;
use OCC\OaiPmh2\Console\PruneResumptionTokensCommand;
use OCC\OaiPmh2\Console\UpdateFormatsCommand;
@ -38,6 +39,7 @@ $commands = [
new AddRecordCommand(),
new BulkUpdateCommand(),
new DeleteRecordCommand(),
new PruneRecordsCommand(),
new PruneResumptionTokensCommand(),
new UpdateFormatsCommand()
];

View File

@ -72,6 +72,29 @@ metadataPrefix: {
}
}
#
# Deleted records policy
#
# This states if and how the repository keeps track of deleted records. You can
# delete records by importing empty records with the same identifiers and
# metadata prefixes. Depending on the deleted records policy those records will
# be either marked as deleted or completely removed from the database.
# "no" means the repository does not provide any information about deletions.
# "persistent" means the repository consistently provides information about
# deletions.
# "transient" means the repository may provide information about deletions.
# This is handled exactly the same as "persistent", but you are allowed to
# manually prune deleted records from the database (see below).
#
# ["no"|"persistent"|"transient"]
#
# Run "bin/cli oai:records:prune" after changing the deleted records policy to
# "no" to remove all deleted records from the database.
# If your policy is "transient" and you want to clean up deleted records from
# the database anyway, run the command with the "--force" flag.
#
deletedRecords: 'transient'
#
# Maximum number of records to return per request
#

View File

@ -40,11 +40,12 @@ use Symfony\Component\Yaml\Yaml;
* @property-read string $adminEmail
* @property-read string $database
* @property-read array $metadataPrefix
* @property-read string $deletedRecords
* @property-read int $maxRecords
* @property-read int $tokenValid
*
* @template TKey of string
* @template TValue
* @template TValue of array|int|string
*/
class Configuration
{
@ -102,6 +103,11 @@ class Configuration
])
])
],
'deletedRecords' => [
new Assert\Type('string'),
new Assert\Choice(['no', 'persistent', 'transient']),
new Assert\NotBlank()
],
'maxRecords' => [
new Assert\Type('int'),
new Assert\Range([

View File

@ -0,0 +1,92 @@
<?php
/**
* OAI-PMH 2.0 Data Provider
* Copyright (C) 2023 Sebastian Meyer <sebastian.meyer@opencultureconsulting.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace OCC\OaiPmh2\Console;
use OCC\OaiPmh2\Configuration;
use OCC\OaiPmh2\Database;
use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
/**
* Prune deleted records from database.
*
* @author Sebastian Meyer <sebastian.meyer@opencultureconsulting.com>
* @package opencultureconsulting/oai-pmh2
*/
#[AsCommand(
name: 'oai:records:prune',
description: 'Prune deleted records from database'
)]
class PruneRecordsCommand extends Command
{
    /**
     * Register the "--force" flag before delegating to the parent configuration.
     *
     * @return void
     */
    protected function configure(): void
    {
        $this->addOption(
            'force',
            null,
            InputOption::VALUE_NONE,
            'Deletes records even under "transient" policy.'
        );
        parent::configure();
    }

    /**
     * Prune deleted records if the configured policy permits it.
     *
     * Pruning runs when the policy is "no", or when it is "transient" and the
     * "--force" flag was given. Otherwise a message is printed and nothing is
     * removed.
     *
     * @param InputInterface $input The console input
     * @param OutputInterface $output The console output
     *
     * @return int Command::SUCCESS after pruning, Command::FAILURE under
     *             "persistent" policy, Command::INVALID in any other case
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $policy = Configuration::getInstance()->deletedRecords;
        $forced = (bool) $input->getOption('force');
        $pruningAllowed = $policy === 'no' || ($policy === 'transient' && $forced);

        if (!$pruningAllowed) {
            // "persistent" is a hard refusal; everything else only lacks the flag.
            [$message, $exitCode] = $policy === 'persistent'
                ? [
                    ' [ERROR] Under "persistent" policy removal of deleted records is not allowed. ',
                    Command::FAILURE
                ]
                : [
                    ' [INFO] Use the "--force" option to remove deleted records under "transient" policy. ',
                    Command::INVALID
                ];
            $output->writeln(['', $message, '']);
            return $exitCode;
        }

        $deleted = Database::getInstance()->pruneDeletedRecords();
        $report = sprintf(
            ' [OK] %d records are deleted and were successfully removed! ',
            $deleted
        );
        $output->writeln(['', $report, '']);
        return Command::SUCCESS;
    }
}

View File

@ -46,7 +46,7 @@ class PruneResumptionTokensCommand extends Command
$output->writeln([
'',
sprintf(
' [OK] %d resumption tokens are expired and were successfully deleted. ',
' [OK] %d resumption tokens are expired and were successfully deleted! ',
$expired
),
''

View File

@ -66,7 +66,7 @@ class UpdateFormatsCommand extends Command
++$added;
$output->writeln([
sprintf(
' [OK] Metadata format "%s" added or updated successfully. ',
' [OK] Metadata format "%s" added or updated successfully! ',
$prefix
)
]);
@ -87,7 +87,7 @@ class UpdateFormatsCommand extends Command
++$deleted;
$output->writeln([
sprintf(
' [OK] Metadata format "%s" and all associated records deleted successfully. ',
' [OK] Metadata format "%s" and all associated records deleted successfully! ',
$prefix
)
]);

View File

@ -23,6 +23,7 @@ declare(strict_types=1);
namespace OCC\OaiPmh2;
use DateTime;
use Doctrine\Common\Collections\Criteria;
use Doctrine\DBAL\DriverManager;
use Doctrine\DBAL\Schema\AbstractAsset;
use Doctrine\DBAL\Tools\DsnParser;
@ -149,11 +150,11 @@ class Database
if (isset($identifier)) {
$dql->innerJoin(
'format.records',
'records',
'record',
'WITH',
$dql->expr()->andX(
$dql->expr()->eq('records.identifier', ':identifier'),
$dql->expr()->neq('records.data', '')
$dql->expr()->eq('record.identifier', ':identifier'),
$dql->expr()->isNotNull('record.content')
)
)
->setParameter('identifier', $identifier);
@ -328,6 +329,41 @@ class Database
return (bool) $query->getOneOrNullResult(AbstractQuery::HYDRATE_SINGLE_SCALAR);
}
/**
 * Remove all records whose content is NULL from the database.
 *
 * Matching records are removed through the entity manager and flushed in
 * one go; afterwards pruneOrphanSets() is called.
 *
 * @return int The number of removed records
 */
public function pruneDeletedRecords(): int
{
    $deletedOnly = Criteria::create()->where(Criteria::expr()->isNull('content'));
    $deletedRecords = $this->entityManager
        ->getRepository(Record::class)
        ->matching($deletedOnly);
    $removed = 0;
    foreach ($deletedRecords as $deletedRecord) {
        $this->entityManager->remove($deletedRecord);
        ++$removed;
    }
    $this->entityManager->flush();
    $this->pruneOrphanSets();
    return $removed;
}
/**
 * Remove every set for which isEmpty() reports TRUE.
 *
 * All sets are fetched, the empty ones are scheduled for removal and the
 * changes are flushed once at the end.
 *
 * @return void
 */
public function pruneOrphanSets(): void
{
    $allSets = $this->entityManager->getRepository(Set::class)->findAll();
    foreach ($allSets as $candidate) {
        if (!$candidate->isEmpty()) {
            continue;
        }
        $this->entityManager->remove($candidate);
    }
    $this->entityManager->flush();
}
/**
* Prune expired resumption tokens.
*
@ -335,13 +371,14 @@ class Database
*/
public function pruneResumptionTokens(): int
{
$dql = $this->entityManager->createQueryBuilder();
$dql->delete(Token::class, 'token')
->where($dql->expr()->lt('token.validUntil', ':now'))
->setParameter('now', new DateTime());
$query = $dql->getQuery();
/** @var int */
return $query->execute();
$repository = $this->entityManager->getRepository(Token::class);
$criteria = Criteria::create()->where(Criteria::expr()->lt('validUntil', new DateTime()));
$tokens = $repository->matching($criteria);
foreach ($tokens as $token) {
$this->entityManager->remove($token);
}
$this->entityManager->flush();
return count($tokens);
}
/**
@ -357,6 +394,7 @@ class Database
if (isset($format)) {
$this->entityManager->remove($format);
$this->entityManager->flush();
$this->pruneOrphanSets();
return true;
} else {
return false;

View File

@ -64,8 +64,8 @@ class Record
/**
* The record's content.
*/
#[ORM\Column(type: 'text')]
private string $content = '';
#[ORM\Column(type: 'text', nullable: true)]
private ?string $content = null;
/**
* Collection of associated sets.
@ -97,9 +97,9 @@ class Record
/**
* Get the record's content.
*
* @return string The record's content
* @return ?string The record's content or NULL if deleted
*/
public function getContent(): string
public function getContent(): ?string
{
return $this->content;
}
@ -174,21 +174,23 @@ class Record
/**
* Set record's content.
*
* @param string $data The record's content
* @param ?string $data The record's content or NULL to mark as deleted
* @param bool $validate Should the input be validated?
*
* @return void
*
* @throws ValidationFailedException
*/
public function setContent(string $data, bool $validate = true): void
public function setContent(?string $data = null, bool $validate = true): void
{
$data = trim($data);
if ($validate && $data !== '') {
try {
$data = $this->validate($data);
} catch (ValidationFailedException $exception) {
throw $exception;
if (isset($data)) {
$data = trim($data);
if ($validate && $data !== '') {
try {
$data = $this->validate($data);
} catch (ValidationFailedException $exception) {
throw $exception;
}
}
}
$this->content = $data;
@ -246,16 +248,18 @@ class Record
*
* @param string $identifier The record identifier
* @param Format $format The format
* @param string $data The record's content
* @param ?string $data The record's content
*
* @throws ValidationFailedException
*/
public function __construct(string $identifier, Format $format, string $data = '')
public function __construct(string $identifier, Format $format, ?string $data = null)
{
try {
$this->identifier = $identifier;
$this->setFormat($format);
$this->setContent($data);
if (isset($data)) {
$this->setContent($data);
}
$this->setLastChanged();
$this->sets = new ArrayCollection();
} catch (ValidationFailedException $exception) {
@ -270,6 +274,6 @@ class Record
*/
public function __toString(): string
{
return $this->content;
return $this->content ?? '';
}
}

View File

@ -25,6 +25,9 @@ namespace OCC\OaiPmh2\Database;
use Doctrine\Common\Collections\ArrayCollection;
use Doctrine\Common\Collections\Collection;
use Doctrine\ORM\Mapping as ORM;
use Symfony\Component\Validator\Constraints as Assert;
use Symfony\Component\Validator\Exception\ValidationFailedException;
use Symfony\Component\Validator\Validation;
/**
* Doctrine/ORM Entity for sets.
@ -74,6 +77,7 @@ class Set
{
if (!$this->records->contains($record)) {
$this->records->add($record);
$record->addSet($this);
}
}
@ -117,6 +121,16 @@ class Set
return $this->records->toArray();
}
/**
 * Check if no records are associated with this set.
 *
 * @return bool TRUE if empty or FALSE otherwise
 */
public function isEmpty(): bool
{
    return $this->records->count() === 0;
}
/**
* Update bi-directional association with records.
*
@ -126,7 +140,10 @@ class Set
*/
public function removeRecord(Record $record): void
{
$this->records->removeElement($record);
if ($this->records->contains($record)) {
$this->records->removeElement($record);
$record->removeSet($this);
}
}
/**
@ -141,18 +158,54 @@ class Set
$this->description = trim($description);
}
/**
 * Check a set spec for validity.
 *
 * The spec is trimmed first; the trimmed value must not be blank and must
 * not contain any whitespace characters.
 *
 * @param string $spec The set spec
 *
 * @return string The trimmed and validated spec
 *
 * @throws ValidationFailedException
 */
protected function validate(string $spec): string
{
    $trimmed = trim($spec);
    $constraints = [
        new Assert\Regex([
            'pattern' => '/\s/',
            'match' => false,
            'message' => 'This value contains whitespaces.'
        ]),
        new Assert\NotBlank()
    ];
    $violations = Validation::createValidator()->validate($trimmed, $constraints);
    if (count($violations) === 0) {
        return $trimmed;
    }
    throw new ValidationFailedException(null, $violations);
}
/**
* Get new entity of set.
*
* @param string $spec The set spec
* @param string $name The name of the set
* @param string $description The description of the set
*
* @throws ValidationFailedException
*/
public function __construct(string $spec, string $name, string $description = '')
{
$this->spec = $spec;
$this->name = $name;
$this->setDescription($description);
$this->records = new ArrayCollection();
try {
$this->spec = $this->validate($spec);
$this->name = trim($name);
$this->setDescription($description);
$this->records = new ArrayCollection();
} catch (ValidationFailedException $exception) {
throw $exception;
}
}
}

View File

@ -64,7 +64,7 @@ class GetRecord extends Middleware
$getRecord->appendChild($record);
$header = $document->createElement('header');
if ($oaiRecord->getContent() === '') {
if ($oaiRecord->getContent() === null) {
$header->setAttribute('status', 'deleted');
}
$record->appendChild($header);
@ -80,7 +80,7 @@ class GetRecord extends Middleware
$header->appendChild($setSpec);
}
if ($oaiRecord->getContent() !== '') {
if ($oaiRecord->getContent() !== null) {
$metadata = $document->createElement('metadata');
$record->appendChild($metadata);

View File

@ -106,7 +106,7 @@ class ListIdentifiers extends Middleware
}
$header = $document->createElement('header');
if ($oaiRecord->getContent() === '') {
if ($oaiRecord->getContent() === null) {
$header->setAttribute('status', 'deleted');
}
$baseNode->appendChild($header);
@ -122,7 +122,7 @@ class ListIdentifiers extends Middleware
$header->appendChild($setSpec);
}
if ($verb === 'ListRecords' && $oaiRecord->getContent() !== '') {
if ($verb === 'ListRecords' && $oaiRecord->getContent() !== null) {
$metadata = $document->createElement('metadata');
$baseNode->appendChild($metadata);