<?php
/**
* OAI-PMH 2.0 Data Provider
* Copyright (C) 2023 Sebastian Meyer <sebastian.meyer@opencultureconsulting.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
declare(strict_types=1);
namespace OCC\OaiPmh2;
use DateTime;
use Doctrine\Common\Collections\Criteria;
use Doctrine\DBAL\DriverManager;
use Doctrine\DBAL\Schema\AbstractAsset;
use Doctrine\DBAL\Tools\DsnParser;
use Doctrine\ORM\AbstractQuery;
use Doctrine\ORM\Configuration as DoctrineConfiguration;
use Doctrine\ORM\EntityManager;
use Doctrine\ORM\Mapping\Driver\AttributeDriver;
use Doctrine\ORM\Proxy\ProxyFactory;
use Doctrine\ORM\Tools\Pagination\Paginator;
use OCC\Basics\Traits\Singleton;
use OCC\OaiPmh2\Database\Format;
use OCC\OaiPmh2\Database\Record;
use OCC\OaiPmh2\Database\Result;
use OCC\OaiPmh2\Database\Set;
use OCC\OaiPmh2\Database\Token;
use Symfony\Component\Cache\Adapter\PhpFilesAdapter;
use Symfony\Component\Filesystem\Path;
use Symfony\Component\Validator\Exception\ValidationFailedException;
/**
* Handles all database shenanigans.
*
* @author Sebastian Meyer <sebastian.meyer@opencultureconsulting.com>
* @package opencultureconsulting/oai-pmh2
*
* @template Formats of array<string, Format>
* @template Records of array<string, Record>
* @template Sets of array<string, Set>
*/
class Database
{
use Singleton;
/**
* Names of the database tables belonging to this application.
*
* The schema assets filter registered in the constructor only accepts
* assets whose name is contained in this list.
*/
protected const DB_TABLES = [
'formats',
'records',
'records_sets',
'sets',
'tokens'
];
/**
* This holds the Doctrine entity manager.
*/
protected EntityManager $entityManager;
/**
 * Store a metadata format, updating the stored one if it already exists.
 *
 * The format is looked up by its prefix. If a stored format is found, its
 * namespace and schema are overwritten with the values of the given format;
 * otherwise the given format is persisted as a new entity. The changes are
 * flushed immediately in both cases.
 *
 * @param Format $newFormat The metadata format
 *
 * @return void
 */
public function addOrUpdateMetadataFormat(Format $newFormat): void
{
    $manager = $this->entityManager;
    $existing = $manager->find(Format::class, $newFormat->getPrefix());
    if ($existing === null) {
        $manager->persist($newFormat);
    } else {
        $existing->setNamespace($newFormat->getNamespace());
        $existing->setSchema($newFormat->getSchema());
    }
    $manager->flush();
}
2024-01-04 14:07:00 +01:00
/**
 * Store a record, updating or removing the stored one if it already exists.
 *
 * The record is looked up by identifier and format. A record is kept if it
 * has content or if the configured "deletedRecords" policy is anything but
 * 'no'. A kept record either updates the stored one (content and date of
 * last change) or is persisted as new. A record that is not kept removes
 * the stored one, if any, and is otherwise ignored.
 *
 * @param Record $newRecord The record
 * @param bool $bulkMode Should we operate in bulk mode (no flush)?
 *
 * @return void
 */
public function addOrUpdateRecord(Record $newRecord, bool $bulkMode = false): void
{
    $manager = $this->entityManager;
    $existing = $manager->find(
        Record::class,
        [
            'identifier' => $newRecord->getIdentifier(),
            'format' => $newRecord->getFormat()
        ]
    );
    // Content-less records are only retained when the deleted records policy is not 'no'.
    $keep = $newRecord->hasContent() || Configuration::getInstance()->deletedRecords !== 'no';
    if ($existing === null) {
        if ($keep) {
            $manager->persist($newRecord);
        }
    } elseif ($keep) {
        // NOTE(review): the meaning of the second setContent() argument is defined in Record — confirm there.
        $existing->setContent($newRecord->getContent(), false);
        $existing->setLastChanged($newRecord->getLastChanged());
        // TODO: Add full set support.
    } else {
        $manager->remove($existing);
    }
    // In bulk mode the caller is responsible for flushing.
    if (!$bulkMode) {
        $manager->flush();
    }
}
/**
 * Write all pending changes to the database.
 *
 * After flushing, the entity manager's clear() method is called once for
 * each given entity type.
 *
 * @param string[] $entities Optional array of entity types to clear from entity manager
 *
 * @return void
 */
public function flush(array $entities = []): void
{
    $manager = $this->entityManager;
    $manager->flush();
    // NOTE(review): passing an entity name to clear() is believed to be deprecated in
    // Doctrine ORM 2.x and ignored in 3.x — confirm against the installed version.
    foreach ($entities as $entityName) {
        $manager->clear($entityName);
    }
}
2024-01-03 16:54:13 +01:00
/**
 * Determine the earliest date of last change among all records.
 *
 * The record with the smallest "lastChanged" value is fetched (with result
 * caching enabled) and its datestamp is formatted as 'Y-m-d\TH:i:s\Z'.
 * If there is no record at all, '0000-00-00T00:00:00Z' is returned.
 *
 * @return string The earliest datestamp
 */
public function getEarliestDatestamp(): string
{
    $query = $this->entityManager->createQueryBuilder()
        ->select('record')
        ->from(Record::class, 'record')
        ->orderBy('record.lastChanged', 'ASC')
        ->setMaxResults(1)
        ->getQuery();
    $query->enableResultCache();
    /** @var ?array<string, \DateTime> */
    $earliest = $query->getOneOrNullResult(AbstractQuery::HYDRATE_ARRAY);
    if ($earliest === null) {
        // No records stored yet.
        return '0000-00-00T00:00:00Z';
    }
    return $earliest['lastChanged']->format('Y-m-d\TH:i:s\Z');
}
/**
* Get the Doctrine entity manager.
*
* Returns the entity manager held in this object's $entityManager property,
* which is assigned in the constructor.
*
* @return EntityManager The entity manager instance
*/
public function getEntityManager(): EntityManager
{
return $this->entityManager;
}
/**
 * Get all metadata formats, optionally limited to those of a single record.
 *
 * Without an identifier every stored format is returned. With an identifier
 * only those formats are returned for which a record with that identifier
 * and non-null content exists. The result is indexed by metadata prefix and
 * result caching is enabled for the query.
 *
 * @param ?string $identifier Optional record identifier
 *
 * @return Result<Formats> The metadata prefixes
 */
public function getMetadataFormats(?string $identifier = null): Result
{
    $dql = $this->entityManager->createQueryBuilder();
    $dql->select('format')
        ->from(Format::class, 'format', 'format.prefix');
    if ($identifier !== null) {
        // The join must be tied to the selected format. Without a condition it is
        // a cross join, and a single matching record would make every format
        // appear to be available for the identifier.
        $dql->innerJoin(Record::class, 'record', 'WITH', 'record.format = format')
            ->where(
                $dql->expr()->andX(
                    $dql->expr()->eq('record.identifier', ':identifier'),
                    $dql->expr()->isNotNull('record.content')
                )
            )
            ->setParameter('identifier', $identifier);
    }
    $query = $dql->getQuery();
    $query->enableResultCache();
    /** @var Formats */
    $queryResult = $query->getResult();
    return new Result($queryResult);
}
/**
 * Fetch a single record by its identifier and metadata format.
 *
 * @param string $identifier The record identifier
 * @param Format $format The metadata format
 *
 * @return ?Record The record or NULL on failure
 */
public function getRecord(string $identifier, Format $format): ?Record
{
    // Records are looked up by the combination of identifier and format.
    $recordKey = [
        'identifier' => $identifier,
        'format' => $format
    ];
    return $this->entityManager->find(Record::class, $recordKey);
}
/**
 * Get list of records.
 *
 * Returns one page of at most "maxRecords" (from the configuration) records
 * of the given metadata format, optionally filtered by date range and set.
 * Records are ordered by identifier so that consecutive pages neither
 * overlap nor skip records. If more records remain after this page, a
 * resumption token for the next page is persisted and attached to the
 * result.
 *
 * @param string $verb The currently requested verb ('ListIdentifiers' or 'ListRecords')
 * @param Format $metadataPrefix The metadata format
 * @param int $counter Counter for split result sets
 * @param ?DateTime $from The "from" datestamp
 * @param ?DateTime $until The "until" datestamp
 * @param ?Set $set The set spec
 *
 * @return Result<Records> The records and possibly a resumption token
 */
public function getRecords(
    string $verb,
    Format $metadataPrefix,
    int $counter = 0,
    ?DateTime $from = null,
    ?DateTime $until = null,
    ?Set $set = null
): Result
{
    $maxRecords = Configuration::getInstance()->maxRecords;
    $cursor = $counter * $maxRecords;
    $dql = $this->entityManager->createQueryBuilder();
    $dql->select('record')
        ->from(Record::class, 'record', 'record.identifier')
        ->where($dql->expr()->eq('record.format', ':metadataPrefix'))
        ->setParameter('metadataPrefix', $metadataPrefix)
        // Offset-based paging is only well-defined with a fixed sort order.
        ->orderBy('record.identifier', 'ASC')
        ->setFirstResult($cursor)
        ->setMaxResults($maxRecords);
    if ($from !== null) {
        $dql->andWhere($dql->expr()->gte('record.lastChanged', ':from'))
            ->setParameter('from', $from);
    }
    if ($until !== null) {
        $dql->andWhere($dql->expr()->lte('record.lastChanged', ':until'))
            ->setParameter('until', $until);
    }
    if ($set !== null) {
        // "record.sets" is a collection-valued association, so membership has
        // to be tested with MEMBER OF; IN is not applicable to collections.
        $dql->andWhere($dql->expr()->isMemberOf(':set', 'record.sets'))
            ->setParameter('set', $set);
    }
    $query = $dql->getQuery();
    /** @var Records */
    $queryResult = $query->getResult();
    $result = new Result($queryResult);
    // Total number of matching records, independent of the current page.
    $completeListSize = count(new Paginator($query, true));
    if ($completeListSize > ($cursor + count($result))) {
        // The token carries the request arguments as strings for the next page.
        $token = new Token($verb, [
            'counter' => $counter + 1,
            'completeListSize' => $completeListSize,
            'metadataPrefix' => $metadataPrefix->getPrefix(),
            'from' => $from?->format('Y-m-d\TH:i:s\Z'),
            'until' => $until?->format('Y-m-d\TH:i:s\Z'),
            'set' => $set?->getSpec()
        ]);
        $this->entityManager->persist($token);
        $this->entityManager->flush();
        $result->setResumptionToken($token);
    }
    return $result;
}
/**
 * Look up a resumption token that is still valid for the given verb.
 *
 * A token matches if its value and verb equal the given ones and its
 * "validUntil" date is not earlier than the current time.
 *
 * @param string $token The token
 * @param string $verb The current verb to validate token
 *
 * @return ?Token The resumption token or NULL if invalid
 */
public function getResumptionToken(string $token, string $verb): ?Token
{
    $builder = $this->entityManager->createQueryBuilder();
    $expr = $builder->expr();
    $query = $builder
        ->select('token')
        ->from(Token::class, 'token')
        ->where($expr->gte('token.validUntil', ':now'))
        ->andWhere($expr->eq('token.token', ':token'))
        ->andWhere($expr->eq('token.verb', ':verb'))
        ->setParameter('now', new DateTime())
        ->setParameter('token', $token)
        ->setParameter('verb', $verb)
        ->setMaxResults(1)
        ->getQuery();
    /** @var ?Token */
    $match = $query->getOneOrNullResult();
    return $match;
}
/**
 * Get all sets.
 *
 * Returns one page of at most "maxRecords" (from the configuration) sets,
 * indexed and ordered by set spec so that consecutive pages neither overlap
 * nor skip sets. If more sets remain after this page, a resumption token
 * for the next page is persisted and attached to the result.
 *
 * @param int $counter Counter for split result sets
 *
 * @return Result<Sets> The sets and possibly a resumption token
 */
public function getSets(int $counter = 0): Result
{
    $maxRecords = Configuration::getInstance()->maxRecords;
    $cursor = $counter * $maxRecords;
    $dql = $this->entityManager->createQueryBuilder();
    $dql->select('sets')
        ->from(Set::class, 'sets', 'sets.spec')
        // Offset-based paging is only well-defined with a fixed sort order.
        ->orderBy('sets.spec', 'ASC')
        ->setFirstResult($cursor)
        ->setMaxResults($maxRecords);
    $query = $dql->getQuery();
    $query->enableResultCache();
    /** @var Sets */
    $resultQuery = $query->getResult();
    $result = new Result($resultQuery);
    // Total number of sets, independent of the current page.
    $completeListSize = count(new Paginator($query, false));
    if ($completeListSize > ($cursor + count($result))) {
        $token = new Token('ListSets', [
            'counter' => $counter + 1,
            'completeListSize' => $completeListSize
        ]);
        $this->entityManager->persist($token);
        $this->entityManager->flush();
        $result->setResumptionToken($token);
    }
    return $result;
}
/**
 * Tell whether at least one record with the given identifier is stored.
 *
 * The records with the identifier are counted and the count is cast to
 * boolean.
 *
 * @param string $identifier The record identifier
 *
 * @return bool Whether the identifier exists
 */
public function idDoesExist(string $identifier): bool
{
    $builder = $this->entityManager->createQueryBuilder();
    $query = $builder
        ->select('COUNT(record.identifier)')
        ->from(Record::class, 'record')
        ->where($builder->expr()->eq('record.identifier', ':identifier'))
        ->setParameter('identifier', $identifier)
        ->setMaxResults(1)
        ->getQuery();
    $matches = $query->getOneOrNullResult(AbstractQuery::HYDRATE_SINGLE_SCALAR);
    return (bool) $matches;
}
2024-01-04 08:56:11 +01:00
/**
 * Remove all records without content and clean up orphaned sets.
 *
 * Every record whose "content" is NULL is removed, the changes are flushed
 * and pruneOrphanSets() is run afterwards.
 *
 * @return int The number of removed records
 */
public function pruneDeletedRecords(): int
{
    $manager = $this->entityManager;
    $withoutContent = Criteria::create()->where(Criteria::expr()->isNull('content'));
    $deletedRecords = $manager->getRepository(Record::class)->matching($withoutContent);
    foreach ($deletedRecords as $deletedRecord) {
        $manager->remove($deletedRecord);
    }
    $manager->flush();
    $this->pruneOrphanSets();
    return count($deletedRecords);
}
/**
 * Remove all sets that report themselves as empty.
 *
 * All sets are loaded, those for which isEmpty() returns true are removed,
 * and the changes are flushed.
 *
 * @return void
 */
public function pruneOrphanSets(): void
{
    $manager = $this->entityManager;
    foreach ($manager->getRepository(Set::class)->findAll() as $candidate) {
        if (!$candidate->isEmpty()) {
            continue;
        }
        $manager->remove($candidate);
    }
    $manager->flush();
}
2024-01-03 16:54:13 +01:00
/**
 * Remove all resumption tokens that have expired.
 *
 * A token counts as expired if its "validUntil" date is earlier than the
 * current time. The removals are flushed immediately.
 *
 * @return int The number of deleted tokens
 */
public function pruneResumptionTokens(): int
{
    $manager = $this->entityManager;
    $expired = Criteria::create()->where(Criteria::expr()->lt('validUntil', new DateTime()));
    $expiredTokens = $manager->getRepository(Token::class)->matching($expired);
    foreach ($expiredTokens as $expiredToken) {
        $manager->remove($expiredToken);
    }
    $manager->flush();
    return count($expiredTokens);
}
/**
 * Delete a metadata format together with every record stored in it.
 *
 * All records of the format are removed first, then the format itself.
 * After flushing, pruneOrphanSets() is run.
 *
 * @param Format $format The metadata format
 *
 * @return void
 */
public function removeMetadataFormat(Format $format): void
{
    $manager = $this->entityManager;
    $ofFormat = Criteria::create()->where(Criteria::expr()->eq('format', $format));
    $affectedRecords = $manager->getRepository(Record::class)->matching($ofFormat);
    foreach ($affectedRecords as $affectedRecord) {
        $manager->remove($affectedRecord);
    }
    $manager->remove($format);
    $manager->flush();
    $this->pruneOrphanSets();
}
/**
 * Set up the Doctrine configuration, database connection and entity manager.
 *
 * Metadata, query and result caches are file-based and stored below
 * "var/cache"; proxies live in "var/generated" and are never generated
 * automatically. Schema handling is restricted to the tables listed in
 * DB_TABLES. The connection is created from the configured database DSN,
 * in which the placeholder "%BASEDIR%" is replaced with the canonical path
 * of the parent directory.
 *
 * This is a singleton class, thus the constructor is private.
 *
 * Usage: Get an instance of this class by calling Database::getInstance()
 */
private function __construct()
{
    $cacheDir = __DIR__ . '/../var/cache';
    $configuration = new DoctrineConfiguration();
    $configuration->setAutoGenerateProxyClasses(ProxyFactory::AUTOGENERATE_NEVER);
    $configuration->setMetadataCache(new PhpFilesAdapter('Metadata', 0, $cacheDir));
    $configuration->setMetadataDriverImpl(new AttributeDriver([__DIR__ . '/Database']));
    $configuration->setProxyDir(__DIR__ . '/../var/generated');
    $configuration->setProxyNamespace('OCC\OaiPmh2\Proxy');
    $configuration->setQueryCache(new PhpFilesAdapter('Query', 0, $cacheDir));
    $configuration->setResultCache(new PhpFilesAdapter('Result', 0, $cacheDir));
    // The filter may receive either a plain asset name or an asset object.
    $configuration->setSchemaAssetsFilter(
        static fn (string|AbstractAsset $assetName): bool => in_array(
            $assetName instanceof AbstractAsset ? $assetName->getName() : $assetName,
            self::DB_TABLES,
            true
        )
    );
    // Maps the DSN schemes accepted in the configuration to Doctrine driver names.
    $driverSchemes = [
        'mariadb' => 'pdo_mysql',
        'mssql' => 'pdo_sqlsrv',
        'mysql' => 'pdo_mysql',
        'oracle' => 'pdo_oci',
        'postgres' => 'pdo_pgsql',
        'sqlite' => 'pdo_sqlite'
    ];
    $dsn = str_replace(
        '%BASEDIR%',
        Path::canonicalize(__DIR__ . '/../'),
        Configuration::getInstance()->database
    );
    $connectionParams = (new DsnParser($driverSchemes))->parse($dsn);
    $connection = DriverManager::getConnection($connectionParams, $configuration);
    $this->entityManager = new EntityManager($connection, $configuration);
}
}