From 60f41db284504ba56d93b8ecd38953c775aeb689 Mon Sep 17 00:00:00 2001 From: Sebastian Meyer Date: Sat, 6 Jan 2024 14:59:54 +0100 Subject: [PATCH] Improve performance of importing --- src/Console/CsvImportCommand.php | 7 +++---- src/Database.php | 33 +++++++++++++++++++----------- src/Middleware/ListIdentifiers.php | 21 ++++++++++++++++--- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/src/Console/CsvImportCommand.php b/src/Console/CsvImportCommand.php index 23a89b7..6a3f2bc 100644 --- a/src/Console/CsvImportCommand.php +++ b/src/Console/CsvImportCommand.php @@ -25,6 +25,7 @@ namespace OCC\OaiPmh2\Console; use DateTime; use OCC\OaiPmh2\Database; use OCC\OaiPmh2\Database\Format; +use OCC\OaiPmh2\Database\Record; use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Helper\ProgressIndicator; @@ -145,12 +146,10 @@ class CsvImportCommand extends Command // Flush to database if memory usage reaches 90% of available limit. if (memory_get_usage() / $memoryLimit > 0.9) { - Database::getInstance()->flush(true); - /** @var Format */ - $format = Database::getInstance()->getEntityManager()->getReference(Format::class, $arguments['format']); + Database::getInstance()->flush([Record::class]); } } - Database::getInstance()->flush(true); + Database::getInstance()->flush(); Database::getInstance()->pruneOrphanSets(); $progressIndicator->finish('All done!'); diff --git a/src/Database.php b/src/Database.php index 7c961bd..4ef113b 100644 --- a/src/Database.php +++ b/src/Database.php @@ -154,15 +154,15 @@ class Database /** * Flush all changes to the database. * - * @param bool $clear Also clear the entity manager? + * @param string[] $entities Optional array of entity types to clear from entity manager * * @return void */ - public function flush(bool $clear = false): void + public function flush(array $entities = []): void { $this->entityManager->flush(); - if ($clear) { - $this->entityManager->clear(); + foreach ($entities as $entity) { + $this->entityManager->clear($entity); } } @@ -255,9 +255,9 @@ class Database * @param string $verb The currently requested verb ('ListIdentifiers' or 'ListRecords') * @param Format $metadataPrefix The metadata format * @param int $counter Counter for split result sets - * @param ?string $from The "from" datestamp - * @param ?string $until The "until" datestamp - * @param ?string $set The set spec + * @param ?DateTime $from The "from" datestamp + * @param ?DateTime $until The "until" datestamp + * @param ?Set $set The set spec * * @return Result The records and possibly a resumtion token */ @@ -265,9 +265,9 @@ class Database string $verb, Format $metadataPrefix, int $counter = 0, - ?string $from = null, - ?string $until = null, - ?string $set = null + ?DateTime $from = null, + ?DateTime $until = null, + ?Set $set = null ): Result { $maxRecords = Configuration::getInstance()->maxRecords; @@ -282,15 +282,18 @@ class Database ->setMaxResults($maxRecords); if (isset($from)) { $dql->andWhere($dql->expr()->gte('record.lastChanged', ':from')); - $dql->setParameter('from', new DateTime($from)); + $dql->setParameter('from', $from); + $from = $from->format('Y-m-d\TH:i:s\Z'); } if (isset($until)) { $dql->andWhere($dql->expr()->lte('record.lastChanged', ':until')); - $dql->setParameter('until', new DateTime($until)); + $dql->setParameter('until', $until); + $until = $until->format('Y-m-d\TH:i:s\Z'); } if (isset($set)) { $dql->andWhere($dql->expr()->in('record.sets', ':set')); $dql->setParameter('set', $set); + $set = $set->getSpec(); } $query = $dql->getQuery(); /** @var Records */ @@ -456,6 +459,12 @@ class Database { $format = $this->entityManager->find(Format::class, $prefix); if (isset($format)) { + $repository = $this->entityManager->getRepository(Record::class); + $criteria = Criteria::create()->where(Criteria::expr()->eq('format', $format)); + $records = $repository->matching($criteria); + foreach ($records as $record) { + $this->entityManager->remove($record); + } $this->entityManager->remove($format); $this->entityManager->flush(); $this->pruneOrphanSets(); diff --git a/src/Middleware/ListIdentifiers.php b/src/Middleware/ListIdentifiers.php index bc1af21..6dcca2f 100644 --- a/src/Middleware/ListIdentifiers.php +++ b/src/Middleware/ListIdentifiers.php @@ -22,6 +22,7 @@ declare(strict_types=1); namespace OCC\OaiPmh2\Middleware; +use DateTime; use OCC\OaiPmh2\Configuration; use OCC\OaiPmh2\Database; use OCC\OaiPmh2\Database\Record; @@ -75,15 +76,29 @@ class ListIdentifiers extends Middleware ErrorHandler::getInstance()->withError('cannotDisseminateFormat'); return; } + if (isset($from)) { + $from = new DateTime($from); + } + if (isset($until)) { + $until = new DateTime($until); + } if (isset($set)) { - $sets = Database::getInstance()->getSets(); - if (!in_array($set, array_keys($sets->getQueryResult()), true)) { + $sets = Database::getInstance()->getSets()->getQueryResult(); + if (!in_array($set, array_keys($sets), true)) { ErrorHandler::getInstance()->withError('noSetHierarchy'); return; } + $set = $sets[$set]; } - $records = Database::getInstance()->getRecords($verb, $prefixes[$metadataPrefix], $counter, $from, $until, $set); + $records = Database::getInstance()->getRecords( + $verb, + $prefixes[$metadataPrefix], + $counter, + $from, + $until, + $set + ); if (count($records) === 0) { ErrorHandler::getInstance()->withError('noRecordsMatch'); return;