Improve performance of importing

This commit is contained in:
Sebastian Meyer 2024-01-06 14:59:54 +01:00
parent b0f8c49d97
commit 60f41db284
3 changed files with 42 additions and 19 deletions

View File

@ -25,6 +25,7 @@ namespace OCC\OaiPmh2\Console;
use DateTime; use DateTime;
use OCC\OaiPmh2\Database; use OCC\OaiPmh2\Database;
use OCC\OaiPmh2\Database\Format; use OCC\OaiPmh2\Database\Format;
use OCC\OaiPmh2\Database\Record;
use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Attribute\AsCommand;
use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Helper\ProgressIndicator; use Symfony\Component\Console\Helper\ProgressIndicator;
@ -145,12 +146,10 @@ class CsvImportCommand extends Command
// Flush to database if memory usage reaches 90% of available limit. // Flush to database if memory usage reaches 90% of available limit.
if (memory_get_usage() / $memoryLimit > 0.9) { if (memory_get_usage() / $memoryLimit > 0.9) {
Database::getInstance()->flush(true); Database::getInstance()->flush([Record::class]);
/** @var Format */
$format = Database::getInstance()->getEntityManager()->getReference(Format::class, $arguments['format']);
} }
} }
Database::getInstance()->flush(true); Database::getInstance()->flush();
Database::getInstance()->pruneOrphanSets(); Database::getInstance()->pruneOrphanSets();
$progressIndicator->finish('All done!'); $progressIndicator->finish('All done!');

View File

@ -154,15 +154,15 @@ class Database
/** /**
* Flush all changes to the database. * Flush all changes to the database.
* *
* @param bool $clear Also clear the entity manager? * @param string[] $entities Optional array of entity types to clear from entity manager
* *
* @return void * @return void
*/ */
public function flush(bool $clear = false): void public function flush(array $entities = []): void
{ {
$this->entityManager->flush(); $this->entityManager->flush();
if ($clear) { foreach ($entities as $entity) {
$this->entityManager->clear(); $this->entityManager->clear($entity);
} }
} }
@ -255,9 +255,9 @@ class Database
* @param string $verb The currently requested verb ('ListIdentifiers' or 'ListRecords') * @param string $verb The currently requested verb ('ListIdentifiers' or 'ListRecords')
* @param Format $metadataPrefix The metadata format * @param Format $metadataPrefix The metadata format
* @param int $counter Counter for split result sets * @param int $counter Counter for split result sets
* @param ?string $from The "from" datestamp * @param ?DateTime $from The "from" datestamp
* @param ?string $until The "until" datestamp * @param ?DateTime $until The "until" datestamp
* @param ?string $set The set spec * @param ?Set $set The set spec
* *
* @return Result<Records> The records and possibly a resumption token * @return Result<Records> The records and possibly a resumption token
*/ */
@ -265,9 +265,9 @@ class Database
string $verb, string $verb,
Format $metadataPrefix, Format $metadataPrefix,
int $counter = 0, int $counter = 0,
?string $from = null, ?DateTime $from = null,
?string $until = null, ?DateTime $until = null,
?string $set = null ?Set $set = null
): Result ): Result
{ {
$maxRecords = Configuration::getInstance()->maxRecords; $maxRecords = Configuration::getInstance()->maxRecords;
@ -282,15 +282,18 @@ class Database
->setMaxResults($maxRecords); ->setMaxResults($maxRecords);
if (isset($from)) { if (isset($from)) {
$dql->andWhere($dql->expr()->gte('record.lastChanged', ':from')); $dql->andWhere($dql->expr()->gte('record.lastChanged', ':from'));
$dql->setParameter('from', new DateTime($from)); $dql->setParameter('from', $from);
$from = $from->format('Y-m-d\TH:i:s\Z');
} }
if (isset($until)) { if (isset($until)) {
$dql->andWhere($dql->expr()->lte('record.lastChanged', ':until')); $dql->andWhere($dql->expr()->lte('record.lastChanged', ':until'));
$dql->setParameter('until', new DateTime($until)); $dql->setParameter('until', $until);
$until = $until->format('Y-m-d\TH:i:s\Z');
} }
if (isset($set)) { if (isset($set)) {
$dql->andWhere($dql->expr()->in('record.sets', ':set')); $dql->andWhere($dql->expr()->in('record.sets', ':set'));
$dql->setParameter('set', $set); $dql->setParameter('set', $set);
$set = $set->getSpec();
} }
$query = $dql->getQuery(); $query = $dql->getQuery();
/** @var Records */ /** @var Records */
@ -456,6 +459,12 @@ class Database
{ {
$format = $this->entityManager->find(Format::class, $prefix); $format = $this->entityManager->find(Format::class, $prefix);
if (isset($format)) { if (isset($format)) {
$repository = $this->entityManager->getRepository(Record::class);
$criteria = Criteria::create()->where(Criteria::expr()->eq('format', $format));
$records = $repository->matching($criteria);
foreach ($records as $record) {
$this->entityManager->remove($record);
}
$this->entityManager->remove($format); $this->entityManager->remove($format);
$this->entityManager->flush(); $this->entityManager->flush();
$this->pruneOrphanSets(); $this->pruneOrphanSets();

View File

@ -22,6 +22,7 @@ declare(strict_types=1);
namespace OCC\OaiPmh2\Middleware; namespace OCC\OaiPmh2\Middleware;
use DateTime;
use OCC\OaiPmh2\Configuration; use OCC\OaiPmh2\Configuration;
use OCC\OaiPmh2\Database; use OCC\OaiPmh2\Database;
use OCC\OaiPmh2\Database\Record; use OCC\OaiPmh2\Database\Record;
@ -75,15 +76,29 @@ class ListIdentifiers extends Middleware
ErrorHandler::getInstance()->withError('cannotDisseminateFormat'); ErrorHandler::getInstance()->withError('cannotDisseminateFormat');
return; return;
} }
if (isset($from)) {
$from = new DateTime($from);
}
if (isset($until)) {
$until = new DateTime($until);
}
if (isset($set)) { if (isset($set)) {
$sets = Database::getInstance()->getSets(); $sets = Database::getInstance()->getSets()->getQueryResult();
if (!in_array($set, array_keys($sets->getQueryResult()), true)) { if (!in_array($set, array_keys($sets), true)) {
ErrorHandler::getInstance()->withError('noSetHierarchy'); ErrorHandler::getInstance()->withError('noSetHierarchy');
return; return;
} }
$set = $sets[$set];
} }
$records = Database::getInstance()->getRecords($verb, $prefixes[$metadataPrefix], $counter, $from, $until, $set); $records = Database::getInstance()->getRecords(
$verb,
$prefixes[$metadataPrefix],
$counter,
$from,
$until,
$set
);
if (count($records) === 0) { if (count($records) === 0) {
ErrorHandler::getInstance()->withError('noRecordsMatch'); ErrorHandler::getInstance()->withError('noRecordsMatch');
return; return;