From 7188f37c9faaf727525e9effad15638afe5a499d Mon Sep 17 00:00:00 2001 From: Sebastian Meyer Date: Thu, 4 Jan 2024 14:07:00 +0100 Subject: [PATCH] Add CSV bulk import command --- bin/cli | 4 +- composer.json | 3 +- composer.lock | 113 ++---------------- src/Console/AddRecordCommand.php | 2 + src/Console/BulkUpdateCommand.php | 48 -------- src/Console/CsvImportCommand.php | 175 ++++++++++++++++++++++++++++ src/Console/DeleteRecordCommand.php | 4 + src/Database.php | 62 ++++++++++ src/Database/Format.php | 2 +- src/Database/Record.php | 21 ++-- 10 files changed, 268 insertions(+), 166 deletions(-) delete mode 100644 src/Console/BulkUpdateCommand.php create mode 100644 src/Console/CsvImportCommand.php diff --git a/bin/cli b/bin/cli index badc792..358f909 100644 --- a/bin/cli +++ b/bin/cli @@ -27,7 +27,7 @@ use Doctrine\ORM\Tools\Console\ConsoleRunner; use Doctrine\ORM\Tools\Console\EntityManagerProvider\SingleManagerProvider; use Exception; use OCC\OaiPmh2\Console\AddRecordCommand; -use OCC\OaiPmh2\Console\BulkUpdateCommand; +use OCC\OaiPmh2\Console\CsvImportCommand; use OCC\OaiPmh2\Console\DeleteRecordCommand; use OCC\OaiPmh2\Console\PruneRecordsCommand; use OCC\OaiPmh2\Console\PruneResumptionTokensCommand; @@ -37,7 +37,7 @@ require __DIR__ . '/../vendor/autoload.php'; $commands = [ new AddRecordCommand(), - new BulkUpdateCommand(), + new CsvImportCommand(), new DeleteRecordCommand(), new PruneRecordsCommand(), new PruneResumptionTokensCommand(), diff --git a/composer.json b/composer.json index e54c141..bbc3dc2 100644 --- a/composer.json +++ b/composer.json @@ -38,14 +38,13 @@ "symfony/cache": "^6.4", "symfony/console": "^6.4", "symfony/filesystem": "^6.4", - "symfony/serializer":"^6.4", "symfony/validator": "^6.4", "symfony/yaml": "^6.4" }, "require-dev": { "phpstan/phpstan": "^1.10", "phpstan/phpstan-strict-rules": "^1.5", - "friendsofphp/php-cs-fixer": "^3.45" + "friendsofphp/php-cs-fixer": "^3.46" }, "autoload": { "psr-4": { diff --git a/composer.lock b/composer.lock index c263e1e..5c7ee9b 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "816dd79b521706bbb444b1dbf37d384c", + "content-hash": "944cba5f372ca0ab2482551434ec9d4a", "packages": [ { "name": "doctrine/cache", @@ -2611,104 +2611,6 @@ ], "time": "2023-08-16T06:22:46+00:00" }, - { - "name": "symfony/serializer", - "version": "v6.4.2", - "source": { - "type": "git", - "url": "https://github.com/symfony/serializer.git", - "reference": "f87ea9d7bfd4cf2f7b72be554607e6c96e6664af" - }, - "dist": { - "type": "zip", - "url": "https://api.github.com/repos/symfony/serializer/zipball/f87ea9d7bfd4cf2f7b72be554607e6c96e6664af", - "reference": "f87ea9d7bfd4cf2f7b72be554607e6c96e6664af", - "shasum": "" - }, - "require": { - "php": ">=8.1", - "symfony/deprecation-contracts": "^2.5|^3", - "symfony/polyfill-ctype": "~1.8" - }, - "conflict": { - "doctrine/annotations": "<1.12", - "phpdocumentor/reflection-docblock": "<3.2.2", - "phpdocumentor/type-resolver": "<1.4.0", - "symfony/dependency-injection": "<5.4", - "symfony/property-access": "<5.4", - "symfony/property-info": "<5.4.24|>=6,<6.2.11", - "symfony/uid": "<5.4", - "symfony/validator": "<6.4", - "symfony/yaml": "<5.4" - }, - "require-dev": { - "doctrine/annotations": "^1.12|^2", - "phpdocumentor/reflection-docblock": "^3.2|^4.0|^5.0", - "seld/jsonlint": "^1.10", - "symfony/cache": "^5.4|^6.0|^7.0", - "symfony/config": "^5.4|^6.0|^7.0", - "symfony/console": "^5.4|^6.0|^7.0", - "symfony/dependency-injection": "^5.4|^6.0|^7.0", - "symfony/error-handler": "^5.4|^6.0|^7.0", - "symfony/filesystem": "^5.4|^6.0|^7.0", - "symfony/form": "^5.4|^6.0|^7.0", - "symfony/http-foundation": "^5.4|^6.0|^7.0", - "symfony/http-kernel": "^5.4|^6.0|^7.0", - "symfony/messenger": "^5.4|^6.0|^7.0", - "symfony/mime": "^5.4|^6.0|^7.0", - "symfony/property-access": "^5.4|^6.0|^7.0", - "symfony/property-info": "^5.4.24|^6.2.11|^7.0", - "symfony/translation-contracts": "^2.5|^3", - "symfony/uid": "^5.4|^6.0|^7.0", - "symfony/validator": "^6.4|^7.0", - "symfony/var-dumper": "^5.4|^6.0|^7.0", - "symfony/var-exporter": "^5.4|^6.0|^7.0", - "symfony/yaml": "^5.4|^6.0|^7.0" - }, - "type": "library", - "autoload": { - "psr-4": { - "Symfony\\Component\\Serializer\\": "" - }, - "exclude-from-classmap": [ - "/Tests/" - ] - }, - "notification-url": "https://packagist.org/downloads/", - "license": [ - "MIT" - ], - "authors": [ - { - "name": "Fabien Potencier", - "email": "fabien@symfony.com" - }, - { - "name": "Symfony Community", - "homepage": "https://symfony.com/contributors" - } - ], - "description": "Handles serializing and deserializing data structures, including object graphs, into array structures or other formats like XML and JSON.", - "homepage": "https://symfony.com", - "support": { - "source": "https://github.com/symfony/serializer/tree/v6.4.2" - }, - "funding": [ - { - "url": "https://symfony.com/sponsor", - "type": "custom" - }, - { - "url": "https://github.com/fabpot", - "type": "github" - }, - { - "url": "https://tidelift.com/funding/github/packagist/symfony/symfony", - "type": "tidelift" - } - ], - "time": "2023-12-29T15:34:34+00:00" - }, { "name": "symfony/service-contracts", "version": "v3.4.1", @@ -3420,21 +3322,22 @@ }, { "name": "friendsofphp/php-cs-fixer", - "version": "v3.45.0", + "version": "v3.46.0", "source": { "type": "git", "url": "https://github.com/PHP-CS-Fixer/PHP-CS-Fixer.git", - "reference": "c0daa33cb2533cd73f48dde1c70c2afa3e7953b5" + "reference": "be6831c9af1740470d2a773119b9273f8ac1c3d2" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/PHP-CS-Fixer/PHP-CS-Fixer/zipball/c0daa33cb2533cd73f48dde1c70c2afa3e7953b5", - "reference": "c0daa33cb2533cd73f48dde1c70c2afa3e7953b5", + "url": "https://api.github.com/repos/PHP-CS-Fixer/PHP-CS-Fixer/zipball/be6831c9af1740470d2a773119b9273f8ac1c3d2", + "reference": "be6831c9af1740470d2a773119b9273f8ac1c3d2", "shasum": "" }, "require": { "composer/semver": "^3.4", "composer/xdebug-handler": "^3.0.3", + "ext-filter": "*", "ext-json": "*", "ext-tokenizer": "*", "php": "^7.4 || ^8.0", @@ -3498,7 +3401,7 @@ ], "support": { "issues": "https://github.com/PHP-CS-Fixer/PHP-CS-Fixer/issues", - "source": "https://github.com/PHP-CS-Fixer/PHP-CS-Fixer/tree/v3.45.0" + "source": "https://github.com/PHP-CS-Fixer/PHP-CS-Fixer/tree/v3.46.0" }, "funding": [ { @@ -3506,7 +3409,7 @@ "type": "github" } ], - "time": "2023-12-30T02:07:07+00:00" + "time": "2024-01-03T21:38:46+00:00" }, { "name": "phpstan/phpstan", diff --git a/src/Console/AddRecordCommand.php b/src/Console/AddRecordCommand.php index 9a4f275..47f92a7 100644 --- a/src/Console/AddRecordCommand.php +++ b/src/Console/AddRecordCommand.php @@ -22,6 +22,7 @@ declare(strict_types=1); namespace OCC\OaiPmh2\Console; +use OCC\OaiPmh2\Database; use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputInterface; @@ -41,6 +42,7 @@ class AddRecordCommand extends Command { protected function execute(InputInterface $input, OutputInterface $output): int { + Database::getInstance()->pruneOrphanSets(); return Command::SUCCESS; } } diff --git a/src/Console/BulkUpdateCommand.php b/src/Console/BulkUpdateCommand.php deleted file mode 100644 index d36fe22..0000000 --- a/src/Console/BulkUpdateCommand.php +++ /dev/null @@ -1,48 +0,0 @@ - - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -declare(strict_types=1); - -namespace OCC\OaiPmh2\Console; - -use Symfony\Component\Console\Attribute\AsCommand; -use Symfony\Component\Console\Command\Command; -use Symfony\Component\Console\Input\InputInterface; -use Symfony\Component\Console\Output\OutputInterface; - -/** - * Update records in database from CSV file. - * - * @author Sebastian Meyer - * @package opencultureconsulting/oai-pmh2 - */ -#[AsCommand( - name: 'oai:records:bulk-update', - description: 'Update records in database from CSV file' -)] -class BulkUpdateCommand extends Command -{ - protected function execute(InputInterface $input, OutputInterface $output): int - { - // https://symfony.com/doc/current/console/input.html - // https://symfony.com/doc/current/components/serializer.html#the-csvencoder - return Command::SUCCESS; - } -} diff --git a/src/Console/CsvImportCommand.php b/src/Console/CsvImportCommand.php new file mode 100644 index 0000000..06417e3 --- /dev/null +++ b/src/Console/CsvImportCommand.php @@ -0,0 +1,175 @@ + + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +declare(strict_types=1); + +namespace OCC\OaiPmh2\Console; + +use DateTime; +use OCC\OaiPmh2\Database; +use OCC\OaiPmh2\Database\Record; +use Symfony\Component\Console\Attribute\AsCommand; +use Symfony\Component\Console\Command\Command; +use Symfony\Component\Console\Input\InputArgument; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Input\InputOption; +use Symfony\Component\Console\Output\OutputInterface; + +/** + * Import records into database from a CSV file. + * + * @author Sebastian Meyer + * @package opencultureconsulting/oai-pmh2 + */ +#[AsCommand( + name: 'oai:records:import:csv', + description: 'Import records from a CSV file' +)] +class CsvImportCommand extends Command +{ + protected function configure(): void + { + $this->addArgument( + 'format', + InputArgument::REQUIRED, + 'The format (metadata prefix) of the records.', + null, + function (): array { + return array_keys(Database::getInstance()->getMetadataFormats()->getQueryResult()); + } + ); + $this->addArgument( + 'file', + InputArgument::REQUIRED, + 'The CSV file containing the records.' + ); + $this->addOption( + 'idColumn', + null, + InputOption::VALUE_OPTIONAL, + 'Name of the CSV column which holds the records\' identifier.', + 'identifier' + ); + $this->addOption( + 'contentColumn', + null, + InputOption::VALUE_OPTIONAL, + 'Name of the CSV column which holds the records\' content.', + 'content' + ); + $this->addOption( + 'dateColumn', + null, + InputOption::VALUE_OPTIONAL, + 'Name of the CSV column which holds the records\' datetime of last change.', + 'lastChanged' + ); + $this->addOption( + 'setColumn', + null, + InputOption::VALUE_OPTIONAL, + 'Name of the CSV column which holds the records\' sets list.', + 'sets' + ); + parent::configure(); + } + + protected function execute(InputInterface $input, OutputInterface $output): int + { + /** @var array */ + $arguments = $input->getArguments(); + /** @var array */ + $options = $input->getOptions(); + + $formats = Database::getInstance()->getMetadataFormats()->getQueryResult(); + if (!in_array($arguments['format'], array_keys($formats), true)) { + // Error: Invalid metadata prefix + echo 1; + return Command::INVALID; + } + + $file = fopen($arguments['file'], 'r'); + if ($file === false) { + // Error: File not found or not readable + echo 2; + return Command::INVALID; + } + + $headers = fgetcsv($file); + if (!is_array($headers)) { + // Error: No CSV + echo 3; + return Command::INVALID; + } else { + $headers = array_flip($headers); + } + + $column = []; + foreach ($options as $option => $value) { + if (isset($headers[$value])) { + $column[$option] = $headers[$value]; + } + } + if (!isset($column['idColumn']) || !isset($column['contentColumn'])) { + // Error: Required columns missing + echo 4; + return Command::INVALID; + } + $lastChanged = new DateTime(); + + $count = 0; + while ($record = fgetcsv($file)) { + $identifier = $record[$column['idColumn']]; + $content = $record[$column['contentColumn']]; + if ($content === '') { + $content = null; + } + if (isset($column['dateColumn'])) { + $lastChanged = new DateTime($record[$column['dateColumn']]); + } + // TODO: Complete support for sets. + $sets = null; + Database::getInstance()->addOrUpdateRecord( + $identifier, + $arguments['format'], + $content, + $lastChanged, + $sets, + true + ); + ++$count; + if ($count % 500 === 0) { + Database::getInstance()->flush(true); + } + } + Database::getInstance()->flush(true); + + $output->writeln([ + '', + sprintf( + ' [OK] %d records with metadata prefix "%s" were imported successfully! ', + $count, + $arguments['format'] + ), + '' + ]); + return Command::SUCCESS; + } +} diff --git a/src/Console/DeleteRecordCommand.php b/src/Console/DeleteRecordCommand.php index f438c1b..01c3b34 100644 --- a/src/Console/DeleteRecordCommand.php +++ b/src/Console/DeleteRecordCommand.php @@ -22,6 +22,8 @@ declare(strict_types=1); namespace OCC\OaiPmh2\Console; +use OCC\OaiPmh2\Configuration; +use OCC\OaiPmh2\Database; use Symfony\Component\Console\Attribute\AsCommand; use Symfony\Component\Console\Command\Command; use Symfony\Component\Console\Input\InputInterface; @@ -41,6 +43,8 @@ class DeleteRecordCommand extends Command { protected function execute(InputInterface $input, OutputInterface $output): int { + $policy = Configuration::getInstance()->deletedRecords; + Database::getInstance()->pruneOrphanSets(); return Command::SUCCESS; } } diff --git a/src/Database.php b/src/Database.php index fba13b2..21f4a83 100644 --- a/src/Database.php +++ b/src/Database.php @@ -102,6 +102,68 @@ class Database $this->entityManager->flush(); } + /** + * Add or update record. + * + * @param string $identifier The record identifier + * @param Format|string $format The metadata prefix + * @param ?string $data The record's content + * @param ?DateTime $lastChanged The date of last change + * @param ?array $sets The record's associated sets + * @param bool $bulkMode Should we operate in bulk mode (no flush)? + * + * @return void + */ + public function addOrUpdateRecord( + string $identifier, + Format|string $format, + ?string $data = null, + ?DateTime $lastChanged = null, + // TODO: Complete support for sets + ?array $sets, + bool $bulkMode = false + ): void + { + if (!$format instanceof Format) { + /** @var Format */ + $format = $this->entityManager->getReference(Format::class, $format); + } + $record = $this->entityManager->find(Record::class, ['identifier' => $identifier, 'format' => $format]); + if (isset($record)) { + try { + $record->setContent($data); + $record->setLastChanged($lastChanged); + } catch (ValidationFailedException $exception) { + throw $exception; + } + } else { + try { + $record = new Record($identifier, $format, $data, $lastChanged); + } catch (ValidationFailedException $exception) { + throw $exception; + } + } + $this->entityManager->persist($record); + if (!$bulkMode) { + $this->entityManager->flush(); + } + } + + /** + * Flush all changes to the database. + * + * @param bool $clear Also clear the entity manager? + * + * @return void + */ + public function flush(bool $clear = false): void + { + $this->entityManager->flush(); + if ($clear) { + $this->entityManager->clear(); + } + } + /** * Get the earliest datestamp of any record. * diff --git a/src/Database/Format.php b/src/Database/Format.php index 7343258..d4d88a3 100644 --- a/src/Database/Format.php +++ b/src/Database/Format.php @@ -63,7 +63,7 @@ class Format * * @var Collection */ - #[ORM\OneToMany(targetEntity: Record::class, mappedBy: 'format', fetch: 'EXTRA_LAZY', orphanRemoval: true)] + #[ORM\OneToMany(targetEntity: Record::class, mappedBy: 'format', fetch: 'EXTRA_LAZY', cascade: ['persist'], orphanRemoval: true)] private Collection $records; /** diff --git a/src/Database/Record.php b/src/Database/Record.php index 7121d3e..e09ed20 100644 --- a/src/Database/Record.php +++ b/src/Database/Record.php @@ -51,7 +51,7 @@ class Record * The associated format. */ #[ORM\Id] - #[ORM\ManyToOne(targetEntity: Format::class, inversedBy: 'records')] + #[ORM\ManyToOne(targetEntity: Format::class, inversedBy: 'records', cascade: ['persist'])] #[ORM\JoinColumn(name: 'format', referencedColumnName: 'prefix')] private Format $format; @@ -185,7 +185,7 @@ class Record { if (isset($data)) { $data = trim($data); - if ($validate && $data !== '') { + if ($validate) { try { $data = $this->validate($data); } catch (ValidationFailedException $exception) { @@ -236,7 +236,13 @@ class Record protected function validate(string $xml): string { $validator = Validation::createValidator(); - $violations = $validator->validate($xml, new Assert\Type('string')); + $violations = $validator->validate( + $xml, + [ + new Assert\Type('string'), + new Assert\NotBlank() + ] + ); if ($violations->count() > 0) { throw new ValidationFailedException(null, $violations); } @@ -249,18 +255,17 @@ class Record * @param string $identifier The record identifier * @param Format $format The format * @param ?string $data The record's content + * @param ?DateTime $lastChanged The date of last change * * @throws ValidationFailedException */ - public function __construct(string $identifier, Format $format, ?string $data = null) + public function __construct(string $identifier, Format $format, ?string $data = null, ?DateTime $lastChanged = null) { try { $this->identifier = $identifier; $this->setFormat($format); - if (isset($data)) { - $this->setContent($data); - } - $this->setLastChanged(); + $this->setContent($data); + $this->setLastChanged($lastChanged); $this->sets = new ArrayCollection(); } catch (ValidationFailedException $exception) { throw $exception;