Support deletions by keeping 0-byte files

This commit is contained in:
Sebastian Meyer 2017-09-28 14:47:24 +02:00
parent e1f59e2c66
commit a46738633d
5 changed files with 46 additions and 22 deletions

View File

@ -1,7 +1,7 @@
Simple OAI-PMH 2.0 Data Provider Simple OAI-PMH 2.0 Data Provider
================================ ================================
This is a stand-alone and easy to install data provider for the [Open Archives Initiative's Protocol for Metadata Harvesting (OAI-PMH)](http://openarchives.org/pmh/) written in [PHP](http://php.net/). It serves records in any metadata format from a directory of XML files using the filename as identifier and the filemtime as datestamp. Resumption tokens are managed using files. Multiple metadata formats and sets are currently not supported. This is a stand-alone and easy to install data provider for the [Open Archives Initiative's Protocol for Metadata Harvesting (OAI-PMH)](http://openarchives.org/pmh/) written in [PHP](http://php.net/). It serves records in any metadata format from a directory of XML files using the filename as identifier and the filemtime as datestamp. 0-byte files are considered deleted records and handled accordingly. Resumption tokens are managed using files. Multiple metadata formats and sets are currently not supported.
Just put the records as XML files in the data directory, adjust a few configuration settings and you are ready to go! Just put the records as XML files in the data directory, adjust a few configuration settings and you are ready to go!
@ -16,6 +16,8 @@ Installation
3. Put the records into the specified data directory. Each record has to be a separate XML file with its identifier as filename (e.g. 12345678.xml). 3. Put the records into the specified data directory. Each record has to be a separate XML file with its identifier as filename (e.g. 12345678.xml).
3a. Optionally you can maintain deletions by keeping 0-byte files in the data directory for deleted records.
4. Congratulations! Now you are running an OAI-PMH 2.0 compatible data provider. 4. Congratulations! Now you are running an OAI-PMH 2.0 compatible data provider.
History History

View File

@ -23,13 +23,15 @@
require_once('oai2config.php'); require_once('oai2config.php');
require_once('oai2server.php'); require_once('oai2server.php');
// Get all available records and their respective timestamps // Get all available records and their respective status and timestamps
$records = array(); $records = array();
$deleted = array();
$timestamps = array(); $timestamps = array();
$files = glob(rtrim($config['dataDirectory'], '/').'/*.xml'); $files = glob(rtrim($config['dataDirectory'], '/').'/*.xml');
foreach($files as $file) { foreach($files as $file) {
$records[pathinfo($file, PATHINFO_FILENAME)] = $file; $records[pathinfo($file, PATHINFO_FILENAME)] = $file;
$deleted[pathinfo($file, PATHINFO_FILENAME)] = !filesize($file);
$timestamps[filemtime($file)][] = pathinfo($file, PATHINFO_FILENAME); $timestamps[filemtime($file)][] = pathinfo($file, PATHINFO_FILENAME);
}; };
@ -54,7 +56,7 @@ $identifyResponse = array(
'protocolVersion' => '2.0', 'protocolVersion' => '2.0',
'adminEmail' => $config['adminEmail'], 'adminEmail' => $config['adminEmail'],
'earliestDatestamp' => gmdate('Y-m-d\TH:i:s\Z', key($timestamps)), 'earliestDatestamp' => gmdate('Y-m-d\TH:i:s\Z', key($timestamps)),
'deletedRecord' => 'no', 'deletedRecord' => $config['deletedRecord'],
'granularity' => 'YYYY-MM-DDThh:mm:ssZ' 'granularity' => 'YYYY-MM-DDThh:mm:ssZ'
); );
@ -65,20 +67,21 @@ $oai2 = new OAI2Server(
array( array(
'GetRecord' => 'GetRecord' =>
function($identifier, $metadataPrefix) { function($identifier, $metadataPrefix) {
global $records; global $records, $deleted;
if (empty($records[$identifier])) { if (empty($records[$identifier])) {
return array(); return array();
} else { } else {
return array( return array(
'identifier' => $identifier, 'identifier' => $identifier,
'timestamp' => filemtime($records[$identifier]), 'timestamp' => filemtime($records[$identifier]),
'deleted' => $deleted[$identifier],
'metadata' => $records[$identifier] 'metadata' => $records[$identifier]
); );
} }
}, },
'ListRecords' => 'ListRecords' =>
function($metadataPrefix, $from = null, $until = null, $count = false, $deliveredRecords = 0, $maxItems = 100) { function($metadataPrefix, $from = null, $until = null, $count = false, $deliveredRecords = 0, $maxItems = 100) {
global $records, $timestamps; global $records, $deleted, $timestamps;
$resultSet = array(); $resultSet = array();
foreach($timestamps as $timestamp => $identifiers) { foreach($timestamps as $timestamp => $identifiers) {
if ((is_null($from) || $timestamp >= $from) && (is_null($until) || $timestamp <= $until)) { if ((is_null($from) || $timestamp >= $from) && (is_null($until) || $timestamp <= $until)) {
@ -86,6 +89,7 @@ $oai2 = new OAI2Server(
$resultSet[] = array( $resultSet[] = array(
'identifier' => $identifier, 'identifier' => $identifier,
'timestamp' => filemtime($records[$identifier]), 'timestamp' => filemtime($records[$identifier]),
'deleted' => $deleted[$identifier],
'metadata' => $records[$identifier] 'metadata' => $records[$identifier]
); );
} }

View File

@ -22,6 +22,7 @@
/** /**
* This file contains all configuration you need to change according to your preferences * This file contains all configuration you need to change according to your preferences
* @see http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm for further explanation
*/ */
$config = array(); $config = array();
@ -32,20 +33,31 @@ $config['repositoryName'] = 'Simple OAI 2.0 Data Provider';
// Email address for contacting the repository owner // Email address for contacting the repository owner
$config['adminEmail'] = 'admin@example.org'; $config['adminEmail'] = 'admin@example.org';
// Do you provide 0-byte files for deleted records?
// Possible values:
// "no" -> the repository does not maintain information about deletions
// "transient" -> the repository maintains information about deletions, but
// does not guarantee them to be persistent (default)
// "persistent" -> the repository maintains information about deletions with
// no time limit
$config['deletedRecord'] = 'transient';
// Metadata format, schema and namespace of your records // Metadata format, schema and namespace of your records
// (The default is OAI_DC which is also required by the OAI-PMH specification,
// but technically you can deliver any XML based data format you want.)
$config['metadataFormat'] = 'oai_dc'; $config['metadataFormat'] = 'oai_dc';
$config['metadataSchema'] = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'; $config['metadataSchema'] = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd';
$config['metadataNamespace'] = 'http://www.openarchives.org/OAI/2.0/oai_dc/'; $config['metadataNamespace'] = 'http://www.openarchives.org/OAI/2.0/oai_dc/';
// Directory containing the records // Directory containing the records
// (Make sure the given path is readable) // (Make sure the given path is readable.)
$config['dataDirectory'] = 'data/'; $config['dataDirectory'] = 'data/';
// Maximum number of records to return before giving a resumption token // Maximum number of records to return before giving a resumption token
$config['maxRecords'] = 100; $config['maxRecords'] = 100;
// Path and prefix for saving resumption tokens // Path and prefix for saving resumption tokens
// (Make sure the given path is writable) // (Make sure the given path is writable.)
$config['tokenPrefix'] = '/tmp/oai2-'; $config['tokenPrefix'] = '/tmp/oai2-';
// Number of seconds a resumption token should be valid // Number of seconds a resumption token should be valid

View File

@ -32,9 +32,10 @@ class OAI2Server {
public $errors = array(); public $errors = array();
private $args = array(); private $args = array();
private $verb = ''; private $verb = '';
private $deleted_record = 'transient';
private $max_records = 100;
private $token_prefix = '/tmp/oai2-'; private $token_prefix = '/tmp/oai2-';
private $token_valid = 86400; private $token_valid = 86400;
private $max_records = 100;
public function __construct($uri, $args, $identifyResponse, $callbacks, $config) { public function __construct($uri, $args, $identifyResponse, $callbacks, $config) {
$this->uri = $uri; $this->uri = $uri;
@ -50,9 +51,10 @@ class OAI2Server {
$this->listMetadataFormatsCallback = $callbacks['ListMetadataFormats']; $this->listMetadataFormatsCallback = $callbacks['ListMetadataFormats'];
$this->listRecordsCallback = $callbacks['ListRecords']; $this->listRecordsCallback = $callbacks['ListRecords'];
$this->getRecordCallback = $callbacks['GetRecord']; $this->getRecordCallback = $callbacks['GetRecord'];
$this->deleted_record = $config['deletedRecord'];
$this->max_records = $config['maxRecords'];
$this->token_prefix = $config['tokenPrefix']; $this->token_prefix = $config['tokenPrefix'];
$this->token_valid = $config['tokenValid']; $this->token_valid = $config['tokenValid'];
$this->max_records = $config['maxRecords'];
$this->response = new OAI2XMLResponse($this->uri, $this->verb, $this->args); $this->response = new OAI2XMLResponse($this->uri, $this->verb, $this->args);
call_user_func(array($this, $this->verb)); call_user_func(array($this, $this->verb));
} else { } else {
@ -141,8 +143,10 @@ class OAI2Server {
try { try {
if ($record = call_user_func($this->getRecordCallback, $this->args['identifier'], $this->args['metadataPrefix'])) { if ($record = call_user_func($this->getRecordCallback, $this->args['identifier'], $this->args['metadataPrefix'])) {
$cur_record = $this->response->addToVerbNode('record'); $cur_record = $this->response->addToVerbNode('record');
$cur_header = $this->response->createHeader($record['identifier'], $this->formatDatestamp($record['timestamp']), $cur_record); $cur_header = $this->response->createHeader($record['identifier'], $this->formatDatestamp($record['timestamp']), $record['deleted'], $cur_record);
$this->add_metadata($cur_record, $record['metadata']); if (!$record['deleted']) {
$this->addMetadata($cur_record, $record['metadata']);
}
} else { } else {
$this->errors[] = new OAI2Exception('idDoesNotExist'); $this->errors[] = new OAI2Exception('idDoesNotExist');
} }
@ -210,12 +214,10 @@ class OAI2Server {
} }
$records = call_user_func($this->listRecordsCallback, $metadataPrefix, $this->formatTimestamp($from), $this->formatTimestamp($until), false, $deliveredRecords, $maxItems); $records = call_user_func($this->listRecordsCallback, $metadataPrefix, $this->formatTimestamp($from), $this->formatTimestamp($until), false, $deliveredRecords, $maxItems);
foreach ($records as $record) { foreach ($records as $record) {
if ($this->verb == 'ListRecords') { $cur_record = $this->response->addToVerbNode('record');
$cur_record = $this->response->addToVerbNode('record'); $cur_header = $this->response->createHeader($record['identifier'], $this->formatDatestamp($record['timestamp']), $record['deleted'], $cur_record);
$cur_header = $this->response->createHeader($record['identifier'], $this->formatDatestamp($record['timestamp']), $cur_record); if (!$record['deleted'] && $this->verb == 'ListRecords') { // for ListIdentifiers, only identifiers will be returned.
$this->add_metadata($cur_record, $record['metadata']); $this->addMetadata($cur_record, $record['metadata']);
} else { // for ListIdentifiers, only identifiers will be returned.
$cur_header = $this->response->createHeader($record['identifier'], $this->formatDatestamp($record['timestamp']));
} }
} }
// Will we need a new ResumptionToken? // Will we need a new ResumptionToken?
@ -237,7 +239,7 @@ class OAI2Server {
} }
} }
private function add_metadata($cur_record, $file) { private function addMetadata($cur_record, $file) {
$meta_node = $this->response->addChild($cur_record, 'metadata'); $meta_node = $this->response->addChild($cur_record, 'metadata');
$fragment = new DOMDocument(); $fragment = new DOMDocument();
$fragment->load($file); $fragment->load($file);

View File

@ -60,7 +60,7 @@ class OAI2XMLResponse {
/** /**
* Add direct child nodes to verb node (OAI-PMH), e.g. response to ListMetadataFormats. * Add direct child nodes to verb node (OAI-PMH), e.g. response to ListMetadataFormats.
* Different verbs can have different required child nodes. * Different verbs can have different required child nodes.
* @see create_record, create_header * @see createHeader, importFragment
* *
* @param $nodeName Type: string. The name of appending node. * @param $nodeName Type: string. The name of appending node.
* @param $value Type: string. The content of appending node. * @param $value Type: string. The content of appending node.
@ -76,13 +76,14 @@ class OAI2XMLResponse {
* Headers are enclosed inside <record> in responses to queries such as ListRecords and ListIdentifiers. * Headers are enclosed inside <record> in responses to queries such as ListRecords and ListIdentifiers.
* *
* @param $identifier Type: string. The identifier string for node <identifier>. * @param $identifier Type: string. The identifier string for node <identifier>.
* @param $timestamp Type: timestamp. Timestapme in UTC format for node <datastamp>. * @param $timestamp Type: timestamp. Timestamp in UTC format for node <datestamp>.
* @param $deleted Type: boolean. Deleted status for the record.
* @param $add_to_node Type: DOMElement. Default value is null. * @param $add_to_node Type: DOMElement. Default value is null.
* In normal cases, $add_to_node is the <record> node created previously. * In normal cases, $add_to_node is the <record> node created previously.
* When it is null, the newly created header node is attached to $this->verbNode. * When it is null, the newly created header node is attached to $this->verbNode.
* Otherwise it will be attatched to the desired node defined in $add_to_node. * Otherwise it will be attached to the desired node defined in $add_to_node.
*/ */
function createHeader($identifier, $timestamp, $add_to_node = null) { function createHeader($identifier, $timestamp, $deleted = false, $add_to_node = null) {
if(is_null($add_to_node)) { if(is_null($add_to_node)) {
$header_node = $this->addToVerbNode('header'); $header_node = $this->addToVerbNode('header');
} else { } else {
@ -90,6 +91,9 @@ class OAI2XMLResponse {
} }
$this->addChild($header_node, 'identifier', $identifier); $this->addChild($header_node, 'identifier', $identifier);
$this->addChild($header_node, 'datestamp', $timestamp); $this->addChild($header_node, 'datestamp', $timestamp);
if($deleted) {
$header_node->setAttribute('status', 'deleted');
}
return $header_node; return $header_node;
} }