From dd8900ddbc9ad7896f72a405611ac9a38bef7c8b Mon Sep 17 00:00:00 2001 From: Sebastian Meyer Date: Thu, 5 Oct 2017 18:02:15 +0200 Subject: [PATCH] Support multiple data formats --- README.md | 2 +- data/oai_dc/sample.xml | 11 +++++++ index.php | 69 ++++++++++++++++++++++++------------------ oai2config.php | 28 +++++++++++------ oai2transform.xsl | 6 ++-- 5 files changed, 73 insertions(+), 43 deletions(-) create mode 100644 data/oai_dc/sample.xml diff --git a/README.md b/README.md index e99c2cb..69a9bfc 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Simple OAI-PMH 2.0 Data Provider ================================ -This is a stand-alone and easy to install data provider for the [Open Archives Initiative's Protocol for Metadata Harvesting (OAI-PMH)](http://openarchives.org/pmh/) written in [PHP](http://php.net/). It serves records in any metadata format from a directory of XML files using the filename as identifier and the filemtime as datestamp. 0-byte files are considered deleted records and handled accordingly. Resumption tokens are managed using files. Multiple metadata formats and sets are currently not supported. +This is a stand-alone and easy to install data provider for the [Open Archives Initiative's Protocol for Metadata Harvesting (OAI-PMH)](http://openarchives.org/pmh/) written in [PHP](http://php.net/). It serves records in any metadata format from directories of XML files using the directory name as metadata prefix, the filename as identifier and the filemtime as datestamp. 0-byte files are considered deleted records and handled accordingly. Resumption tokens are managed using files. Sets are currently not supported. Just put the records as XML files in the data directory, adjust a few configuration settings and you are ready to go! 
diff --git a/data/oai_dc/sample.xml b/data/oai_dc/sample.xml new file mode 100644 index 0000000..c5de084 --- /dev/null +++ b/data/oai_dc/sample.xml @@ -0,0 +1,11 @@ + + + Using Structural Metadata to Localize Experience of Digital Content + Dushay, Naomi + Digital Libraries + With the increasing technical sophistication of both information consumers and providers, there is increasing demand for more meaningful experiences of digital information. We present a framework that separates digital object experience, or rendering, from digital object storage and manipulation, so the rendering can be tailored to particular communities of users. + Comment: 23 pages including 2 appendices, 8 figures + 2001-12-14 + e-print + http://arXiv.org/abs/cs/0112017 + diff --git a/index.php b/index.php index d2c15d2..31985da 100644 --- a/index.php +++ b/index.php @@ -27,19 +27,23 @@ require_once('oai2server.php'); $records = array(); $deleted = array(); $timestamps = array(); +$earliest = time(); -$files = glob(rtrim($config['dataDirectory'], '/').'/*.xml'); -foreach($files as $file) { - $records[pathinfo($file, PATHINFO_FILENAME)] = $file; - $deleted[pathinfo($file, PATHINFO_FILENAME)] = !filesize($file); - $timestamps[filemtime($file)][] = pathinfo($file, PATHINFO_FILENAME); -}; - -ksort($records); -reset($records); - -ksort($timestamps); -reset($timestamps); +foreach($config['metadataFormats'] as $prefix => $uris) { + $files = glob(rtrim($config['dataDirectory'], '/').'/'.$prefix.'/*.xml'); + foreach($files as $file) { + $records[$prefix][pathinfo($file, PATHINFO_FILENAME)] = $file; + $deleted[$prefix][pathinfo($file, PATHINFO_FILENAME)] = !filesize($file); + $timestamps[$prefix][filemtime($file)][] = pathinfo($file, PATHINFO_FILENAME); + if (filemtime($file) < $earliest) { + $earliest = filemtime($file); + } + } + ksort($records[$prefix]); + reset($records[$prefix]); + ksort($timestamps[$prefix]); + reset($timestamps[$prefix]); +} // Get current base URL $baseURL = 
$_SERVER['HTTP_HOST'].parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH); @@ -55,7 +59,7 @@ $identifyResponse = array( 'baseURL' => $baseURL, 'protocolVersion' => '2.0', 'adminEmail' => $config['adminEmail'], - 'earliestDatestamp' => gmdate('Y-m-d\TH:i:s\Z', key($timestamps)), + 'earliestDatestamp' => gmdate('Y-m-d\TH:i:s\Z', $earliest), 'deletedRecord' => $config['deletedRecord'], 'granularity' => 'YYYY-MM-DDThh:mm:ssZ' ); @@ -68,14 +72,14 @@ $oai2 = new OAI2Server( 'GetRecord' => function($identifier, $metadataPrefix) { global $records, $deleted; - if (empty($records[$identifier])) { + if (empty($records[$metadataPrefix][$identifier])) { return array(); } else { return array( 'identifier' => $identifier, - 'timestamp' => filemtime($records[$identifier]), - 'deleted' => $deleted[$identifier], - 'metadata' => $records[$identifier] + 'timestamp' => filemtime($records[$metadataPrefix][$identifier]), + 'deleted' => $deleted[$metadataPrefix][$identifier], + 'metadata' => $records[$metadataPrefix][$identifier] ); } }, @@ -83,14 +87,14 @@ $oai2 = new OAI2Server( function($metadataPrefix, $from = null, $until = null, $count = false, $deliveredRecords = 0, $maxItems = 100) { global $records, $deleted, $timestamps; $resultSet = array(); - foreach($timestamps as $timestamp => $identifiers) { + foreach($timestamps[$metadataPrefix] as $timestamp => $identifiers) { if ((is_null($from) || $timestamp >= $from) && (is_null($until) || $timestamp <= $until)) { foreach($identifiers as $identifier) { $resultSet[] = array( 'identifier' => $identifier, - 'timestamp' => filemtime($records[$identifier]), - 'deleted' => $deleted[$identifier], - 'metadata' => $records[$identifier] + 'timestamp' => filemtime($records[$metadataPrefix][$identifier]), + 'deleted' => $deleted[$metadataPrefix][$identifier], + 'metadata' => $records[$metadataPrefix][$identifier] ); } } @@ -103,16 +107,21 @@ $oai2 = new OAI2Server( }, 'ListMetadataFormats' => function($identifier = '') { - global $config; - if 
(!empty($identifier) && empty($records[$identifier])) { - throw new OAI2Exception('idDoesNotExist'); + global $config, $records; + if (!empty($identifier)) { + $formats = array(); + foreach($records as $format => $record) { + if (!empty($record[$identifier])) { + $formats[$format] = $config['metadataFormats'][$format]; + } + } + if (!empty($formats)) { + return $formats; + } else { + throw new OAI2Exception('idDoesNotExist'); + } } else { - return array( - $config['metadataFormat'] => array( - 'schema'=> $config['metadataSchema'], - 'namespace' => $config['metadataNamespace'] - ) - ); + return $config['metadataFormats']; } } ), diff --git a/oai2config.php b/oai2config.php index afe9d47..886e0be 100644 --- a/oai2config.php +++ b/oai2config.php @@ -34,7 +34,8 @@ $config['repositoryName'] = 'Simple OAI 2.0 Data Provider'; $config['adminEmail'] = 'admin@example.org'; // Do you provide 0-byte files for deleted records? -// Possible values: +// +// Possible values: // "no" -> the repository does not maintain information about deletions // "transient" -> the repository maintains information about deletions, but // does not guarantee them to be persistent (default) @@ -42,22 +43,31 @@ $config['adminEmail'] = 'admin@example.org'; // no time limit $config['deletedRecord'] = 'transient'; -// Metadata format, schema and namespace of your records -// (The default is OAI_DC which is also required by the OAI-PMH specification, -// but technically you can deliver any XML based data format you want.) -$config['metadataFormat'] = 'oai_dc'; -$config['metadataSchema'] = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd'; -$config['metadataNamespace'] = 'http://www.openarchives.org/OAI/2.0/oai_dc/'; +// Metadata formats, schemas and namespaces of your records +// +// The default is 'oai_dc' which is also required by the OAI-PMH specification, +// but technically you can deliver any XML based data format you want.
Just add +// another entry with the 'metadataPrefix' as key and schema/namespace URIs as +// array values or replace the default 'oai_dc' entry (not recommended). +$config['metadataFormats'] = array( + 'oai_dc' => array( + 'schema' => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', + 'namespace' => 'http://www.openarchives.org/OAI/2.0/oai_dc/', + ), +); // Directory containing the records -// (Make sure the given path is readable.) +// +// Make sure the given path is readable and there is a subdirectory for every +// 'metadataPrefix' you specified above. $config['dataDirectory'] = 'data/'; // Maximum number of records to return before giving a resumption token $config['maxRecords'] = 100; // Path and prefix for saving resumption tokens -// (Make sure the given path is writable.) +// +// Make sure the given path is writable. $config['tokenPrefix'] = '/tmp/oai2-'; // Number of seconds a resumption token should be valid diff --git a/oai2transform.xsl b/oai2transform.xsl index 570bd95..f268889 100644 --- a/oai2transform.xsl +++ b/oai2transform.xsl @@ -147,11 +147,11 @@ p.intro { - OAI 2.0 Request Results + OAI-PMH 2.0 Request Results -

OAI 2.0 Request Results

+

OAI-PMH 2.0 Request Results

You are viewing an HTML version of the XML OAI response. To see the underlying XML as it appears to any OAI harvester use your web browser's view source option or disable XSLT processing.

@@ -230,7 +230,7 @@ p.intro { Admin Email - +