Support multiple data formats

This commit is contained in:
Sebastian Meyer 2017-10-05 18:02:15 +02:00
parent 5b1af2feda
commit dd8900ddbc
5 changed files with 73 additions and 43 deletions

View File

@ -1,7 +1,7 @@
Simple OAI-PMH 2.0 Data Provider
================================
This is a stand-alone and easy to install data provider for the [Open Archives Initiative's Protocol for Metadata Harvesting (OAI-PMH)](http://openarchives.org/pmh/) written in [PHP](http://php.net/). It serves records in any metadata format from a directory of XML files using the filename as identifier and the filemtime as datestamp. 0-byte files are considered deleted records and handled accordingly. Resumption tokens are managed using files. Multiple metadata formats and sets are currently not supported.
This is a stand-alone and easy to install data provider for the [Open Archives Initiative's Protocol for Metadata Harvesting (OAI-PMH)](http://openarchives.org/pmh/) written in [PHP](http://php.net/). It serves records in any metadata format from directories of XML files using the directory name as metadata prefix, the filename as identifier and the filemtime as datestamp. 0-byte files are considered deleted records and handled accordingly. Resumption tokens are managed using files. Sets are currently not supported.
Just put the records as XML files into a subdirectory of the data directory named after their metadata prefix (e.g. `data/oai_dc/`), adjust a few configuration settings and you are ready to go!

11
data/oai_dc/sample.xml Normal file
View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
<dc:title>Using Structural Metadata to Localize Experience of Digital Content</dc:title>
<dc:creator>Dushay, Naomi</dc:creator>
<dc:subject>Digital Libraries</dc:subject>
<dc:description>With the increasing technical sophistication of both information consumers and providers, there is increasing demand for more meaningful experiences of digital information. We present a framework that separates digital object experience, or rendering, from digital object storage and manipulation, so the rendering can be tailored to particular communities of users.</dc:description>
<dc:description>Comment: 23 pages including 2 appendices, 8 figures</dc:description>
<dc:date>2001-12-14</dc:date>
<dc:type>e-print</dc:type>
<dc:identifier>http://arXiv.org/abs/cs/0112017</dc:identifier>
</oai_dc:dc>

View File

@ -27,19 +27,23 @@ require_once('oai2server.php');
$records = array();
$deleted = array();
$timestamps = array();
$earliest = time();
$files = glob(rtrim($config['dataDirectory'], '/').'/*.xml');
foreach($files as $file) {
$records[pathinfo($file, PATHINFO_FILENAME)] = $file;
$deleted[pathinfo($file, PATHINFO_FILENAME)] = !filesize($file);
$timestamps[filemtime($file)][] = pathinfo($file, PATHINFO_FILENAME);
};
ksort($records);
reset($records);
ksort($timestamps);
reset($timestamps);
// Build the lookup tables for every configured metadata format. Each format
// lives in its own subdirectory of the data directory (named after the
// metadata prefix) and gets its own bucket in $records, $deleted and
// $timestamps. $earliest tracks the oldest modification time over all formats.
foreach ($config['metadataFormats'] as $prefix => $uris) {
    // Always create the buckets, even if the directory is empty or missing,
    // so that ksort()/reset() below and later lookups by prefix operate on
    // arrays instead of undefined indexes.
    $records[$prefix] = array();
    $deleted[$prefix] = array();
    $timestamps[$prefix] = array();
    // glob() may return false on error; treat that like "no files".
    $files = glob(rtrim($config['dataDirectory'], '/').'/'.$prefix.'/*.xml');
    if ($files === false) {
        $files = array();
    }
    foreach ($files as $file) {
        // The filename (without extension) is the record identifier,
        // the file modification time is the record datestamp.
        $identifier = pathinfo($file, PATHINFO_FILENAME);
        $modified = filemtime($file);
        $records[$prefix][$identifier] = $file;
        // 0-byte files mark deleted records.
        $deleted[$prefix][$identifier] = !filesize($file);
        $timestamps[$prefix][$modified][] = $identifier;
        if ($modified < $earliest) {
            $earliest = $modified;
        }
    }
    ksort($records[$prefix]);
    reset($records[$prefix]);
    ksort($timestamps[$prefix]);
    reset($timestamps[$prefix]);
}
// Get current base URL
$baseURL = $_SERVER['HTTP_HOST'].parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH);
@ -55,7 +59,7 @@ $identifyResponse = array(
'baseURL' => $baseURL,
'protocolVersion' => '2.0',
'adminEmail' => $config['adminEmail'],
'earliestDatestamp' => gmdate('Y-m-d\TH:i:s\Z', key($timestamps)),
'earliestDatestamp' => gmdate('Y-m-d\TH:i:s\Z', $earliest),
'deletedRecord' => $config['deletedRecord'],
'granularity' => 'YYYY-MM-DDThh:mm:ssZ'
);
@ -68,14 +72,14 @@ $oai2 = new OAI2Server(
'GetRecord' =>
    // Look up a single record by identifier within the given metadata format.
    //
    // $identifier     - record identifier (filename without extension)
    // $metadataPrefix - metadata format, i.e. the first-level key of $records
    //
    // Returns an empty array if no such record is indexed for that format,
    // otherwise an array with the keys 'identifier', 'timestamp', 'deleted'
    // and 'metadata' (the path of the record's XML file).
    function($identifier, $metadataPrefix) {
        global $records, $deleted;
        // empty() also covers an unknown metadata prefix without notices.
        if (empty($records[$metadataPrefix][$identifier])) {
            return array();
        }
        $file = $records[$metadataPrefix][$identifier];
        return array(
            'identifier' => $identifier,
            'timestamp' => filemtime($file),
            'deleted' => $deleted[$metadataPrefix][$identifier],
            'metadata' => $file
        );
    },
@ -83,14 +87,14 @@ $oai2 = new OAI2Server(
function($metadataPrefix, $from = null, $until = null, $count = false, $deliveredRecords = 0, $maxItems = 100) {
global $records, $deleted, $timestamps;
$resultSet = array();
foreach($timestamps as $timestamp => $identifiers) {
foreach($timestamps[$metadataPrefix] as $timestamp => $identifiers) {
if ((is_null($from) || $timestamp >= $from) && (is_null($until) || $timestamp <= $until)) {
foreach($identifiers as $identifier) {
$resultSet[] = array(
'identifier' => $identifier,
'timestamp' => filemtime($records[$identifier]),
'deleted' => $deleted[$identifier],
'metadata' => $records[$identifier]
'timestamp' => filemtime($records[$metadataPrefix][$identifier]),
'deleted' => $deleted[$metadataPrefix][$identifier],
'metadata' => $records[$metadataPrefix][$identifier]
);
}
}
@ -103,16 +107,21 @@ $oai2 = new OAI2Server(
},
'ListMetadataFormats' =>
    // List the metadata formats available in this repository or, if an
    // identifier is given, the formats available for that record.
    //
    // $identifier - optional record identifier; an empty value means
    //               "all formats configured in $config['metadataFormats']"
    //
    // Returns an array keyed by metadata prefix whose values are the
    // corresponding entries of $config['metadataFormats'].
    // Throws OAI2Exception('idDoesNotExist') if the identifier is not
    // indexed under any metadata format.
    function($identifier = '') {
        global $config, $records;
        if (empty($identifier)) {
            return $config['metadataFormats'];
        }
        // $records is keyed by metadata prefix first, then by identifier,
        // so collect every prefix that has an entry for this identifier.
        $formats = array();
        foreach ($records as $format => $record) {
            if (!empty($record[$identifier])) {
                $formats[$format] = $config['metadataFormats'][$format];
            }
        }
        if (empty($formats)) {
            throw new OAI2Exception('idDoesNotExist');
        }
        return $formats;
    }
),

View File

@ -34,7 +34,8 @@ $config['repositoryName'] = 'Simple OAI 2.0 Data Provider';
$config['adminEmail'] = 'admin@example.org';
// Do you provide 0-byte files for deleted records?
// Possible values:
//
// Possible values:
// "no" -> the repository does not maintain information about deletions
// "transient" -> the repository maintains information about deletions, but
// does not guarantee them to be persistent (default)
@ -42,22 +43,31 @@ $config['adminEmail'] = 'admin@example.org';
// no time limit
$config['deletedRecord'] = 'transient';
// Metadata format, schema and namespace of your records
// (The default is OAI_DC which is also required by the OAI-PMH specification,
// but technically you can deliver any XML based data format you want.)
$config['metadataFormat'] = 'oai_dc';
$config['metadataSchema'] = 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd';
$config['metadataNamespace'] = 'http://www.openarchives.org/OAI/2.0/oai_dc/';
// Metadata formats, schemas and namespaces of your records
//
// The default is 'oai_dc' which is also required by the OAI-PMH specification,
// but technically you can deliver any XML based data format you want. Just add
// another entry with the 'metadataPrefix' as key and an array holding the
// 'schema' and 'namespace' URIs as value, or replace the default 'oai_dc'
// entry (not recommended).
$config['metadataFormats'] = array(
    'oai_dc' => array(
        'schema' => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
        'namespace' => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
    ),
);
// Directory containing the records
// (Make sure the given path is readable.)
//
// Make sure the given path is readable and there is a subdirectory for every
// 'metadataPrefix' you specified above.
$config['dataDirectory'] = 'data/';
// Maximum number of records to return before giving a resumption token
$config['maxRecords'] = 100;
// Path and prefix for saving resumption tokens
// (Make sure the given path is writable.)
//
// Make sure the given path is writable.
$config['tokenPrefix'] = '/tmp/oai2-';
// Number of seconds a resumption token should be valid

View File

@ -147,11 +147,11 @@ p.intro {
<xsl:template match="/">
<html>
<head>
<title>OAI 2.0 Request Results</title>
<title>OAI-PMH 2.0 Request Results</title>
<style><xsl:call-template name="style"/></style>
</head>
<body>
<h1>OAI 2.0 Request Results</h1>
<h1>OAI-PMH 2.0 Request Results</h1>
<xsl:call-template name="quicklinks"/>
<p class="intro">You are viewing an HTML version of the XML OAI response. To see the underlying XML as it appears to any OAI harvester use your web browser's view source option or disable XSLT processing.</p>
<xsl:apply-templates select="/oai:OAI-PMH" />
@ -230,7 +230,7 @@ p.intro {
<xsl:template match="/oai:OAI-PMH/oai:Identify/oai:adminEmail">
<tr><td class="key">Admin Email</td>
<td class="value"><xsl:value-of select="."/></td></tr>
<td class="value"><a href="mailto:{.}"><xsl:value-of select="."/></a></td></tr>
</xsl:template>
<!--