diff --git a/oai2.php b/oai2.php index 6ec9bd8..95a1e2b 100644 --- a/oai2.php +++ b/oai2.php @@ -1,128 +1,116 @@ + * Copyright (C) 2017 Sebastian Meyer + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +require_once('oai2config.php'); require_once('oai2server.php'); -/** - * Identifier settings. It needs to have proper values to reflect the settings of the data provider. - * Is MUST be declared in this order - * - * - $identifyResponse['repositoryName'] : compulsory. A human readable name for the repository; - * - $identifyResponse['baseURL'] : compulsory. The base URL of the repository; - * - $identifyResponse['protocolVersion'] : compulsory. The version of the OAI-PMH supported by the repository; - * - $identifyResponse['earliestDatestamp'] : compulsory. A UTCdatetime that is the guaranteed lower limit of all datestamps recording changes, modifications, or deletions in the repository. A repository must not use datestamps lower than the one specified by the content of the earliestDatestamp element. earliestDatestamp must be expressed at the finest granularity supported by the repository. - * - $identifyResponse['deletedRecord'] : the manner in which the repository supports the notion of deleted records. Legitimate values are no ; transient ; persistent with meanings defined in the section on deletion. 
- * - $identifyResponse['granularity'] : the finest harvesting granularity supported by the repository. The legitimate values are YYYY-MM-DD and YYYY-MM-DDThh:mm:ssZ with meanings as defined in ISO8601. - * - */ -$identifyResponse = array(); -$identifyResponse["repositoryName"] = 'OAI2 PMH Test'; -$identifyResponse["baseURL"] = 'http://198.199.108.242/~neis/oai_pmh/oai2.php'; -$identifyResponse["protocolVersion"] = '2.0'; -$identifyResponse['adminEmail'] = 'danielneis@gmail.com'; -$identifyResponse["earliestDatestamp"] = '2013-01-01T12:00:00Z'; -$identifyResponse["deletedRecord"] = 'no'; // How your repository handles deletions - // no: The repository does not maintain status about deletions. - // It MUST NOT reveal a deleted status. - // persistent: The repository persistently keeps track about deletions - // with no time limit. It MUST consistently reveal the status - // of a deleted record over time. - // transient: The repository does not guarantee that a list of deletions is - // maintained. It MAY reveal a deleted status for records. 
-$identifyResponse["granularity"] = 'YYYY-MM-DDThh:mm:ssZ'; +// Get all available records and their respective timestamps +$records = array(); +$timestamps = array(); -$example_record = array('identifier' => 'a.b.c', - 'datestamp' => date('Y-m-d-H:s'), - 'set' => 'class:activity', - 'metadata' => array( - 'container_name' => 'oai_dc:dc', - 'container_attributes' => array( - 'xmlns:oai_dc' => "http://www.openarchives.org/OAI/2.0/oai_dc/", - 'xmlns:dc' => "http://purl.org/dc/elements/1.1/", - 'xmlns:xsi' => "http://www.w3.org/2001/XMLSchema-instance", - 'xsi:schemaLocation' => - 'http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd' - ), - 'fields' => array( - 'dc:title' => 'Testing records', - 'dc:author' => 'Neis' - ) - )); +$files = glob('data/*.xml') ?: array(); // glob() returns false on error, which foreach cannot iterate +foreach($files as $file) { + $records[pathinfo($file, PATHINFO_FILENAME)] = $file; + $timestamps[filemtime($file)][] = pathinfo($file, PATHINFO_FILENAME); +} -/* unit tests ;) */ -if (!isset($args)) { - $args = $_GET; -} -if (!isset($uri)) { - $uri = 'test.oai_pmh'; -} -$oai2 = new OAI2Server($uri, $args, $identifyResponse, - array( - 'ListMetadataFormats' => - function($identifier = '') { - if (!empty($identifier) && $identifier != 'a.b.c') { - throw new OAI2Exception('idDoesNotExist'); - } - return - array('rif' => array('metadataPrefix'=>'rif', - 'schema'=>'http://services.ands.org.au/sandbox/orca/schemata/registryObjects.xsd', - 'metadataNamespace'=>'http://ands.org.au/standards/rif-cs/registryObjects/', - ), - 'oai_dc' => array('metadataPrefix'=>'oai_dc', - 'schema'=>'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', - 'metadataNamespace'=>'http://www.openarchives.org/OAI/2.0/oai_dc/', - 'record_prefix'=>'dc', - 'record_namespace' => 'http://purl.org/dc/elements/1.1/')); - }, +ksort($records); +reset($records); - 'ListSets' => - function($resumptionToken = '') { - return - array ( - array('setSpec'=>'class:collection', 'setName'=>'Collections'), - array('setSpec'=>'math', 
'setName'=>'Mathematics') , - array('setSpec'=>'phys', 'setName'=>'Physics'), - array('setSpec'=>'phdthesis', 'setName'=>'PHD Thesis', - 'setDescription'=> - ' '. - ' This set contains metadata describing '. - ' electronic music recordings made during the 1950ies '. - ' ')); - }, +ksort($timestamps); +reset($timestamps); - 'ListRecords' => - function($metadataPrefix, $from = '', $until = '', $set = '', $count = false, $deliveredRecords = 0, $maxItems = 0) use ($example_record) { - if ($count) { - return 1; - } - if ($set != '') { - throw new OAI2Exception('noSetHierarchy'); - } - if ($metadataPrefix != 'oai_dc') { - throw new OAI2Exception('noRecordsMatch'); - } - return array($example_record); - }, +// Build the Identify response +$identifyResponse = array( + 'repositoryName' => $config['repositoryName'], + 'baseURL' => $config['baseURL'], + 'protocolVersion' => '2.0', + 'adminEmail' => $config['adminEmail'], + 'earliestDatestamp' => gmdate('Y-m-d\TH:i:s\Z', key($timestamps)), + 'deletedRecord' => 'no', + 'granularity' => 'YYYY-MM-DDThh:mm:ssZ' +); - 'GetRecord' => - function($identifier, $metadataPrefix) use ($example_record) { - if ($identifier != 'a.b.c') { - throw new OAI2Exception('idDoesNotExist'); - } - return $example_record; - }, - ) +$oai2 = new OAI2Server( + 'http://'.$_SERVER['HTTP_HOST'].parse_url($_SERVER["REQUEST_URI"], PHP_URL_PATH), + $_GET, + $identifyResponse, + array( + 'GetRecord' => + function($identifier, $metadataPrefix) use ($records) { // closures only see outer variables imported via use + if (empty($records[$identifier])) { + return array(); + } else { + return array( + 'identifier' => $identifier, + 'timestamp' => filemtime($records[$identifier]), + 'metadata' => $records[$identifier] + ); + } + }, + 'ListRecords' => + function($metadataPrefix, $from = null, $until = null, $count = false, $deliveredRecords = 0, $maxItems = 100) use ($records, $timestamps) { + $resultSet = array(); + foreach($timestamps as $timestamp => $identifiers) { + if ((is_null($from) || $timestamp >= $from) && (is_null($until) || $timestamp <= $until)) { + 
foreach($identifiers as $identifier) { + $resultSet[] = array( + 'identifier' => $identifier, + 'timestamp' => filemtime($records[$identifier]), + 'metadata' => $records[$identifier] + ); + } + } + } + if ($count) { + return count($resultSet); + } else { + return array_slice($resultSet, $deliveredRecords, $maxItems); + } + }, + 'ListMetadataFormats' => + function($identifier = '') use ($records, $config) { + if (!empty($identifier) && empty($records[$identifier])) { + throw new OAI2Exception('idDoesNotExist'); + } else { + return array( + $config['metadataFormat'] => array ( + 'metadataPrefix' => $config['metadataFormat'], + 'schema'=> $config['metadataSchema'], + 'metadataNamespace' => $config['metadataNamespace'] + ) + ); + } + } + ), + $config ); $response = $oai2->response(); + if (isset($return)) { - return $response; + return $response; } else { - $response->formatOutput = true; - $response->preserveWhiteSpace = false; - header('Content-Type: text/xml'); - echo $response->saveXML(); + $response->formatOutput = true; + $response->preserveWhiteSpace = false; + header('Content-Type: text/xml'); + echo $response->saveXML(); } diff --git a/oai2config.php b/oai2config.php new file mode 100644 index 0000000..6b20bdd --- /dev/null +++ b/oai2config.php @@ -0,0 +1,50 @@ + + * Copyright (C) 2017 Sebastian Meyer + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +/** + * This file contains all configuration settings you need to adjust for your own repository + */ + +$config = array(); + +// A human-readable name for the repository +$config['repositoryName'] = 'German Literature Archive, Marbach'; + +// The base URL of the repository, i.e. the public URL of the oai2.php file +$config['baseURL'] = 'http://www.dla-marbach.de/oai2/oai2.php'; + +// Email address for contacting the repository owner +$config['adminEmail'] = 'info@dla-marbach.de'; + +// Metadata format (used as metadataPrefix), schema and namespace of your records +$config['metadataFormat'] = 'ead'; +$config['metadataSchema'] = 'https://www.loc.gov/ead/ead.xsd'; +$config['metadataNamespace'] = 'urn:isbn:1-931666-22-9'; + +// Maximum number of records to return per response before issuing a resumption token +$config['maxRecords'] = 100; + +// Path and filename prefix for saving resumption tokens +// (Make sure the given path is writable) +$config['tokenPrefix'] = '/tmp/oai2-'; + +// Number of seconds a resumption token remains valid +$config['tokenValid'] = 86400; // 24 hours diff --git a/oai2server.php b/oai2server.php index 830c432..293eb07 100644 --- a/oai2server.php +++ b/oai2server.php @@ -1,4 +1,24 @@ + * Copyright (C) 2017 Sebastian Meyer + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + require_once('oai2exception.php'); require_once('oai2xml.php'); @@ -11,41 +31,33 @@ class OAI2Server { public $errors = array(); private $args = array(); private $verb = ''; - private $token_prefix = '/tmp/oai_pmh-'; + private $token_prefix = '/tmp/oai2-'; private $token_valid = 86400; + private $max_records = 100; - function __construct($uri, $args, $identifyResponse, $callbacks) { - + public function __construct($uri, $args, $identifyResponse, $callbacks, $config = array()) { // $config is optional; missing keys keep the class defaults $this->uri = $uri; - if (!isset($args['verb']) || empty($args['verb'])) { $this->errors[] = new OAI2Exception('badVerb'); } else { $verbs = array('Identify', 'ListMetadataFormats', 'ListSets', 'ListIdentifiers', 'ListRecords', 'GetRecord'); if (in_array($args['verb'], $verbs)) { - $this->verb = $args['verb']; - unset($args['verb']); - $this->args = $args; - $this->identifyResponse = $identifyResponse; - $this->listMetadataFormatsCallback = $callbacks['ListMetadataFormats']; - $this->listSetsCallback = $callbacks['ListSets']; $this->listRecordsCallback = $callbacks['ListRecords']; $this->getRecordCallback = $callbacks['GetRecord']; - + $this->token_prefix = isset($config['tokenPrefix']) ? $config['tokenPrefix'] : $this->token_prefix; + $this->token_valid = isset($config['tokenValid']) ? $config['tokenValid'] : $this->token_valid; + $this->max_records = isset($config['maxRecords']) ? $config['maxRecords'] : $this->max_records; $this->response = new OAI2XMLResponse($this->uri, $this->verb, $this->args); - call_user_func(array($this, $this->verb)); - } else { $this->errors[] = new OAI2Exception('badVerb'); } } - } public function response() { @@ -63,7 +75,6 @@ class OAI2Server { } public function Identify() { - if (count($this->args) > 0) { foreach($this->args as $key => $val) { $this->errors[] = new OAI2Exception('badArgument'); @@ -76,7 +87,6 @@ class OAI2Server { } public function ListMetadataFormats() { - foreach ($this->args as $argument => $value) { if ($argument != 'identifier') { $this->errors[] = new OAI2Exception('badArgument'); @@ -91,10 +101,10 @@ class OAI2Server { try { if ($formats = call_user_func($this->listMetadataFormatsCallback, 
$identifier)) { foreach($formats as $key => $val) { - $cmf = $this->response->addToVerbNode("metadataFormat"); - $this->response->addChild($cmf,'metadataPrefix',$key); - $this->response->addChild($cmf,'schema',$val['schema']); - $this->response->addChild($cmf,'metadataNamespace',$val['metadataNamespace']); + $cmf = $this->response->addToVerbNode('metadataFormat'); + $this->response->addChild($cmf, 'metadataPrefix', $key); + $this->response->addChild($cmf, 'schema', $val['schema']); + $this->response->addChild($cmf, 'metadataNamespace', $val['metadataNamespace']); } } else { $this->errors[] = new OAI2Exception('noMetadataFormats'); @@ -106,45 +116,18 @@ class OAI2Server { } public function ListSets() { - if (isset($this->args['resumptionToken'])) { if (count($this->args) > 1) { $this->errors[] = new OAI2Exception('badArgument'); } else { - if ((int)$val+$this->token_valid < time()) { - $this->errors[] = new OAI2Exception('badResumptionToken'); - } + $this->errors[] = new OAI2Exception('badResumptionToken'); } - $resumptionToken = $this->args['resumptionToken']; } else { - $resumptionToken = null; - } - if (empty($this->errors)) { - if ($sets = call_user_func($this->listSetsCallback, $resumptionToken)) { - - foreach($sets as $set) { - - $setNode = $this->response->addToVerbNode("set"); - - foreach($set as $key => $val) { - if($key=='setDescription') { - $desNode = $this->response->addChild($setNode,$key); - $des = $this->response->doc->createDocumentFragment(); - $des->appendXML($val); - $desNode->appendChild($des); - } else { - $this->response->addChild($setNode,$key,$val); - } - } - } - } else { - $this->errors[] = new OAI2Exception('noSetHierarchy'); - } + $this->errors[] = new OAI2Exception('noSetHierarchy'); } } public function GetRecord() { - if (!isset($this->args['metadataPrefix'])) { $this->errors[] = new OAI2Exception('badArgument'); } else { @@ -156,28 +139,12 @@ class OAI2Server { if (!isset($this->args['identifier'])) { $this->errors[] = new 
OAI2Exception('badArgument'); } - if (empty($this->errors)) { try { if ($record = call_user_func($this->getRecordCallback, $this->args['identifier'], $this->args['metadataPrefix'])) { - - $identifier = $record['identifier']; - - $datestamp = $this->formatDatestamp($record['datestamp']); - - $set = $record['set']; - - $status_deleted = (isset($record['deleted']) && ($record['deleted'] == 'true') && - (($this->identifyResponse['deletedRecord'] == 'transient') || - ($this->identifyResponse['deletedRecord'] == 'persistent'))); - $cur_record = $this->response->addToVerbNode('record'); - $cur_header = $this->response->createHeader($identifier, $datestamp, $set, $cur_record); - if ($status_deleted) { - $cur_header->setAttribute("status","deleted"); - } else { - $this->add_metadata($cur_record, $record); - } + $cur_header = $this->response->createHeader($record['identifier'], $this->formatDatestamp($record['timestamp']), $cur_record); + $this->add_metadata($cur_record, $record['metadata']); } else { $this->errors[] = new OAI2Exception('idDoesNotExist'); } @@ -192,14 +159,11 @@ class OAI2Server { } public function ListRecords() { - - $maxItems = 1000; + $maxItems = $this->max_records; $deliveredRecords = 0; - $metadataPrefix = $this->args['metadataPrefix']; + $metadataPrefix = isset($this->args['metadataPrefix']) ? $this->args['metadataPrefix'] : ''; // absent on resumptionToken requests; read from the token below $from = isset($this->args['from']) ? $this->args['from'] : ''; $until = isset($this->args['until']) ? $this->args['until'] : ''; - $set = isset($this->args['set']) ? 
$this->args['set'] : ''; - if (isset($this->args['resumptionToken'])) { if (count($this->args) > 1) { $this->errors[] = new OAI2Exception('badArgument'); @@ -211,7 +175,7 @@ class OAI2Server { $this->errors[] = new OAI2Exception('badResumptionToken'); } else { if ($readings = $this->readResumptionToken($this->token_prefix.$this->args['resumptionToken'])) { - list($deliveredRecords, $metadataPrefix, $from, $until, $set) = $readings; + list($deliveredRecords, $metadataPrefix, $from, $until) = $readings; } else { $this->errors[] = new OAI2Exception('badResumptionToken'); } @@ -228,99 +192,69 @@ class OAI2Server { } } if (isset($this->args['from'])) { - if(!$this->checkDateFormat($this->args['from'])) { + if (!$this->checkDateFormat($this->args['from'])) { $this->errors[] = new OAI2Exception('badArgument'); } } if (isset($this->args['until'])) { - if(!$this->checkDateFormat($this->args['until'])) { + if (!$this->checkDateFormat($this->args['until'])) { $this->errors[] = new OAI2Exception('badArgument'); } } + if (isset($this->args['set'])) { + $this->errors[] = new OAI2Exception('noSetHierarchy'); + } } - if (empty($this->errors)) { try { - - $records_count = call_user_func($this->listRecordsCallback, $metadataPrefix, $from, $until, $set, true); - - $records = call_user_func($this->listRecordsCallback, $metadataPrefix, $from, $until, $set, false, $deliveredRecords, $maxItems); - + $records_count = call_user_func($this->listRecordsCallback, $metadataPrefix, $this->formatTimestamp($from), $this->formatTimestamp($until), true); + $records = call_user_func($this->listRecordsCallback, $metadataPrefix, $this->formatTimestamp($from), $this->formatTimestamp($until), false, $deliveredRecords, $maxItems); foreach ($records as $record) { - - $identifier = $record['identifier']; - $datestamp = $this->formatDatestamp($record['datestamp']); - $setspec = $record['set']; - - $status_deleted = (isset($record['deleted']) && ($record['deleted'] === true) && - 
(($this->identifyResponse['deletedRecord'] == 'transient') || - ($this->identifyResponse['deletedRecord'] == 'persistent'))); - - if($this->verb == 'ListRecords') { + if ($this->verb == 'ListRecords') { $cur_record = $this->response->addToVerbNode('record'); - $cur_header = $this->response->createHeader($identifier, $datestamp,$setspec,$cur_record); - if (!$status_deleted) { - $this->add_metadata($cur_record, $record); - } + $cur_header = $this->response->createHeader($record['identifier'], $this->formatDatestamp($record['timestamp']), $cur_record); + $this->add_metadata($cur_record, $record['metadata']); } else { // for ListIdentifiers, only identifiers will be returned. - $cur_header = $this->response->createHeader($identifier, $datestamp,$setspec); - } - if ($status_deleted) { - $cur_header->setAttribute("status","deleted"); + $cur_header = $this->response->createHeader($record['identifier'], $this->formatDatestamp($record['timestamp'])); } } - // Will we need a new ResumptionToken? if ($records_count - $deliveredRecords > $maxItems) { - $deliveredRecords += $maxItems; - $restoken = $this->createResumptionToken($deliveredRecords); + $restoken = $this->createResumptionToken($deliveredRecords, $metadataPrefix, $from, $until); - - $expirationDatetime = gmstrftime('%Y-%m-%dT%TZ', time()+$this->token_valid); - + $expirationDatetime = gmdate('Y-m-d\TH:i:s\Z', time()+$this->token_valid); - } elseif (isset($args['resumptionToken'])) { + } elseif (isset($this->args['resumptionToken'])) { // Last delivery, return empty ResumptionToken - $restoken = null; + $restoken = ''; $expirationDatetime = null; } - if (isset($restoken)) { - $this->response->createResumptionToken($restoken,$expirationDatetime,$records_count,$deliveredRecords); + $this->response->createResumptionToken($restoken, $expirationDatetime, $records_count, $deliveredRecords); } - } catch (OAI2Exception $e) { $this->errors[] = $e; } } } - private function add_metadata($cur_record, $record) { - - $meta_node = $this->response->addChild($cur_record ,"metadata"); - - $schema_node = $this->response->addChild($meta_node, $record['metadata']['container_name']); - foreach 
($record['metadata']['container_attributes'] as $name => $value) { - $schema_node->setAttribute($name, $value); - } - foreach ($record['metadata']['fields'] as $name => $value) { - $this->response->addChild($schema_node, $name, $value); - } + private function add_metadata($cur_record, $file) { + $meta_node = $this->response->addChild($cur_record, 'metadata'); + $fragment = new DOMDocument(); + $fragment->load($file); + $this->response->importFragment($meta_node, $fragment); } - private function createResumptionToken($delivered_records) { + private function createResumptionToken($delivered_records, $metadataPrefix = '', $from = '', $until = '') { // harvest state is passed in because $this->args lacks it on resumed requests - list($usec, $sec) = explode(" ", microtime()); $token = ((int)($usec*1000) + (int)($sec*1000)); - $fp = fopen ($this->token_prefix.$token, 'w'); if($fp==false) { - exit("Cannot write. Writer permission needs to be changed."); - } + exit('Cannot write resumption token. Writing permission needs to be changed.'); + } fputs($fp, "$delivered_records#"); fputs($fp, "$metadataPrefix#"); - fputs($fp, "{$this->args['from']}#"); + fputs($fp, "$from#"); - fputs($fp, "{$this->args['until']}#"); + fputs($fp, "$until#"); - fputs($fp, "{$this->args['set']}#"); fclose($fp); return $token; } @@ -338,26 +272,25 @@ class OAI2Server { return $rtVal; } - /** - * All datestamps used in this system are GMT even - * return value from database has no TZ information - */ - private function formatDatestamp($datestamp) { - return date("Y-m-d\TH:i:s\Z",strtotime($datestamp)); + private function formatDatestamp($timestamp) { + return gmdate('Y-m-d\TH:i:s\Z', $timestamp); } - /** - * The database uses datastamp without time-zone information. 
- * It needs to clean all time-zone informaion from time string and reformat it - */ - private function checkDateFormat($date) { - $date = str_replace(array("T","Z")," ",$date); - $time_val = strtotime($date); - if(!$time_val) return false; - if(strstr($date,":")) { - return date("Y-m-d H:i:s",$time_val); + private function formatTimestamp($datestamp) { // converts a UTC datestamp string to a Unix timestamp, or null if it matches neither granularity + if (($dt = DateTime::createFromFormat('!Y-m-d\TH:i:s\Z', $datestamp, new DateTimeZone('UTC'))) || ($dt = DateTime::createFromFormat('!Y-m-d', $datestamp, new DateTimeZone('UTC')))) { + return $dt->getTimestamp(); } else { - return date("Y-m-d",$time_val); + return null; } } + + private function checkDateFormat($date) { + $dt = DateTime::createFromFormat('Y-m-d\TH:i:s\Z', $date); + if ($dt === false) { + $dt = DateTime::createFromFormat('Y-m-d', $date); + } + $errors = DateTime::getLastErrors(); // returns false instead of an array on PHP >= 8.2 when parsing was clean + return ($dt !== false) && (empty($errors) || ($errors['warning_count'] + $errors['error_count']) == 0); + } + }