You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2210 lines
51 KiB

<?php
/***************************************************************
* Copyright notice
*
* (c) 2011 Sebastian Meyer <sebastian.meyer@slub-dresden.de>
* All rights reserved
*
* This script is part of the TYPO3 project. The TYPO3 project is
* free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* The GNU General Public License can be found at
* http://www.gnu.org/copyleft/gpl.html.
*
* This script is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
* [CLASS/FUNCTION INDEX of SCRIPT]
*/
/**
* Document class 'tx_dlf_document' for the 'dlf' extension.
*
* @author Sebastian Meyer <sebastian.meyer@slub-dresden.de>
* @copyright Copyright (c) 2011, Sebastian Meyer, SLUB Dresden
* @package TYPO3
* @subpackage tx_dlf
* @access public
*/
final class tx_dlf_document {
/**
* This holds the whole XML file as string for serialization purposes
* @see __sleep() / __wakeup()
*
* @var string
* @access protected
*/
protected $asXML = '';
/**
* This holds the PID for the configuration
*
* @var integer
* @access protected
*/
protected $cPid = 0;
/**
* This holds the XML file's dmdSec parts with their IDs as array key
*
* @var array
* @access protected
*/
protected $dmdSec = array ();
/**
* Are the METS file's dmdSecs loaded?
* @see $dmdSec
*
* @var boolean
* @access protected
*/
protected $dmdSecLoaded = FALSE;
/**
* The extension key
*
* @var string
* @access public
*/
public $extKey = 'dlf';
/**
* This holds the configuration for all supported metadata encodings
* @see loadFormats()
*
* @var array
* @access protected
*/
protected $formats = array (
'METS' => array (
'rootElement' => 'mets',
'namespaceURI' => 'http://www.loc.gov/METS/',
),
// This one can become a problem, because MODS uses its own custom XLINK schema.
// @see http://comments.gmane.org/gmane.comp.text.mods/1126
'XLINK' => array (
'rootElement' => 'xlink',
'namespaceURI' => 'http://www.w3.org/1999/xlink',
),
'DVRIGHTS' => array (
'rootElement' => 'rights',
'namespaceURI' => 'http://dfg-viewer.de/',
),
'DVLINKS' => array (
'rootElement' => 'links',
'namespaceURI' => 'http://dfg-viewer.de/',
)
);
/**
* Are the available metadata formats loaded?
* @see $formats
*
* @var boolean
* @access protected
*/
protected $formatsLoaded = FALSE;
/**
* This holds the hook objects for this class
*
* @var array
* @access protected
*/
protected $hookObjects = array ();
/**
* Is the hook objects array loaded?
* @see $hookObjects
*
* @var boolean
* @access protected
*/
protected $hookObjectsLoaded = FALSE;
/**
* This holds the logical units
*
* @var array
* @access protected
*/
protected $logicalUnits = array ();
/**
* This holds the documents' parsed metadata array with their corresponding structMap//div's ID as array key
*
* @var array
* @access protected
*/
protected $metadataArray = array ();
/**
* Is the metadata array loaded?
* @see $metadataArray
*
* @var boolean
* @access protected
*/
protected $metadataArrayLoaded = FALSE;
/**
* This holds the XML file's METS part as SimpleXMLElement object
*
* @var SimpleXMLElement
* @access protected
*/
protected $mets;
/**
* This holds the total number of pages
*
* @var integer
* @access protected
*/
protected $numPages = 0;
/**
* This holds the UID of the parent document or zero if not multi-volumed
*
* @var integer
* @access protected
*/
protected $parentId = 0;
/**
* This holds the physical pages
*
* @var array
* @access protected
*/
protected $physicalPages = array ();
/**
* This holds the physical pages' metadata
*
* @var array
* @access protected
*/
protected $physicalPagesInfo = array ();
/**
* Are the physical pages loaded?
* @see $physicalPages
*
* @var boolean
* @access protected
*/
protected $physicalPagesLoaded = FALSE;
/**
* This holds the PID of the document or zero if not in database
*
* @var integer
* @access protected
*/
protected $pid = 0;
/**
* Is the document instantiated successfully?
*
* @var boolean
* @access protected
*/
protected $ready = FALSE;
/**
* The METS file's record identifier
*
* @var string
* @access protected
*/
protected $recordId;
/**
* This holds the singleton object of each document with its UID as array key
*
* @var array(tx_dlf_document)
* @access protected
*/
protected static $registry = array ();
/**
* This holds the smLinks between logical and physical structMap
*
* @var array
* @access protected
*/
protected $smLinks = array ('l2p' => array (), 'p2l' => array ());
/**
* Are the smLinks loaded?
* @see $smLinks
*
* @var boolean
* @access protected
*/
protected $smLinksLoaded = FALSE;
/**
* This holds the logical structure
*
* @var array
* @access protected
*/
protected $tableOfContents = array ();
/**
* Is the table of contents loaded?
* @see $tableOfContents
*
* @var boolean
* @access protected
*/
protected $tableOfContentsLoaded = FALSE;
/**
* This holds the toplevel structure's @ID
*
* @var string
* @access protected
*/
protected $toplevelId = '';
/**
* This holds the UID or the URL of the document
*
* @var mixed
* @access protected
*/
protected $uid = 0;
/**
* This holds the whole XML file as SimpleXMLElement object
*
* @var SimpleXMLElement
* @access protected
*/
protected $xml;
/**
 * Looks up the URL of a file which represents a physical page
 *
 * The METS fileSec is searched for a file node with the given @ID. The
 * xlink:href attribute of its first FLocat child with @LOCTYPE="URL" is
 * returned. If no such node exists, a warning is written to the developer
 * log (when TYPO3_DLOG is enabled) and an empty string is returned.
 *
 * @access public
 *
 * @param string $id: The @ID attribute of the file node
 *
 * @return string The file's location as URL or an empty string
 */
public function getFileLocation($id) {
    $fLocats = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="'.$id.'"]/mets:FLocat[@LOCTYPE="URL"]');
    if ($fLocats) {
        // The first hit wins, its xlink:href holds the URL.
        return (string) $fLocats[0]->attributes('http://www.w3.org/1999/xlink')->href;
    }
    // Nothing found. Leave a note for the developer.
    if (TYPO3_DLOG) {
        t3lib_div::devLog('[tx_dlf_document->getFileLocation('.$id.')] There is no file node with @ID "'.$id.'"', $this->extKey, SYSLOG_SEVERITY_WARNING);
    }
    return '';
}
/**
 * Instantiates the hook objects registered for this class
 *
 * The class references are read from
 * $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['dlf/common/class.tx_dlf_document.php']['hookClass']
 * and the resulting objects are appended to $this->hookObjects. The work is
 * done only once per instance; the "loaded" flag is set only if the
 * registration is an array.
 *
 * @access protected
 *
 * @return void
 */
protected function getHookObjects() {
    // The hook objects have been instantiated before.
    if ($this->hookObjectsLoaded) {
        return;
    }
    $hookClasses = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['dlf/common/class.tx_dlf_document.php']['hookClass'];
    // There are no hooks registered.
    if (!is_array($hookClasses)) {
        return;
    }
    foreach ($hookClasses as $hookClass) {
        $this->hookObjects[] = t3lib_div::getUserObj($hookClass);
    }
    $this->hookObjectsLoaded = TRUE;
}
/**
 * Returns the instance for a given document, creating it if necessary
 *
 * Instances must be obtained through this method. Unless a reload is
 * forced, the lookup order is:
 * 1. the static registry,
 * 2. the user's session (only if the registry holds no instance for $uid),
 * 3. a newly created instance.
 * A cached instance is only used if no PID is given or its PID matches.
 * New instances are registered only if they were instantiated
 * successfully, and the registry is written to the session if the
 * extension's "caching" option is set.
 *
 * @access public
 *
 * @param mixed $uid: The unique identifier of the document to parse or URL of XML file
 * @param integer $pid: If > 0, then only document with this PID gets loaded
 * @param boolean $forceReload: Force reloading the document instead of returning the cached instance
 *
 * @return tx_dlf_document Instance of this class
 */
public static function getInstance($uid, $pid = 0, $forceReload = FALSE) {
    // Make sure the PID is a non-negative integer.
    $pid = max(intval($pid), 0);
    if (!$forceReload) {
        if (is_object(self::$registry[$uid]) && self::$registry[$uid] instanceof self) {
            // The registry knows this document. Use it if the PID is irrelevant or matching.
            if (!$pid || self::$registry[$uid]->pid == $pid) {
                return self::$registry[$uid];
            }
        } else {
            // The registry does not know this document, so ask the user's session.
            $sessionData = tx_dlf_helper::loadFromSession(get_called_class());
            if (is_object($sessionData[$uid]) && $sessionData[$uid] instanceof self) {
                if (!$pid || $sessionData[$uid]->pid == $pid) {
                    // Put the instance back into the registry.
                    self::$registry[$uid] = $sessionData[$uid];
                    return self::$registry[$uid];
                }
            }
        }
    }
    // No usable cached instance, so build a new one.
    $instance = new self($uid, $pid);
    // Instances which failed to load are returned, but never cached.
    if (!$instance->ready) {
        return $instance;
    }
    self::$registry[$instance->uid] = $instance;
    // Persist the registry in the session if caching is enabled in the extension configuration.
    $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['dlf']);
    if (!empty($extConf['caching'])) {
        tx_dlf_helper::saveToSession(self::$registry, get_class($instance));
    }
    return $instance;
}
/**
 * This gets details about a logical structure element
 *
 * Non-recursive calls return the details of the requested element (or of
 * the first toplevel element if $id is empty), preferably from the cache
 * in $this->logicalUnits.
 * Recursive calls do not return the details. Instead, they append the
 * complete subtree of every matching element to $this->tableOfContents
 * and return an empty array.
 *
 * @access public
 *
 * @param string $id: The @ID attribute of the logical structure node
 * @param boolean $recursive: Whether to include the child elements
 *
 * @return array Array of the element's id, label, type and physical page indexes/mptr link
 *               (empty if nothing was found or if called recursively)
 */
public function getLogicalStructure($id, $recursive = FALSE) {
    // Serve non-recursive requests from the cache whenever possible.
    if (!$recursive && !empty($this->logicalUnits[$id])) {
        return $this->logicalUnits[$id];
    }
    if (!empty($id)) {
        // Address the specified logical unit.
        $query = './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]';
    } else {
        // Address all logical units at top level.
        $query = './mets:structMap[@TYPE="LOGICAL"]/mets:div';
    }
    $nodes = $this->mets->xpath($query);
    if (empty($nodes)) {
        return array ();
    }
    if (!$recursive) {
        // Only the first xpath hit is of interest.
        return $this->getLogicalStructureInfo($nodes[0]);
    }
    // Walk the logical structure recursively and fill the whole table of contents.
    foreach ($nodes as $node) {
        $this->tableOfContents[] = $this->getLogicalStructureInfo($node, TRUE);
    }
    return array ();
}
/**
 * This gets details about a logical structure element
 *
 * The returned array has the keys "id", "dmdId", "label", "volume",
 * "pagination", "type" and "points", plus "children" if called
 * recursively and the element has child divs. "points" is either the
 * xlink:href of the element's mptr node, the index of the first linked
 * physical page (at least 1), 1 for an unlinked toplevel element or an
 * empty string.
 * The details (without children) are cached in $this->logicalUnits.
 *
 * @access protected
 *
 * @param SimpleXMLElement $structure: The logical structure node
 * @param boolean $recursive: Whether to include the child elements
 *
 * @return array Array of the element's id, label, type and physical page indexes/mptr link
 */
protected function getLogicalStructureInfo(SimpleXMLElement $structure, $recursive = FALSE) {
    // Get attributes. The array must exist even if the node has no attributes at all.
    $attributes = array ();
    foreach ($structure->attributes() as $attribute => $value) {
        $attributes[$attribute] = (string) $value;
    }
    // Extract identity information.
    $details = array ();
    $details['id'] = (isset($attributes['ID']) ? $attributes['ID'] : '');
    $details['dmdId'] = (isset($attributes['DMDID']) ? $attributes['DMDID'] : '');
    $details['label'] = (isset($attributes['LABEL']) ? $attributes['LABEL'] : '');
    $details['volume'] = '';
    // Set volume information only if no label is set and this is the toplevel structure element.
    if (empty($details['label']) && $details['id'] == $this->_getToplevelId()) {
        $metadata = $this->getMetadata($details['id']);
        if (!empty($metadata['volume'][0])) {
            $details['volume'] = $metadata['volume'][0];
        }
    }
    $details['pagination'] = '';
    $details['type'] = (isset($attributes['TYPE']) ? $attributes['TYPE'] : '');
    // Load smLinks.
    $this->_getSmLinks();
    // Get the physical page or external file this structure element is pointing at.
    $details['points'] = '';
    $metsChildren = $structure->children('http://www.loc.gov/METS/');
    // Is there a mptr node?
    if (count($metsChildren->mptr)) {
        // Yes. Get the file reference.
        $details['points'] = (string) $metsChildren->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    // Are there any physical pages and is this logical unit linked to at least one of them?
    } elseif ($this->_getPhysicalPages() && array_key_exists($details['id'], $this->smLinks['l2p'])) {
        // The first physical page linked to this structure element.
        $firstPageId = $this->smLinks['l2p'][$details['id']][0];
        $details['points'] = max(intval(array_search($firstPageId, $this->physicalPages, TRUE)), 1);
        // Get page number of the first page related to this structure element.
        // NOTE(review): assumes $this->physicalPagesInfo is keyed by the physical page's @ID - confirm
        // against _getPhysicalPages(). The former lookup used an undefined variable and never hit.
        if (isset($this->physicalPagesInfo[$firstPageId]['label'])) {
            $details['pagination'] = $this->physicalPagesInfo[$firstPageId]['label'];
        }
    // Is this the toplevel structure element?
    } elseif ($details['id'] == $this->_getToplevelId()) {
        // Yes. Point to itself.
        $details['points'] = 1;
    }
    // Keep for later usage. Children are deliberately not part of the cached entry.
    $this->logicalUnits[$details['id']] = $details;
    // Walk the structure recursively? And are there any children of the current element?
    if ($recursive && count($metsChildren->div)) {
        $details['children'] = array ();
        foreach ($metsChildren->div as $child) {
            // Repeat for all children.
            $details['children'][] = $this->getLogicalStructureInfo($child, TRUE);
        }
    }
    return $details;
}
/**
* This extracts all the metadata for a logical structure node
*
* The metadata is collected in three steps: a format specific class (if one
* is configured for the dmdSec's metadata format) pre-fills the array, the
* structure's @TYPE is added, and finally the XPath expressions configured
* in table "tx_dlf_metadata" for the given PID are evaluated against the
* dmdSec.
* An empty array is returned if no PID for the metadata definitions can be
* determined, if the structure node has no DMDID or if the metadata format
* of the referenced dmdSec is not supported.
*
* @access public
*
* @param string $id: The @ID attribute of the logical structure node
* @param integer $cPid: The PID for the metadata definitions
* (defaults to $this->cPid or $this->pid)
*
* @return array The logical structure node's parsed metadata array
*/
public function getMetadata($id, $cPid = 0) {
// Save parameter for logging purposes.
$_cPid = $cPid;
// Make sure $cPid is a non-negative integer.
$cPid = max(intval($cPid), 0);
// If $cPid is not given, try to get it elsewhere.
if (!$cPid && ($this->cPid || $this->pid)) {
// Retain current PID.
$cPid = ($this->cPid ? $this->cPid : $this->pid);
} elseif (!$cPid) {
if (TYPO3_DLOG) {
t3lib_div::devLog('[tx_dlf_document->getMetadata('.$id.', '.$_cPid.')] Invalid PID "'.$cPid.'" for metadata definitions', $this->extKey, SYSLOG_SEVERITY_ERROR);
}
return array ();
}
// Get metadata from parsed metadata array if available.
// NOTE(review): presumably $this->metadataArray[0] holds the PID the cached metadata was parsed with - confirm against the code filling $this->metadataArray.
if (!empty($this->metadataArray[$id]) && $this->metadataArray[0] == $cPid) {
return $this->metadataArray[$id];
}
// Initialize metadata array with empty values.
$metadata = array (
'title' => array (),
'title_sorting' => array (),
'author' => array (),
'place' => array (),
'year' => array (),
'prod_id' => array (),
'record_id' => array (),
'opac_id' => array (),
'union_id' => array (),
'urn' => array (),
'purl' => array (),
'type' => array (),
'volume' => array (),
'volume_sorting' => array (),
'collection' => array (),
'owner' => array (),
);
// Get the logical structure node's DMDID.
if (!empty($this->logicalUnits[$id])) {
// The structure node was parsed before, so use the cached value.
$dmdId = $this->logicalUnits[$id]['dmdId'];
} else {
$dmdId = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]/@DMDID');
// Only the first hit is used. This yields an empty string if there is none.
$dmdId = (string) $dmdId[0];
}
if (!empty($dmdId)) {
// Load available metadata formats and dmdSecs.
$this->loadFormats();
$this->_getDmdSec();
// Is this metadata format supported?
if (!empty($this->formats[$this->dmdSec[$dmdId]['type']])) {
// A format specific class is optional. Formats without one rely on the XPath queries below only.
if (!empty($this->formats[$this->dmdSec[$dmdId]['type']]['class'])) {
$class = $this->formats[$this->dmdSec[$dmdId]['type']]['class'];
// Get the metadata from class.
// NOTE(review): the return value is ignored, so extractMetadata() presumably fills $metadata by reference - confirm against tx_dlf_format.
if (class_exists($class) && ($obj = t3lib_div::makeInstance($class)) instanceof tx_dlf_format) {
$obj->extractMetadata($this->dmdSec[$dmdId]['xml'], $metadata);
} else {
if (TYPO3_DLOG) {
t3lib_div::devLog('[tx_dlf_document->getMetadata('.$id.', '.$_cPid.')] Invalid class/method "'.$class.'->extractMetadata()" for metadata format "'.$this->dmdSec[$dmdId]['type'].'"', $this->extKey, SYSLOG_SEVERITY_WARNING);
}
}
}
} else {
if (TYPO3_DLOG) {
t3lib_div::devLog('[tx_dlf_document->getMetadata('.$id.', '.$_cPid.')] Unsupported metadata format "'.$this->dmdSec[$dmdId]['type'].'" in dmdSec with @ID "'.$dmdId.'"', $this->extKey, SYSLOG_SEVERITY_WARNING);
}
return array ();
}
// Get the structure's type.
if (!empty($this->logicalUnits[$id])) {
$metadata['type'] = array ($this->logicalUnits[$id]['type']);
} else {
$struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="'.$id.'"]/@TYPE');
$metadata['type'] = array ((string) $struct[0]);
}
// Get the additional metadata from database.
// This selects all metadata definitions on page $cPid which are either bound to the dmdSec's format or to no format at all (encoded=0).
$result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
'tx_dlf_metadata.index_name AS index_name,tx_dlf_metadata.xpath AS xpath,tx_dlf_metadata.xpath_sorting AS xpath_sorting,tx_dlf_metadata.is_sortable AS is_sortable,tx_dlf_metadata.default_value AS default_value',
'tx_dlf_metadata,tx_dlf_formats',
'tx_dlf_metadata.pid='.$cPid.' AND ((tx_dlf_metadata.encoded=tx_dlf_formats.uid AND tx_dlf_formats.type='.$GLOBALS['TYPO3_DB']->fullQuoteStr($this->dmdSec[$dmdId]['type'], 'tx_dlf_formats').') OR tx_dlf_metadata.encoded=0)'.tx_dlf_helper::whereClause('tx_dlf_metadata', TRUE).tx_dlf_helper::whereClause('tx_dlf_formats'),
'',
'',
''
);
// We need a DOMDocument here, because SimpleXML doesn't support XPath functions properly.
$domNode = dom_import_simplexml($this->dmdSec[$dmdId]['xml']);
$domXPath = new DOMXPath($domNode->ownerDocument);
$this->registerNamespaces($domXPath);
// OK, now make the XPath queries.
while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($result)) {
// Set metadata field's value(s).
// The result is either a node list (one value per node) or a scalar, which becomes the only value.
// An empty node list leaves the field untouched, so values set by the format class survive.
if ($resArray['xpath'] && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))) {
if ($values instanceof DOMNodeList && $values->length > 0) {
$metadata[$resArray['index_name']] = array ();
foreach ($values as $value) {
$metadata[$resArray['index_name']][] = trim((string) $value->nodeValue);
}
} elseif (!($values instanceof DOMNodeList)) {
$metadata[$resArray['index_name']] = array (trim((string) $values));
}
}
// Set default value if applicable.
if (empty($metadata[$resArray['index_name']][0]) && $resArray['default_value']) {
$metadata[$resArray['index_name']] = array ($resArray['default_value']);
}
// Set sorting value if applicable.
if (!empty($metadata[$resArray['index_name']]) && $resArray['is_sortable']) {
if ($resArray['xpath_sorting'] && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))) {
if ($values instanceof DOMNodeList && $values->length > 0) {
$metadata[$resArray['index_name'].'_sorting'][0] = trim((string) $values->item(0)->nodeValue);
} elseif (!($values instanceof DOMNodeList)) {
$metadata[$resArray['index_name'].'_sorting'][0] = trim((string) $values);
}
}
// Fall back to the field's first value if there is no dedicated sorting value.
if (empty($metadata[$resArray['index_name'].'_sorting'][0])) {
$metadata[$resArray['index_name'].'_sorting'][0] = $metadata[$resArray['index_name']][0];
}
}
}
// Set title to empty string if not present.
if (empty($metadata['title'][0])) {
$metadata['title'][0] = '';
$metadata['title_sorting'][0] = '';
}
} else {
// There is no dmdSec for this structure node.
return array ();
}
return $metadata;
}
/**
 * This determines a title for the given document
 *
 * The title is read from table "tx_dlf_documents". If the document has no
 * title and $recursive is set, the superior documents (field "partof") are
 * searched for a title, too.
 * An empty string is returned if the UID is invalid, the document is not
 * found/accessible or no title could be determined.
 *
 * @access public
 *
 * @param integer $uid: The UID of the document
 * @param boolean $recursive: Search superior documents for a title, too?
 *
 * @return string The title of the document itself or a parent document
 */
public static function getTitle($uid, $recursive = FALSE) {
    // This method is static, so there is no $this and thus no $this->extKey.
    // Accessing it would be a fatal error, hence the extension key is repeated here for logging.
    $extKey = 'dlf';
    // Save parameter for logging purposes.
    $_uid = $uid;
    $title = '';
    // Sanitize input.
    $uid = max(intval($uid), 0);
    if ($uid) {
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_documents.title,tx_dlf_documents.partof',
            'tx_dlf_documents',
            'tx_dlf_documents.uid='.$uid.tx_dlf_helper::whereClause('tx_dlf_documents'),
            '',
            '',
            '1'
        );
        if ($GLOBALS['TYPO3_DB']->sql_num_rows($result)) {
            // Get title information.
            list ($title, $partof) = $GLOBALS['TYPO3_DB']->sql_fetch_row($result);
            // Search parent documents recursively for a title?
            if ($recursive && empty($title) && intval($partof)) {
                $title = self::getTitle($partof, TRUE);
            }
        } else {
            if (TYPO3_DLOG) {
                t3lib_div::devLog('[tx_dlf_document->getTitle('.$_uid.', ['.($recursive ? 'TRUE' : 'FALSE').'])] No document with UID "'.$uid.'" found or document not accessible', $extKey, SYSLOG_SEVERITY_WARNING);
            }
        }
    } else {
        if (TYPO3_DLOG) {
            t3lib_div::devLog('[tx_dlf_document->getTitle('.$_uid.', ['.($recursive ? 'TRUE' : 'FALSE').'])] Invalid UID "'.$uid.'" for document', $extKey, SYSLOG_SEVERITY_ERROR);
        }
    }
    return $title;
}
/**
* This extracts all the metadata for the toplevel logical structure node
*
* @access public
*
* @param integer $cPid: The PID for the metadata definitions
*
* @return array The logical structure node's parsed metadata array
*/