2019-04-04 16:09:20 +02:00
< ? php
2019-10-30 15:37:44 +01:00
2019-04-04 16:09:20 +02:00
/**
* ( c ) Kitodo . Key to digital objects e . V . < contact @ kitodo . org >
*
* This file is part of the Kitodo and TYPO3 projects .
*
* @ license GNU General Public License version 3 or later .
* For the full copyright and license information , please read the
* LICENSE . txt file that was distributed with this source code .
*/
2019-11-13 13:41:26 +01:00
namespace Kitodo\Dlf\Common ;
2019-08-22 09:41:15 +02:00
use TYPO3\CMS\Core\Database\ConnectionPool ;
2019-12-17 14:38:34 +01:00
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction ;
2019-08-22 09:41:15 +02:00
use TYPO3\CMS\Core\Utility\GeneralUtility ;
2019-09-03 15:15:52 +02:00
use Ubl\Iiif\Tools\IiifHelper ;
use Ubl\Iiif\Services\AbstractImageService ;
2019-08-22 09:41:15 +02:00
2019-04-04 16:09:20 +02:00
/**
* MetsDocument class for the 'dlf' extension .
*
2019-11-13 13:41:26 +01:00
* @ author Sebastian Meyer < sebastian . meyer @ slub - dresden . de >
* @ author Henrik Lochmann < dev @ mentalmotive . com >
* @ package TYPO3
* @ subpackage dlf
* @ access public
2019-11-13 12:51:19 +01:00
* @ property - write int $cPid This holds the PID for the configuration
2019-11-04 16:14:18 +01:00
* @ property - read array $dmdSec This holds the XML file ' s dmdSec parts with their IDs as array key
* @ property - read array $fileGrps This holds the file ID -> USE concordance
2019-11-13 12:51:19 +01:00
* @ property - read bool $hasFulltext Are there any fulltext files available ?
2019-11-04 16:14:18 +01:00
* @ property - read string $location This holds the documents location
* @ property - read array $metadataArray This holds the documents ' parsed metadata array
* @ property - read \SimpleXMLElement $mets This holds the XML file ' s METS part as \SimpleXMLElement object
2019-11-13 12:51:19 +01:00
 * @property-read int $numPages This holds the total number of pages
* @ property - read int $parentId This holds the UID of the parent document or zero if not multi - volumed
2019-11-04 16:14:18 +01:00
* @ property - read array $physicalStructure This holds the physical structure
* @ property - read array $physicalStructureInfo This holds the physical structure metadata
2019-11-13 12:51:19 +01:00
* @ property - read int $pid This holds the PID of the document or zero if not in database
* @ property - read bool $ready Is the document instantiated successfully ?
2019-11-04 16:14:18 +01:00
 * @property-read string $recordId The METS file's / IIIF manifest's record identifier
2019-11-13 12:51:19 +01:00
* @ property - read int $rootId This holds the UID of the root document or zero if not multi - volumed
2019-11-04 16:14:18 +01:00
* @ property - read array $smLinks This holds the smLinks between logical and physical structMap
* @ property - read array $tableOfContents This holds the logical structure
* @ property - read string $thumbnail This holds the document ' s thumbnail location
 * @property-read string $toplevelId This holds the toplevel structure's @ID (METS) or the manifest's @id (IIIF)
* @ property - read mixed $uid This holds the UID or the URL of the document
2019-04-04 16:09:20 +02:00
*/
final class MetsDocument extends Document
{
/**
* This holds the whole XML file as string for serialization purposes
* @ see __sleep () / __wakeup ()
*
* @ var string
* @ access protected
*/
protected $asXML = '' ;
/**
* This holds the XML file ' s dmdSec parts with their IDs as array key
*
* @ var array
* @ access protected
*/
protected $dmdSec = [];
/**
* Are the METS file ' s dmdSecs loaded ?
* @ see $dmdSec
*
2019-11-13 12:51:19 +01:00
* @ var bool
2019-04-04 16:09:20 +02:00
* @ access protected
*/
2019-11-13 13:09:00 +01:00
protected $dmdSecLoaded = false ;
2019-04-04 16:09:20 +02:00
/**
* The extension key
*
* @ var string
* @ access public
*/
public static $extKey = 'dlf' ;
/**
* This holds the file ID -> USE concordance
* @ see _getFileGrps ()
*
* @ var array
* @ access protected
*/
protected $fileGrps = [];
/**
2021-02-17 09:04:40 +01:00
* Are the image file groups loaded ?
2019-04-04 16:09:20 +02:00
* @ see $fileGrps
*
2019-11-13 12:51:19 +01:00
* @ var bool
2019-04-04 16:09:20 +02:00
* @ access protected
*/
2019-11-13 13:09:00 +01:00
protected $fileGrpsLoaded = false ;
2019-04-04 16:09:20 +02:00
/**
* This holds the XML file ' s METS part as \SimpleXMLElement object
*
* @ var \SimpleXMLElement
* @ access protected
*/
protected $mets ;
2019-04-09 08:56:54 +02:00
2019-04-04 16:09:20 +02:00
/**
* This holds the whole XML file as \SimpleXMLElement object
*
* @ var \SimpleXMLElement
* @ access protected
*/
protected $xml ;
2019-04-09 08:56:54 +02:00
2019-10-30 14:57:59 +01:00
/**
* This adds metadata from METS structural map to metadata array .
*
* @ access public
*
* @ param array & $metadata : The metadata array to extend
* @ param string $id : The @ ID attribute of the logical structure node
*
* @ return void
*/
2019-10-30 18:51:09 +01:00
public function addMetadataFromMets(&$metadata, $id)
{
    // Fetch the details of the logical structure node; an empty result
    // means there is nothing to add.
    $structure = $this->getLogicalStructure($id);
    if (empty($structure)) {
        return;
    }
    // Copy the structMap attributes into the first slot of the matching
    // metadata fields (metadata key => structure detail key).
    $fieldMap = [
        'mets_order' => 'order',
        'mets_label' => 'label',
        'mets_orderlabel' => 'orderlabel',
    ];
    foreach ($fieldMap as $metadataKey => $detailKey) {
        $metadata[$metadataKey][0] = $structure[$detailKey];
    }
}
2019-04-04 16:09:20 +02:00
/**
2019-04-09 08:56:54 +02:00
*
2019-04-04 16:09:20 +02:00
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: establishRecordId ()
*/
2019-10-30 15:37:44 +01:00
protected function establishRecordId($pid)
{
    // Use the METS root element's @OBJID as record identifier when it is set.
    if (!empty($this->mets['OBJID'])) {
        $this->recordId = (string) $this->mets['OBJID'];
    }
    // Hand the XML and the record identifier to every registered hook object
    // that implements construct_postProcessRecordId().
    $hooks = Helper::getHookObjects('Classes/Common/MetsDocument.php');
    foreach ($hooks as $hook) {
        if (!method_exists($hook, 'construct_postProcessRecordId')) {
            continue;
        }
        $hook->construct_postProcessRecordId($this->xml, $this->recordId);
    }
}
2019-04-09 08:56:54 +02:00
2019-04-04 16:09:20 +02:00
/**
2019-04-09 08:56:54 +02:00
*
2019-04-04 16:09:20 +02:00
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: getDownloadLocation ()
*/
2019-10-30 15:37:44 +01:00
public function getDownloadLocation($id)
{
    // Resolve MIME type and raw location of the file with the given @ID.
    $fileMimeType = $this->getFileMimeType($id);
    $fileLocation = $this->getFileLocation($id);
    if ($fileMimeType === 'application/vnd.kitodo.iiif') {
        // Make sure the location ends with "info.json".
        if (substr($fileLocation, -9) !== 'info.json') {
            // Append "info.json" with exactly one separating slash. The former
            // check compared strrpos() against strlen(), which can never match
            // (the last index is strlen() - 1), so locations ending in "/"
            // ended up with a double slash.
            $fileLocation .= (substr($fileLocation, -1) === '/' ? '' : '/') . 'info.json';
        }
        $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        IiifHelper::setUrlReader(IiifUrlReader::getInstance());
        IiifHelper::setMaxThumbnailHeight($conf['iiifThumbnailHeight']);
        IiifHelper::setMaxThumbnailWidth($conf['iiifThumbnailWidth']);
        $service = IiifHelper::loadIiifResource($fileLocation);
        if ($service !== null && $service instanceof AbstractImageService) {
            return $service->getImageUrl();
        }
        // Otherwise fall through and return the (info.json) location itself.
    } elseif ($fileMimeType === 'application/vnd.netfpx') {
        // Start a query string if the location does not have one yet.
        $baseURL = $fileLocation . (strpos($fileLocation, '?') === false ? '?' : '');
        // TODO CVT is an optional IIP server capability; in theory, capabilities should be determined in the object request with '&obj=IIP-server'
        return $baseURL . '&CVT=jpeg';
    }
    return $fileLocation;
}
/**
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: getFileLocation ()
*/
2019-10-30 15:37:44 +01:00
public function getFileLocation($id)
{
    // Look up the URL-typed FLocat node of the file with the given @ID.
    $fLocat = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/mets:FLocat[@LOCTYPE="URL"]');
    if (empty($id) || empty($fLocat)) {
        Helper::devLog('There is no file node with @ID "' . $id . '"', DEVLOG_SEVERITY_WARNING);
        return '';
    }
    // The location is stored in the xlink:href attribute of the first hit.
    return (string) $fLocat[0]->attributes('http://www.w3.org/1999/xlink')->href;
}
/**
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: getFileMimeType ()
*/
2019-10-30 15:37:44 +01:00
public function getFileMimeType($id)
{
    // Look up the @MIMETYPE attribute of the file with the given @ID.
    $mimeTypeAttribute = $this->mets->xpath('./mets:fileSec/mets:fileGrp/mets:file[@ID="' . $id . '"]/@MIMETYPE');
    if (empty($id) || empty($mimeTypeAttribute)) {
        Helper::devLog('There is no file node with @ID "' . $id . '" or no MIME type specified', DEVLOG_SEVERITY_WARNING);
        return '';
    }
    // Return the first hit as plain string.
    return (string) $mimeTypeAttribute[0];
}
/**
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: getLogicalStructure ()
*/
2019-11-13 13:09:00 +01:00
public function getLogicalStructure($id, $recursive = false)
{
    // A non-recursive request can be served from the already loaded units.
    if (!$recursive && !empty($this->logicalUnits[$id])) {
        return $this->logicalUnits[$id];
    }
    // Select either the requested logical unit or, without an ID, all
    // logical units at top level.
    $query = empty($id)
        ? './mets:structMap[@TYPE="LOGICAL"]/mets:div'
        : './mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]';
    $nodes = $this->mets->xpath($query);
    if (empty($nodes)) {
        return [];
    }
    if ($recursive) {
        // Walk the logical structure recursively and fill the whole table of
        // contents; the return value stays empty in this mode.
        foreach ($nodes as $node) {
            $this->tableOfContents[] = $this->getLogicalStructureInfo($node, $recursive);
        }
        return [];
    }
    // Only the first xpath hit is of interest.
    return $this->getLogicalStructureInfo($nodes[0]);
}
/**
* This gets details about a logical structure element
*
* @ access protected
*
* @ param \SimpleXMLElement $structure : The logical structure node
2019-11-13 12:51:19 +01:00
* @ param bool $recursive : Whether to include the child elements
2019-04-04 16:09:20 +02:00
*
* @ return array Array of the element ' s id , label , type and physical page indexes / mptr link
*/
2019-11-13 13:09:00 +01:00
protected function getLogicalStructureInfo(\SimpleXMLElement $structure, $recursive = false)
{
    // Collect the node's attributes as plain strings. Start from an empty
    // array so a node without attributes does not leave $attributes undefined.
    $attributes = [];
    foreach ($structure->attributes() as $attribute => $value) {
        $attributes[$attribute] = (string) $value;
    }
    // Load plugin configuration.
    $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
    // Extract identity information; every missing attribute defaults to ''.
    $details = [];
    $details['id'] = (isset($attributes['ID']) ? $attributes['ID'] : '');
    $details['dmdId'] = (isset($attributes['DMDID']) ? $attributes['DMDID'] : '');
    $details['order'] = (isset($attributes['ORDER']) ? $attributes['ORDER'] : '');
    $details['label'] = (isset($attributes['LABEL']) ? $attributes['LABEL'] : '');
    $details['orderlabel'] = (isset($attributes['ORDERLABEL']) ? $attributes['ORDERLABEL'] : '');
    $details['contentIds'] = (isset($attributes['CONTENTIDS']) ? $attributes['CONTENTIDS'] : '');
    $details['volume'] = '';
    // Set volume information only if no label is set and this is the toplevel structure element.
    if (
        empty($details['label'])
        && $details['id'] == $this->_getToplevelId()
    ) {
        $metadata = $this->getMetadata($details['id']);
        if (!empty($metadata['volume'][0])) {
            $details['volume'] = $metadata['volume'][0];
        }
    }
    $details['pagination'] = '';
    $details['type'] = (isset($attributes['TYPE']) ? $attributes['TYPE'] : '');
    $details['thumbnailId'] = '';
    // Load smLinks.
    $this->_getSmLinks();
    // Load physical structure.
    $this->_getPhysicalStructure();
    // Get the physical page or external file this structure element is pointing at.
    $details['points'] = '';
    // Is there a mptr node?
    if (count($structure->children('http://www.loc.gov/METS/')->mptr)) {
        // Yes. Get the file reference.
        $details['points'] = (string) $structure->children('http://www.loc.gov/METS/')->mptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    } elseif (
        !empty($this->physicalStructure)
        // Guard against a document without any logical-to-physical links.
        && !empty($this->smLinks['l2p'])
        && array_key_exists($details['id'], $this->smLinks['l2p'])
    ) {
        // ID of the first physical page/track linked to this structure element.
        $firstPhysicalId = $this->smLinks['l2p'][$details['id']][0];
        // Link logical structure to the first corresponding physical page/track.
        $details['points'] = max(intval(array_search($firstPhysicalId, $this->physicalStructure, true)), 1);
        // Use the first configured thumbnail file group that has a file for that page.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (!empty($this->physicalStructureInfo[$firstPhysicalId]['files'][$fileGrpThumb])) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$firstPhysicalId]['files'][$fileGrpThumb];
                break;
            }
        }
        // Get page/track number of the first page/track related to this structure element.
        $details['pagination'] = $this->physicalStructureInfo[$firstPhysicalId]['orderlabel'];
    } elseif ($details['id'] == $this->_getToplevelId()) {
        // Point to self if this is the toplevel structure.
        $details['points'] = 1;
        // Use the first configured thumbnail file group that has a file for
        // the physical structure entry at index 1.
        $fileGrpsThumb = GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs']);
        while ($fileGrpThumb = array_shift($fileGrpsThumb)) {
            if (
                !empty($this->physicalStructure)
                && !empty($this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb])
            ) {
                $details['thumbnailId'] = $this->physicalStructureInfo[$this->physicalStructure[1]]['files'][$fileGrpThumb];
                break;
            }
        }
    }
    // Get the files this structure element is pointing at.
    $details['files'] = [];
    $fileUse = $this->_getFileGrps();
    // Get the file representations from fileSec node.
    foreach ($structure->children('http://www.loc.gov/METS/')->fptr as $fptr) {
        $fileId = (string) $fptr->attributes()->FILEID;
        // Check if file has valid @USE attribute.
        if (!empty($fileUse[$fileId])) {
            $details['files'][$fileUse[$fileId]] = $fileId;
        }
    }
    // Keep for later usage (stored without the 'children' key added below).
    $this->logicalUnits[$details['id']] = $details;
    // Walk the structure recursively? And are there any children of the current element?
    if (
        $recursive
        && count($structure->children('http://www.loc.gov/METS/')->div)
    ) {
        $details['children'] = [];
        foreach ($structure->children('http://www.loc.gov/METS/')->div as $child) {
            // Repeat for all children.
            $details['children'][] = $this->getLogicalStructureInfo($child, true);
        }
    }
    return $details;
}
/**
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: getMetadata ()
*/
2019-10-30 15:37:44 +01:00
public function getMetadata($id, $cPid = 0)
{
    // Make sure $cPid is a non-negative integer.
    $cPid = max(intval($cPid), 0);
    // If $cPid is not given, try to get it elsewhere.
    if (
        !$cPid
        && ($this->cPid || $this->pid)
    ) {
        // Retain current PID.
        $cPid = ($this->cPid ? $this->cPid : $this->pid);
    } elseif (!$cPid) {
        Helper::devLog('Invalid PID ' . $cPid . ' for metadata definitions', DEVLOG_SEVERITY_WARNING);
        return [];
    }
    // Get metadata from parsed metadata array if available.
    if (
        !empty($this->metadataArray[$id])
        && $this->metadataArray[0] == $cPid
    ) {
        return $this->metadataArray[$id];
    }
    // Initialize metadata array with empty values.
    $metadata = [
        'title' => [],
        'title_sorting' => [],
        'author' => [],
        'place' => [],
        'year' => [],
        'prod_id' => [],
        'record_id' => [],
        'opac_id' => [],
        'union_id' => [],
        'urn' => [],
        'purl' => [],
        'type' => [],
        'volume' => [],
        'volume_sorting' => [],
        'license' => [],
        'terms' => [],
        'restrictions' => [],
        'out_of_print' => [],
        'rights_info' => [],
        'collection' => [],
        'owner' => [],
        'mets_label' => [],
        'mets_orderlabel' => [],
        'document_format' => ['METS'],
    ];
    // Get the logical structure node's @DMDID.
    if (!empty($this->logicalUnits[$id])) {
        $dmdIds = $this->logicalUnits[$id]['dmdId'];
    } else {
        $dmdIdAttributes = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@DMDID');
        // The xpath result is empty if the node does not exist or has no
        // @DMDID; do not read offset 0 in that case.
        $dmdIds = (!empty($dmdIdAttributes) ? (string) $dmdIdAttributes[0] : '');
    }
    if (empty($dmdIds)) {
        // There is no dmdSec for this structure node.
        return [];
    }
    // Handle multiple DMDIDs separately.
    $dmdIds = explode(' ', $dmdIds);
    $hasSupportedMetadata = false;
    // Load available metadata formats and dmdSecs.
    $this->loadFormats();
    $this->_getDmdSec();
    foreach ($dmdIds as $dmdId) {
        // A referenced @DMDID may have no entry in $this->dmdSec; treat its
        // type as empty instead of reading an undefined index.
        $dmdType = (isset($this->dmdSec[$dmdId]['type']) ? $this->dmdSec[$dmdId]['type'] : '');
        // Is this metadata format supported?
        if (empty($this->formats[$dmdType])) {
            Helper::devLog('Unsupported metadata format "' . $dmdType . '" in dmdSec with @ID "' . $dmdId . '"', DEVLOG_SEVERITY_NOTICE);
            // Continue searching for supported metadata with next @DMDID.
            continue;
        }
        if (!empty($this->formats[$dmdType]['class'])) {
            $class = $this->formats[$dmdType]['class'];
            // Get the metadata from class.
            if (
                class_exists($class)
                && ($obj = GeneralUtility::makeInstance($class)) instanceof MetadataInterface
            ) {
                $obj->extractMetadata($this->dmdSec[$dmdId]['xml'], $metadata);
            } else {
                Helper::devLog('Invalid class/method "' . $class . '->extractMetadata()" for metadata format "' . $dmdType . '"', DEVLOG_SEVERITY_WARNING);
            }
        }
        // Get the structure's type.
        if (!empty($this->logicalUnits[$id])) {
            $metadata['type'] = [$this->logicalUnits[$id]['type']];
        } else {
            $struct = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $id . '"]/@TYPE');
            if (!empty($struct)) {
                $metadata['type'] = [(string) $struct[0]];
            }
        }
        // Get the additional metadata from database.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        // Get all metadata with configured xpath and applicable format first.
        $resultWithFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadataformat_joins.xpath AS xpath',
                'tx_dlf_metadataformat_joins.xpath_sorting AS xpath_sorting',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->innerJoin(
                'tx_dlf_metadata',
                'tx_dlf_metadataformat',
                'tx_dlf_metadataformat_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_metadataformat_joins.parent_id',
                    'tx_dlf_metadata.uid'
                )
            )
            ->innerJoin(
                'tx_dlf_metadataformat_joins',
                'tx_dlf_formats',
                'tx_dlf_formats_joins',
                $queryBuilder->expr()->eq(
                    'tx_dlf_formats_joins.uid',
                    'tx_dlf_metadataformat_joins.encoded'
                )
            )
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadataformat_joins.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_formats_joins.type', $queryBuilder->createNamedParameter($dmdType))
            )
            ->execute();
        // Get all metadata without a format, but with a default value next.
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('tx_dlf_metadata');
        // Get hidden records, too.
        $queryBuilder
            ->getRestrictions()
            ->removeByType(HiddenRestriction::class);
        $resultWithoutFormat = $queryBuilder
            ->select(
                'tx_dlf_metadata.index_name AS index_name',
                'tx_dlf_metadata.is_sortable AS is_sortable',
                'tx_dlf_metadata.default_value AS default_value',
                'tx_dlf_metadata.format AS format'
            )
            ->from('tx_dlf_metadata')
            ->where(
                $queryBuilder->expr()->eq('tx_dlf_metadata.pid', intval($cPid)),
                $queryBuilder->expr()->eq('tx_dlf_metadata.l18n_parent', 0),
                $queryBuilder->expr()->eq('tx_dlf_metadata.format', 0),
                $queryBuilder->expr()->neq('tx_dlf_metadata.default_value', $queryBuilder->createNamedParameter(''))
            )
            ->execute();
        // Merge both result sets.
        $allResults = array_merge($resultWithFormat->fetchAll(), $resultWithoutFormat->fetchAll());
        // We need a \DOMDocument here, because SimpleXML doesn't support XPath functions properly.
        $domNode = dom_import_simplexml($this->dmdSec[$dmdId]['xml']);
        $domXPath = new \DOMXPath($domNode->ownerDocument);
        $this->registerNamespaces($domXPath);
        // OK, now make the XPath queries.
        foreach ($allResults as $resArray) {
            // Set metadata field's value(s).
            if (
                $resArray['format'] > 0
                && !empty($resArray['xpath'])
                && ($values = $domXPath->evaluate($resArray['xpath'], $domNode))
            ) {
                if (
                    $values instanceof \DOMNodeList
                    && $values->length > 0
                ) {
                    $metadata[$resArray['index_name']] = [];
                    foreach ($values as $value) {
                        $metadata[$resArray['index_name']][] = trim((string) $value->nodeValue);
                    }
                } elseif (!($values instanceof \DOMNodeList)) {
                    // The expression evaluated to a scalar instead of a node list.
                    $metadata[$resArray['index_name']] = [trim((string) $values)];
                }
            }
            // Set default value if applicable.
            if (
                empty($metadata[$resArray['index_name']][0])
                && strlen($resArray['default_value']) > 0
            ) {
                $metadata[$resArray['index_name']] = [$resArray['default_value']];
            }
            // Set sorting value if applicable.
            if (
                !empty($metadata[$resArray['index_name']])
                && $resArray['is_sortable']
            ) {
                if (
                    $resArray['format'] > 0
                    && !empty($resArray['xpath_sorting'])
                    && ($values = $domXPath->evaluate($resArray['xpath_sorting'], $domNode))
                ) {
                    if (
                        $values instanceof \DOMNodeList
                        && $values->length > 0
                    ) {
                        $metadata[$resArray['index_name'] . '_sorting'][0] = trim((string) $values->item(0)->nodeValue);
                    } elseif (!($values instanceof \DOMNodeList)) {
                        $metadata[$resArray['index_name'] . '_sorting'][0] = trim((string) $values);
                    }
                }
                // Fall back to the field's first value as sorting value.
                if (empty($metadata[$resArray['index_name'] . '_sorting'][0])) {
                    $metadata[$resArray['index_name'] . '_sorting'][0] = $metadata[$resArray['index_name']][0];
                }
            }
        }
        // Set title to empty string if not present.
        if (empty($metadata['title'][0])) {
            $metadata['title'][0] = '';
            $metadata['title_sorting'][0] = '';
        }
        // Add collections from database to toplevel element if document is already saved.
        if (
            \TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($this->uid)
            && $id == $this->_getToplevelId()
        ) {
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable('tx_dlf_documents');
            $result = $queryBuilder
                ->select(
                    'tx_dlf_collections_join.index_name AS index_name'
                )
                ->from('tx_dlf_documents')
                ->innerJoin(
                    'tx_dlf_documents',
                    'tx_dlf_relations',
                    'tx_dlf_relations_joins',
                    $queryBuilder->expr()->eq(
                        'tx_dlf_relations_joins.uid_local',
                        'tx_dlf_documents.uid'
                    )
                )
                ->innerJoin(
                    'tx_dlf_relations_joins',
                    'tx_dlf_collections',
                    'tx_dlf_collections_join',
                    $queryBuilder->expr()->eq(
                        'tx_dlf_relations_joins.uid_foreign',
                        'tx_dlf_collections_join.uid'
                    )
                )
                ->where(
                    $queryBuilder->expr()->eq('tx_dlf_documents.pid', intval($cPid)),
                    $queryBuilder->expr()->eq('tx_dlf_documents.uid', intval($this->uid))
                )
                ->orderBy('tx_dlf_collections_join.index_name', 'ASC')
                ->execute();
            $allResults = $result->fetchAll();
            foreach ($allResults as $resArray) {
                // Strict comparison avoids PHP's loose string/number matching
                // when de-duplicating collection names.
                if (!in_array($resArray['index_name'], $metadata['collection'], true)) {
                    $metadata['collection'][] = $resArray['index_name'];
                }
            }
        }
        // Extract metadata only from first supported dmdSec.
        $hasSupportedMetadata = true;
        break;
    }
    if ($hasSupportedMetadata) {
        return $metadata;
    } else {
        Helper::devLog('No supported metadata found for logical structure with @ID "' . $id . '"', DEVLOG_SEVERITY_WARNING);
        return [];
    }
}
/**
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: getRawText ()
*/
2019-10-30 15:37:44 +01:00
public function getRawText($id)
{
    // Serve the text from the raw text array if it is already there.
    if (!empty($this->rawTextArray[$id])) {
        return $this->rawTextArray[$id];
    }
    // Load fileGrps, then extract the text only if fulltext files exist.
    $this->_getFileGrps();
    return $this->hasFulltext ? $this->getRawTextFromXml($id) : '';
}
/**
* { @ inheritDoc }
* @ see Document :: getStructureDepth ()
*/
public function getStructureDepth($logId)
{
    // The depth is the number of ancestor elements of the logical div with
    // the given @ID; zero if the div cannot be found.
    $ancestors = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $logId . '"]/ancestor::*');
    return empty($ancestors) ? 0 : count($ancestors);
}
/**
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: init ()
*/
2019-10-30 15:37:44 +01:00
protected function init()
{
    // Register namespaces on the whole document, then locate its METS part.
    $this->registerNamespaces($this->xml);
    $metsNodes = $this->xml->xpath('//mets:mets');
    if (empty($metsNodes)) {
        Helper::devLog('No METS part found in document with UID ' . $this->uid, DEVLOG_SEVERITY_ERROR);
        return;
    }
    // Keep the first METS node and register the namespaces on it as well.
    $this->mets = $metsNodes[0];
    $this->registerNamespaces($this->mets);
}
/**
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: loadLocation ()
*/
2019-10-30 15:37:44 +01:00
protected function loadLocation($location)
{
    $content = GeneralUtility::getUrl($location);
    $document = false;
    if ($content !== false) {
        // Turn off libxml's error logging while parsing.
        $previousErrorSetting = libxml_use_internal_errors(true);
        // Disable loading of external entities while parsing the XML; this
        // must be kept.
        $previousEntityLoaderSetting = libxml_disable_entity_loader(true);
        // Parse the fetched content.
        $document = simplexml_load_string($content);
        // Restore both libxml settings to their previous values.
        libxml_disable_entity_loader($previousEntityLoaderSetting);
        libxml_use_internal_errors($previousErrorSetting);
    }
    // Either fetching or parsing failed.
    if ($document === false) {
        Helper::devLog('Could not load XML file from "' . $location . '"', DEVLOG_SEVERITY_ERROR);
        return false;
    }
    $this->xml = $document;
    return true;
}
2019-06-13 11:11:58 +02:00
/**
* { @ inheritDoc }
* @ see \Kitodo\Dlf\Common\Document :: ensureHasFulltextIsSet ()
*/
2019-10-30 15:37:44 +01:00
protected function ensureHasFulltextIsSet()
{
    // Nothing to do if the fileGrps are already loaded.
    if ($this->fileGrpsLoaded) {
        return;
    }
    $this->_getFileGrps();
}
/**
* { @ inheritDoc }
 * @see Document::getParentDocumentUidForSaving()
*/
2019-04-09 08:33:25 +02:00
protected function getParentDocumentUidForSaving($pid, $core)
{
    // Get the closest ancestor of the current document which has a MPTR child.
    $parentMptr = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@ID="' . $this->_getToplevelId() . '"]/ancestor::mets:div[./mets:mptr][1]/mets:mptr');
    if (empty($parentMptr)) {
        return 0;
    }
    // A pointer to this very document does not denote a parent.
    $parentLocation = (string) $parentMptr[0]->attributes('http://www.w3.org/1999/xlink')->href;
    if ($parentLocation == $this->location) {
        return 0;
    }
    $parentDoc = self::getInstance($parentLocation, $pid);
    if (!$parentDoc->ready) {
        return 0;
    }
    // Save the parent first if its PID differs from the given one.
    if ($parentDoc->pid != $pid) {
        $parentDoc->save($pid, $core);
    }
    return $parentDoc->uid;
}
/**
 * Accepts an already parsed XML document instead of loading one.
 *
 * Only \SimpleXMLElement instances are accepted; anything else is
 * rejected and leaves the object untouched.
 *
 * {@inheritDoc}
 * @see Document::setPreloadedDocument()
 */
protected function setPreloadedDocument($preloadedDocument)
{
    $accepted = $preloadedDocument instanceof \SimpleXMLElement;
    if ($accepted) {
        $this->xml = $preloadedDocument;
    }
    return $accepted;
}
/**
 * Returns the document's METS part.
 *
 * {@inheritDoc}
 * @see Document::getDocument()
 */
protected function getDocument()
{
    return $this->mets;
}
/**
 * Getter for $this->cPid, invoked via __get()
 *
 * @access protected
 *
 * @return int The PID of the metadata definitions
 */
protected function _getCPid()
{
    return $this->cPid;
}
/**
 * This builds an array of the document's dmdSecs
 *
 * For every <mets:dmdSec> the metadata type is read from the wrapping
 * <mets:mdWrap> (@MDTYPE, or @OTHERMDTYPE if @MDTYPE is "OTHER"). Only
 * sections whose type is a known data format and whose expected root
 * element is present are registered; all others are skipped.
 *
 * @access protected
 *
 * @return array Array of dmdSecs with their IDs as array key
 */
protected function _getDmdSec()
{
    if (!$this->dmdSecLoaded) {
        // Get available data formats.
        $this->loadFormats();
        // Get dmdSec nodes from METS.
        $dmdIds = $this->mets->xpath('./mets:dmdSec/@ID');
        if (!empty($dmdIds)) {
            foreach ($dmdIds as $dmdId) {
                $dmdId = (string) $dmdId;
                $mdWrap = './mets:dmdSec[@ID="' . $dmdId . '"]/mets:mdWrap';
                // Start from a clean slate for every section, otherwise a
                // section with an unknown format would inherit the XML of
                // the previously processed one.
                $xml = [];
                $type = null;
                $selector = '';
                $mdTypes = $this->mets->xpath($mdWrap . '[not(@MDTYPE="OTHER")]/@MDTYPE');
                if (!empty($mdTypes)) {
                    $type = (string) $mdTypes[0];
                    $selector = '[@MDTYPE="' . $type . '"]';
                } else {
                    $otherTypes = $this->mets->xpath($mdWrap . '[@MDTYPE="OTHER"]/@OTHERMDTYPE');
                    if (!empty($otherTypes)) {
                        $type = (string) $otherTypes[0];
                        $selector = '[@MDTYPE="OTHER"][@OTHERMDTYPE="' . $type . '"]';
                    }
                }
                // Only look for the payload if the format is known.
                if (
                    $type !== null
                    && !empty($this->formats[$type])
                ) {
                    $xml = $this->mets->xpath($mdWrap . $selector . '/mets:xmlData/' . strtolower($type) . ':' . $this->formats[$type]['rootElement']);
                }
                if (!empty($xml)) {
                    $this->dmdSec[$dmdId]['type'] = $type;
                    $this->dmdSec[$dmdId]['xml'] = $xml[0];
                    $this->registerNamespaces($this->dmdSec[$dmdId]['xml']);
                }
            }
        }
        $this->dmdSecLoaded = true;
    }
    return $this->dmdSec;
}
/**
 * This builds the file ID -> USE concordance
 *
 * Only file groups whose @USE is listed in one of the extension's
 * "fileGrp*" settings are taken into account. As a side effect this also
 * determines whether the document has fulltext files.
 *
 * @access protected
 *
 * @return array Array of file use groups with file IDs
 */
protected function _getFileGrps()
{
    if (!$this->fileGrpsLoaded) {
        // Get configured USE attributes.
        $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
        $useGrps = [];
        $settings = ['fileGrpImages', 'fileGrpThumbs', 'fileGrpDownload', 'fileGrpFulltext', 'fileGrpAudio'];
        foreach ($settings as $setting) {
            if (!empty($extConf[$setting])) {
                // Drop empty list entries: an empty USE name would match
                // every file group that has no @USE attribute at all.
                $useGrps = array_merge($useGrps, GeneralUtility::trimExplode(',', $extConf[$setting], true));
            }
        }
        // Get all file groups.
        $fileGrps = $this->mets->xpath('./mets:fileSec/mets:fileGrp');
        if (!empty($fileGrps)) {
            // Build concordance for configured USE attributes.
            foreach ($fileGrps as $fileGrp) {
                $use = (string) $fileGrp['USE'];
                // Strict comparison, so that numeric-looking names such as
                // "01" and "1" are not treated as the same group.
                if (in_array($use, $useGrps, true)) {
                    foreach ($fileGrp->children('http://www.loc.gov/METS/')->file as $file) {
                        $this->fileGrps[(string) $file->attributes()->ID] = $use;
                    }
                }
            }
        }
        // Are there any fulltext files available?
        if (
            !empty($extConf['fileGrpFulltext'])
            && array_intersect(GeneralUtility::trimExplode(',', $extConf['fileGrpFulltext'], true), $this->fileGrps) !== []
        ) {
            $this->hasFulltext = true;
        }
        $this->fileGrpsLoaded = true;
    }
    return $this->fileGrps;
}
2019-06-13 11:11:58 +02:00
/**
 * Fills $this->metadataArray with the metadata of every logical
 * structure node that references a descriptive metadata section.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::prepareMetadataArray()
 */
protected function prepareMetadataArray($cPid)
{
    // Get the IDs of all logical structure nodes with metadata.
    $ids = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID]/@ID');
    if (empty($ids)) {
        return;
    }
    foreach ($ids as $id) {
        $logId = (string) $id;
        $this->metadataArray[$logId] = $this->getMetadata($logId, $cPid);
    }
}
2019-04-09 08:56:54 +02:00
2019-04-04 16:09:20 +02:00
/**
 * Getter for $this->mets, invoked via __get()
 *
 * @access protected
 *
 * @return \SimpleXMLElement The XML's METS part as \SimpleXMLElement object
 */
protected function _getMets()
{
    return $this->mets;
}
2019-06-13 11:11:58 +02:00
/**
 * Builds the physical structure from the METS structMap of type
 * "PHYSICAL".
 *
 * The result lists the ID of the physical sequence at index 0, followed
 * by the IDs of its child elements sorted by their @ORDER. Details of
 * each element (attributes and file references per USE group) are
 * collected in $this->physicalStructureInfo, and $this->numPages is set
 * to the number of child elements.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::_getPhysicalStructure()
 */
protected function _getPhysicalStructure()
{
    // Already built? Then just hand it out.
    if ($this->physicalStructureLoaded) {
        return $this->physicalStructure;
    }
    // Does the document have a structMap node of type "PHYSICAL"?
    $elementNodes = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]/mets:div');
    if (!empty($elementNodes)) {
        // Get file groups.
        $fileUse = $this->_getFileGrps();
        // Records one physical node in $this->physicalStructureInfo and
        // returns the node's ID.
        $register = function (\SimpleXMLElement $node) use ($fileUse) {
            $id = (string) $node['ID'];
            $this->physicalStructureInfo[$id]['id'] = $id;
            $this->physicalStructureInfo[$id]['dmdId'] = (isset($node['DMDID']) ? (string) $node['DMDID'] : '');
            $this->physicalStructureInfo[$id]['order'] = (isset($node['ORDER']) ? (string) $node['ORDER'] : '');
            $this->physicalStructureInfo[$id]['label'] = (isset($node['LABEL']) ? (string) $node['LABEL'] : '');
            $this->physicalStructureInfo[$id]['orderlabel'] = (isset($node['ORDERLABEL']) ? (string) $node['ORDERLABEL'] : '');
            $this->physicalStructureInfo[$id]['type'] = (string) $node['TYPE'];
            $this->physicalStructureInfo[$id]['contentIds'] = (isset($node['CONTENTIDS']) ? (string) $node['CONTENTIDS'] : '');
            // Get the file representations from fileSec node.
            foreach ($node->children('http://www.loc.gov/METS/')->fptr as $fptr) {
                $fileId = (string) $fptr->attributes()->FILEID;
                // Only files of a configured USE group are of interest.
                if (!empty($fileUse[$fileId])) {
                    $this->physicalStructureInfo[$id]['files'][$fileUse[$fileId]] = $fileId;
                }
            }
            return $id;
        };
        // Get the physical sequence's metadata.
        $physNode = $this->mets->xpath('./mets:structMap[@TYPE="PHYSICAL"]/mets:div[@TYPE="physSequence"]');
        $physSeq = [$register($physNode[0])];
        // Build the physical elements' array, keyed by @ORDER.
        $elements = [];
        foreach ($elementNodes as $elementNode) {
            $elements[(int) $elementNode['ORDER']] = $register($elementNode);
        }
        // Sort array by keys (= @ORDER).
        if (ksort($elements)) {
            // Set total number of pages/tracks.
            $this->numPages = count($elements);
            // Merge and re-index the array to get nice numeric indexes.
            $this->physicalStructure = array_merge($physSeq, $elements);
        }
    }
    $this->physicalStructureLoaded = true;
    return $this->physicalStructure;
}
/**
 * Builds the concordance between logical and physical structure nodes
 * from the METS structLink section.
 *
 * The result has two maps: 'l2p' (logical ID => list of physical IDs)
 * and 'p2l' (physical ID => list of logical IDs).
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::_getSmLinks()
 */
protected function _getSmLinks()
{
    if ($this->smLinksLoaded) {
        return $this->smLinks;
    }
    $smLinks = $this->mets->xpath('./mets:structLink/mets:smLink');
    if (!empty($smLinks)) {
        foreach ($smLinks as $smLink) {
            $xlink = $smLink->attributes('http://www.w3.org/1999/xlink');
            $from = (string) $xlink->from;
            $to = (string) $xlink->to;
            // Record the link in both directions.
            $this->smLinks['l2p'][$from][] = $to;
            $this->smLinks['p2l'][$to][] = $from;
        }
    }
    $this->smLinksLoaded = true;
    return $this->smLinks;
}
2019-06-13 15:33:30 +02:00
/**
 * Determines the location of the document's thumbnail.
 *
 * The structure type of the document is looked up in "tx_dlf_structures"
 * to find out which structure element should provide the thumbnail. The
 * configured thumbnail file groups are then tried in order: first on the
 * first physical page linked to that structure element, then on the
 * first page of the document.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::_getThumbnail()
 */
protected function _getThumbnail($forceReload = false)
{
    if (
        $this->thumbnailLoaded
        && !$forceReload
    ) {
        return $this->thumbnail;
    }
    // Retain current PID.
    $cPid = ($this->cPid ? $this->cPid : $this->pid);
    if (!$cPid) {
        Helper::devLog('Invalid PID ' . $cPid . ' for structure definitions', DEVLOG_SEVERITY_ERROR);
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }
    // Load extension configuration.
    $extConf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
    if (empty($extConf['fileGrpThumbs'])) {
        Helper::devLog('No fileGrp for thumbnails specified', DEVLOG_SEVERITY_WARNING);
        $this->thumbnailLoaded = true;
        return $this->thumbnail;
    }
    $strctId = $this->_getToplevelId();
    $metadata = $this->getTitledata($cPid);
    // The title data may lack a structure type; fall back to an empty
    // name, which simply matches no structure definition.
    $structureType = (isset($metadata['type'][0]) ? $metadata['type'][0] : '');

    $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
        ->getQueryBuilderForTable('tx_dlf_structures');

    // Get structure element to get thumbnail from.
    $result = $queryBuilder
        ->select('tx_dlf_structures.thumbnail AS thumbnail')
        ->from('tx_dlf_structures')
        ->where(
            $queryBuilder->expr()->eq('tx_dlf_structures.pid', intval($cPid)),
            $queryBuilder->expr()->eq('tx_dlf_structures.index_name', $queryBuilder->expr()->literal($structureType)),
            Helper::whereExpression('tx_dlf_structures')
        )
        ->setMaxResults(1)
        ->execute();

    $allResults = $result->fetchAll();

    if (count($allResults) == 1) {
        $resArray = $allResults[0];
        // Get desired thumbnail structure if not the toplevel structure itself.
        if (!empty($resArray['thumbnail'])) {
            $strctType = Helper::getIndexNameFromUid($resArray['thumbnail'], 'tx_dlf_structures', $cPid);
            // Check if this document has a structure element of the desired type.
            $strctIds = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@TYPE="' . $strctType . '"]/@ID');
            if (!empty($strctIds)) {
                $strctId = (string) $strctIds[0];
            }
        }
        // Load smLinks.
        $this->_getSmLinks();
        // Load the physical structure once; both lookups below rely on it.
        $physicalStructure = $this->_getPhysicalStructure();
        // First physical page linked to the chosen structure element.
        $linkedPageId = null;
        if (
            !empty($physicalStructure)
            && !empty($this->smLinks['l2p'][$strctId])
        ) {
            $linkedPageId = $this->smLinks['l2p'][$strctId][0];
        }
        // First page of the document (index 0 is the physical sequence).
        $firstPageId = (isset($this->physicalStructure[1]) ? $this->physicalStructure[1] : null);
        // Try every configured file group in order. Empty list entries
        // are skipped instead of ending the search prematurely.
        foreach (GeneralUtility::trimExplode(',', $extConf['fileGrpThumbs'], true) as $fileGrpThumb) {
            if (
                $linkedPageId !== null
                && !empty($this->physicalStructureInfo[$linkedPageId]['files'][$fileGrpThumb])
            ) {
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$linkedPageId]['files'][$fileGrpThumb]);
                break;
            }
            if (
                $firstPageId !== null
                && !empty($this->physicalStructureInfo[$firstPageId]['files'][$fileGrpThumb])
            ) {
                $this->thumbnail = $this->getFileLocation($this->physicalStructureInfo[$firstPageId]['files'][$fileGrpThumb]);
                break;
            }
        }
    } else {
        Helper::devLog('No structure of type "' . $structureType . '" found in database', DEVLOG_SEVERITY_ERROR);
    }
    $this->thumbnailLoaded = true;
    return $this->thumbnail;
}
2019-04-04 16:09:20 +02:00
/**
 * Determines the ID of the document's toplevel logical structure.
 *
 * Candidates are all logical structure nodes that have metadata but no
 * METS pointer. The first candidate linked to physical structure nodes
 * wins; if there is none, the first candidate is used.
 *
 * {@inheritDoc}
 * @see \Kitodo\Dlf\Common\Document::_getToplevelId()
 */
protected function _getToplevelId()
{
    // Already determined earlier.
    if (!empty($this->toplevelId)) {
        return $this->toplevelId;
    }
    // Get all logical structure nodes with metadata, but without associated METS-Pointers.
    $divs = $this->mets->xpath('./mets:structMap[@TYPE="LOGICAL"]//mets:div[@DMDID and not(./mets:mptr)]');
    if (empty($divs)) {
        return $this->toplevelId;
    }
    // Load smLinks.
    $this->_getSmLinks();
    foreach ($divs as $div) {
        $id = (string) $div['ID'];
        // Are there physical structure nodes for this logical structure?
        if (array_key_exists($id, $this->smLinks['l2p'])) {
            // Yes. That's what we're looking for.
            $this->toplevelId = $id;
            break;
        }
        // No. Remember the first such node anyway, but keep looking for a better one.
        if (empty($this->toplevelId)) {
            $this->toplevelId = $id;
        }
    }
    return $this->toplevelId;
}
/**
 * This magic method is executed prior to any serialization of the object
 * @see __wakeup()
 *
 * @access public
 *
 * @return array Properties to be serialized
 */
public function __sleep()
{
    // \SimpleXMLElement objects can't be serialized, so the XML is kept
    // as a string and parsed again on wakeup.
    $this->asXML = $this->xml->asXML();
    $serializedProperties = ['uid', 'pid', 'recordId', 'parentId', 'asXML'];
    return $serializedProperties;
}
2019-04-09 08:56:54 +02:00
2019-04-04 16:09:20 +02:00
/**
 * This magic method is used for setting a string value for the object
 *
 * The METS part is copied into a fresh DOM document and written out as
 * formatted XML.
 *
 * @access public
 *
 * @return string String representing the METS object
 */
public function __toString()
{
    $document = new \DOMDocument('1.0', 'utf-8');
    // Deep-copy the METS node into the new document.
    $metsNode = $document->importNode(dom_import_simplexml($this->mets), true);
    $document->appendChild($metsNode);
    $document->formatOutput = true;
    return $document->saveXML();
}
2019-04-09 08:33:25 +02:00
2019-04-04 16:09:20 +02:00
/**
 * This magic method is executed after the object is deserialized
 * @see __sleep()
 *
 * Parses the XML string stored by __sleep() and re-initializes the
 * object from it. If parsing fails an error is logged and the object is
 * left without a document.
 *
 * @access public
 *
 * @return void
 */
public function __wakeup()
{
    // Turn off libxml's error logging while parsing.
    $previousErrorMode = libxml_use_internal_errors(true);
    // Reload XML from string.
    $xml = @simplexml_load_string($this->asXML);
    // Reset libxml's error logging.
    libxml_use_internal_errors($previousErrorMode);
    if ($xml === false) {
        Helper::devLog('Could not load XML after deserialization', DEVLOG_SEVERITY_ERROR);
        return;
    }
    // The string copy is no longer needed.
    $this->asXML = '';
    $this->xml = $xml;
    // Rebuild the unserializable properties.
    $this->init();
}
}