Merge branch 'master' into rise-number-of-snippets

This commit is contained in:
Alexander Bigga 2021-10-06 16:51:12 +02:00 committed by GitHub
commit 0e9125a7b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 32 additions and 57 deletions

View File

@ -516,17 +516,7 @@ abstract class Document
}
$content = GeneralUtility::getUrl($location);
if ($content !== false) {
// TODO use single place to load xml
// Turn off libxml's error logging.
$libxmlErrors = libxml_use_internal_errors(true);
// Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept
$previousValueOfEntityLoader = libxml_disable_entity_loader(true);
// Try to load XML from file.
$xml = simplexml_load_string($content);
// reset entity loader setting
libxml_disable_entity_loader($previousValueOfEntityLoader);
// Reset libxml's error logging.
libxml_use_internal_errors($libxmlErrors);
$xml = Helper::getXmlFileAsString($content);
if ($xml !== false) {
/* @var $xml \SimpleXMLElement */
$xml->registerXPathNamespace('mets', 'http://www.loc.gov/METS/');
@ -703,7 +693,7 @@ abstract class Document
&& ($obj = GeneralUtility::makeInstance($class)) instanceof FulltextInterface
) {
// Load XML from file.
$ocrTextXml = $this->getXmlObject($fileContent);
$ocrTextXml = Helper::getXmlFileAsString($fileContent);
$textMiniOcr = $obj->getTextAsMiniOcr($ocrTextXml);
$this->rawTextArray[$id] = $textMiniOcr;
} else {
@ -729,32 +719,7 @@ abstract class Document
private function getTextFormat($fileContent)
{
// Get the root element's name as text format.
return strtoupper($this->getXmlObject($fileContent)->getName());
}
/**
* Get the OCR full text as object
*
* @access private
*
* @param string $fileContent: content of the XML file
*
* @return \SimpleXMLElement The OCR full text as object
*/
private function getXmlObject($fileContent)
{
// Turn off libxml's error logging.
$libxmlErrors = libxml_use_internal_errors(true);
// Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept.
$previousValueOfEntityLoader = libxml_disable_entity_loader(true);
// Load XML from file.
$ocrTextXml = simplexml_load_string($fileContent);
// Reset entity loader setting.
libxml_disable_entity_loader($previousValueOfEntityLoader);
// Reset libxml's error logging.
libxml_use_internal_errors($libxmlErrors);
// Get the root element.
return $ocrTextXml;
return strtoupper(Helper::getXmlFileAsString($fileContent)->getName());
}
/**

View File

@ -195,6 +195,30 @@ class Helper
return $decrypted;
}
/**
 * Parse XML content into a SimpleXMLElement.
 *
 * Despite the method name, the return value is the parsed XML object and
 * not a string. The name is kept for backward compatibility with callers.
 *
 * @access public
 *
 * @param string $content: XML content to parse
 *
 * @return \SimpleXMLElement|false The parsed XML or false if $content is not a string or not well-formed XML
 */
public static function getXmlFileAsString($content)
{
    // simplexml_load_string() only accepts strings; report anything else
    // as "not loadable" so callers can keep relying on the "!== false" check.
    if (!is_string($content)) {
        return false;
    }
    // Turn off libxml's error logging and remember the previous setting.
    $libxmlErrors = libxml_use_internal_errors(true);
    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where loading
    // of external entities is disabled by default. Only call it on older versions.
    $toggleEntityLoader = \PHP_VERSION_ID < 80000;
    $previousValueOfEntityLoader = false;
    if ($toggleEntityLoader) {
        // Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept.
        $previousValueOfEntityLoader = libxml_disable_entity_loader(true);
    }
    // Try to parse the XML content.
    $xml = simplexml_load_string($content);
    if ($toggleEntityLoader) {
        // Reset entity loader setting.
        libxml_disable_entity_loader($previousValueOfEntityLoader);
    }
    // Reset libxml's error logging.
    libxml_use_internal_errors($libxmlErrors);
    return $xml;
}
/**
* Add a message to the TYPO3 log
*
@ -221,7 +245,7 @@ class Helper
case 2:
$logger->warning($message);
break;
case 23:
case 3:
$logger->error($message);
break;
default:

View File

@ -721,16 +721,7 @@ final class MetsDocument extends Document
{
$fileResource = GeneralUtility::getUrl($location);
if ($fileResource !== false) {
// Turn off libxml's error logging.
$libxmlErrors = libxml_use_internal_errors(true);
// Disables the functionality to allow external entities to be loaded when parsing the XML, must be kept
$previousValueOfEntityLoader = libxml_disable_entity_loader(true);
// Load XML from file.
$xml = simplexml_load_string($fileResource);
// reset entity loader setting
libxml_disable_entity_loader($previousValueOfEntityLoader);
// Reset libxml's error logging.
libxml_use_internal_errors($libxmlErrors);
$xml = Helper::getXmlFileAsString($fileResource);
// Set some basic properties.
if ($xml !== false) {
$this->xml = $xml;
@ -1145,12 +1136,7 @@ final class MetsDocument extends Document
*/
public function __wakeup()
{
// Turn off libxml's error logging.
$libxmlErrors = libxml_use_internal_errors(true);
// Reload XML from string.
$xml = @simplexml_load_string($this->asXML);
// Reset libxml's error logging.
libxml_use_internal_errors($libxmlErrors);
$xml = Helper::getXmlFileAsString($this->asXML);
if ($xml !== false) {
$this->asXML = '';
$this->xml = $xml;

View File

@ -73,7 +73,7 @@ dlfViewerFullTextDownloadControl.prototype.createFullTextFile = function() {
/**
* Append text line
*
* @param {string} textLine
* @param {ol.Feature} textLine
*/
dlfViewerFullTextDownloadControl.prototype.appendTextLine = function(textLine) {
var fileContent = '';
@ -89,5 +89,5 @@ dlfViewerFullTextDownloadControl.prototype.appendTextLine = function(textLine) {
}
}
}
return fileContent.concat(fileContent);
return fileContent;
};