kitodo-presentation/Classes/Common/Solr.php

530 lines
16 KiB
PHP
Raw Normal View History

2011-03-09 16:36:27 +01:00
<?php
2019-03-13 08:29:51 +01:00
namespace Kitodo\Dlf\Common;
2011-03-09 16:36:27 +01:00
/**
2016-09-23 12:24:46 +02:00
* (c) Kitodo. Key to digital objects e.V. <contact@kitodo.org>
*
* This file is part of the Kitodo and TYPO3 projects.
*
* @license GNU General Public License version 3 or later.
* For the full copyright and license information, please read the
* LICENSE.txt file that was distributed with this source code.
2011-03-09 16:36:27 +01:00
*/
/**
2019-03-14 19:57:18 +01:00
* Solr class for the 'dlf' extension
*
2019-03-14 19:57:18 +01:00
* @author Sebastian Meyer <sebastian.meyer@slub-dresden.de>
* @author Henrik Lochmann <dev@mentalmotive.com>
* @package TYPO3
* @subpackage dlf
* @access public
*/
2019-03-14 22:41:35 +01:00
class Solr {
/**
 * Name of the Solr core this instance talks to
 * (assigned by the constructor after a successful ping)
 *
 * @var string
 * @access protected
 */
protected $core = '';
/**
 * PID of the configuration page
 * (written via the magic setter, forced to be non-negative; passed on as 'pid' in search results)
 *
 * @var integer
 * @access protected
 */
protected $cPid = 0;
/**
 * The extension key, used to look up the extension configuration
 *
 * @var string
 * @access public
 */
public static $extKey = 'dlf';
/**
 * Maximum number of results, used as the "rows" parameter by search_raw()
 *
 * @var integer
 * @access protected
 */
protected $limit = 50000;
/**
 * Number of hits reported by Solr for the last call of search()
 *
 * @var integer
 * @access protected
 */
protected $numberOfHits = 0;
/**
 * Additional query parameters which are handed over to every select query
 *
 * @var array
 * @access protected
 */
2019-03-14 17:39:19 +01:00
protected $params = [];
/**
 * Is the search instantiated successfully?
 * (only TRUE if the ping in the constructor did not throw)
 *
 * @var boolean
 * @access protected
 */
protected $ready = FALSE;
/**
 * Registry of the singleton search objects with their core name as array key
 *
 * @var array (\Kitodo\Dlf\Common\Solr)
 * @access protected
 */
2019-03-14 17:39:19 +01:00
protected static $registry = [];
/**
 * The Solr service object (Solarium client created in the constructor)
 *
 * @var \Solarium\Client
 * @access protected
 */
protected $service;
/**
 * Escape all special characters in a query string
 *
 * A query which starts and ends with a double quote is treated as a phrase:
 * the surrounding quotes are trimmed, the content is escaped as phrase and
 * wrapped in quotes again. Everything else is escaped as a single term.
 *
 * @access public
 *
 * @param string $query: The query string
 *
 * @return string The escaped query string
 */
public static function escapeQuery($query) {
    $solariumHelper = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(\Solarium\Core\Query\Helper::class);
    $isPhrase = preg_match('/^".*"$/', $query);
    if (!$isPhrase) {
        // Not enclosed in quotes: escape as term.
        return $solariumHelper->escapeTerm($query);
    }
    // Enclosed in quotes: escape the inner part as phrase.
    $phrase = trim($query, '"');
    return '"'.$solariumHelper->escapePhrase($phrase).'"';
}
/**
 * Escape all special characters in a query string while retaining valid field queries
 *
 * A query of the form "<name>_<t|u><s|u>i:<value>" is regarded as a field query.
 * If the field is one of the indexed metadata fields configured on the given
 * page, only the value is escaped and the "field:(value)" syntax is kept.
 * Otherwise the whole string is escaped. A plain "*" and empty queries are
 * returned unchanged.
 *
 * @access public
 *
 * @param string $query: The query string
 * @param integer $pid: The PID for the field configuration
 *
 * @return string The escaped query string
 */
public static function escapeQueryKeepField($query, $pid) {
    // Is there a field query?
    if (preg_match('/^[[:alnum:]]+_[tu][su]i:\(?.*\)?$/', $query)) {
        // Get all indexed fields.
        $fields = [];
        $result = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'tx_dlf_metadata.index_name,tx_dlf_metadata.index_tokenized,tx_dlf_metadata.index_stored',
            'tx_dlf_metadata',
            'tx_dlf_metadata.index_indexed=1'
                .' AND tx_dlf_metadata.pid='.intval($pid)
                .' AND (tx_dlf_metadata.sys_language_uid IN (-1,0) OR tx_dlf_metadata.l18n_parent=0)'
                .Helper::whereClause('tx_dlf_metadata'),
            '',
            '',
            ''
        );
        if ($result) {
            // Build the Solr field name: index name plus suffix derived from the "tokenized" and "stored" flags.
            while ($resArray = $GLOBALS['TYPO3_DB']->sql_fetch_row($result)) {
                $fields[] = $resArray[0].'_'.($resArray[1] ? 't' : 'u').($resArray[2] ? 's' : 'u').'i';
            }
            // Release the result set, it is not needed any longer.
            $GLOBALS['TYPO3_DB']->sql_free_result($result);
        }
        // Check if queried field is valid.
        // (The pattern above guarantees that there is a colon, so there are always two parts.)
        $splitQuery = explode(':', $query, 2);
        if (in_array($splitQuery[0], $fields, TRUE)) {
            // Keep the field prefix and escape the value without its surrounding parentheses.
            $query = $splitQuery[0].':('.self::escapeQuery(trim($splitQuery[1], '()')).')';
        } else {
            $query = self::escapeQuery($query);
        }
    } elseif (!empty($query)
        && $query !== '*') {
        // Don't escape plain asterisk search.
        $query = self::escapeQuery($query);
    }
    return $query;
}
/**
 * This is a singleton class, thus instances must be created by this method
 *
 * Instances are cached per core name. A new instance is only registered
 * (and returned) if its connection check succeeded.
 *
 * @access public
 *
 * @param mixed $core: Name or UID of the core to load
 *
 * @return \Kitodo\Dlf\Common\Solr|NULL Instance of this class or NULL if the core
 *                                      name is invalid or the connection failed
 */
public static function getInstance($core) {
    // Save parameter for logging purposes, because $core gets overwritten
    // below and is always empty when the error message is written.
    $_core = $core;
    // Get core name if UID is given.
    if (\TYPO3\CMS\Core\Utility\MathUtility::canBeInterpretedAsInteger($core)) {
        $core = Helper::getIndexName($core, 'tx_dlf_solrcores');
    }
    // Check if core is set.
    if (empty($core)) {
        Helper::devLog('Invalid core name "'.$_core.'" for Apache Solr', DEVLOG_SEVERITY_ERROR);
        return;
    }
    // Check if there is an instance in the registry already.
    // (isset() avoids reading an undefined array key for unknown cores.)
    if (isset(self::$registry[$core])
        && self::$registry[$core] instanceof self) {
        // Return singleton instance if available.
        return self::$registry[$core];
    }
    // Create new instance...
    $instance = new self($core);
    if (!$instance->ready) {
        Helper::devLog('Could not connect to Apache Solr server', DEVLOG_SEVERITY_ERROR);
        return;
    }
    // ...and save it to registry.
    self::$registry[$core] = $instance;
    // Return new instance.
    return $instance;
}
2011-03-09 16:36:27 +01:00
/**
 * Returns the connection information for Solr
 *
 * The values are taken from the serialized extension configuration.
 *
 * @access public
 *
 * @return array The connection parameters with the keys 'scheme', 'host',
 *               'username', 'password', 'port', 'path' and 'timeout'
 */
public static function getSolrConnectionInfo() {
    $solrInfo = [];
    // Extract extension configuration.
    $conf = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf'][self::$extKey]);
    // Derive Solr scheme
    $solrInfo['scheme'] = empty($conf['solrHttps']) ? 'http' : 'https';
    // Derive Solr host name.
    $solrInfo['host'] = ($conf['solrHost'] ? $conf['solrHost'] : '127.0.0.1');
    // Set username and password.
    $solrInfo['username'] = $conf['solrUser'];
    $solrInfo['password'] = $conf['solrPass'];
    // Set port if not set.
    $solrInfo['port'] = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($conf['solrPort'], 1, 65535, 8983);
    // Trim slashes from path.
    $solrInfo['path'] = trim($conf['solrPath'], '/');
    // Timeout
    // The timeout must not exceed PHP's max_execution_time. A value of 0 means
    // "no limit" (e.g. on CLI) and must not be used as upper bound, because
    // this would force the timeout to 0.
    $maxTimeout = intval(ini_get('max_execution_time'));
    if ($maxTimeout < 1) {
        $maxTimeout = PHP_INT_MAX;
    }
    $solrInfo['timeout'] = \TYPO3\CMS\Core\Utility\MathUtility::forceIntegerInRange($conf['solrTimeout'], 1, $maxTimeout, 10);
    return $solrInfo;
}
/**
 * Returns the request URL for a specific Solr core
 *
 * Credentials are only added to the URL if both username and password are
 * configured. They are percent-encoded, so that reserved characters
 * (e.g. "@", ":" or "/") do not break the URL.
 *
 * @access public
 *
 * @param string $core: Name of the core to load
 *
 * @return string The request URL for a specific Solr core
 */
public static function getSolrUrl($core = '') {
    // Get Solr connection information.
    $solrInfo = self::getSolrConnectionInfo();
    if ($solrInfo['username']
        && $solrInfo['password']) {
        $host = rawurlencode($solrInfo['username']).':'.rawurlencode($solrInfo['password']).'@'.$solrInfo['host'];
    } else {
        $host = $solrInfo['host'];
    }
    // Return entire request URL.
    return $solrInfo['scheme'].'://'.$host.':'.$solrInfo['port'].'/'.$solrInfo['path'].'/'.$core;
}
/**
 * Get next unused Solr core number
 *
 * Counts upwards from the given number until getInstance() returns NULL
 * for the core name "dlfCore<number>".
 *
 * @access public
 *
 * @param integer $start: Number to start with
 *
 * @return integer First unused core number found
 */
public static function solrGetCoreNumber($start = 0) {
    // Negative numbers are not allowed.
    $number = max(intval($start), 0);
    // Skip all numbers for which a core instance can be obtained.
    while (self::getInstance('dlfCore'.$number) !== NULL) {
        $number++;
    }
    return $number;
}
2011-03-09 16:36:27 +01:00
/**
 * Processes a search request.
 *
 * Two select queries are sent: the first one only determines the total
 * number of hits, the second one fetches the toplevel documents.
 *
 * @access public
 *
 * @return \Kitodo\Dlf\Common\DocumentList The result list
 */
public function search() {
    // Both requests start from the stored parameters with a guaranteed 'filterquery' entry.
    $baseParams = $this->params;
    if (!isset($baseParams['filterquery'])) {
        $baseParams['filterquery'] = [];
    }
    // First request: fetch no rows, just determine the total number of hits.
    $countParams = $baseParams;
    $countParams['start'] = 0;
    $countParams['rows'] = 0;
    $countResult = $this->service->select($this->service->createSelect($countParams));
    $this->numberOfHits = $countResult->getNumFound();
    // Second request: only the fields 'uid' and 'id' are required.
    $listParams = $baseParams;
    $listParams['fields'] = 'uid,id';
    // Prefix every filter query with the join on 'uid'.
    foreach ($listParams['filterquery'] as $index => $filter) {
        if (isset($filter['query'])) {
            $listParams['filterquery'][$index]['query'] = '{!join from=uid to=uid}'.$filter['query'];
        }
    }
    // Restrict the result to toplevel documents.
    $listParams['filterquery'][] = ['query' => 'toplevel:true'];
    // Prefix the main query with the join on 'uid' as well.
    $listParams['query'] = '{!join from=uid to=uid}'.$listParams['query'];
    $listResult = $this->service->select($this->service->createSelect($listParams));
    $numberOfToplevelHits = $listResult->getNumFound();
    // Collect the documents, keyed by their id so that each id occurs only once.
    $toplevel = [];
    foreach ($listResult as $document) {
        $toplevel[$document->id] = [
            'u' => $document->uid,
            'h' => '',
            's' => '',
            'p' => []
        ];
    }
    // Build the list of documents.
    $list = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(DocumentList::class);
    $list->reset();
    $list->add(array_values($toplevel));
    // Describe the search in the list's metadata.
    $options = [
        'source' => 'search',
        'engine' => 'solr',
        'select' => $this->params['query'],
        'userid' => 0,
        'params' => $this->params,
        'core' => $this->core,
        'pid' => $this->cPid,
        'order' => 'score',
        'order.asc' => TRUE,
        'numberOfHits' => $this->numberOfHits,
        'numberOfToplevelHits' => $numberOfToplevelHits
    ];
    $list->metadata = [
        'label' => '',
        'description' => '',
        'options' => $options
    ];
    return $list;
}
/**
 * Processes a search request and returns the raw Apache Solr Documents.
 *
 * The given parameters override the stored ones. 'start', 'rows' and
 * 'query' are always set by this method.
 *
 * @access public
 *
 * @param string $query: The search query
 * @param array $parameters: Additional search parameters
 *
 * @return array The Apache Solr Documents that were fetched
 */
public function search_raw($query = '', $parameters = []) {
    // These values always win over anything given by the caller.
    $forced = [
        'start' => 0,
        'rows' => $this->limit,
        'query' => $query
    ];
    // Perform search.
    $select = $this->service->createSelect(array_merge($this->params, $parameters, $forced));
    $response = $this->service->select($select);
    // Copy the documents into a plain array.
    $documents = [];
    foreach ($response as $document) {
        $documents[] = $document;
    }
    return $documents;
}
/**
 * Getter for $this->limit, called by __get()
 *
 * @access protected
 *
 * @return integer The max number of results
 */
protected function _getLimit() {
    return $this->limit;
}
2013-09-13 15:04:30 +02:00
/**
 * Getter for $this->numberOfHits, called by __get()
 *
 * @access protected
 *
 * @return integer Total number of hits for last search
 */
protected function _getNumberOfHits() {
    return $this->numberOfHits;
}
/**
 * Getter for $this->ready, called by __get()
 *
 * @access protected
 *
 * @return boolean Is the search instantiated successfully?
 */
protected function _getReady() {
    return $this->ready;
}
/**
 * Getter for $this->service, called by __get()
 *
 * @access protected
 *
 * @return \Solarium\Client Apache Solr service object
 */
protected function _getService() {
    return $this->service;
}
/**
 * Setter for $this->cPid, called by __set()
 *
 * The value is cast to integer, negative values become 0.
 *
 * @access protected
 *
 * @param integer $value: The new PID for the metadata definitions
 *
 * @return void
 */
protected function _setCPid($value) {
    $pid = intval($value);
    $this->cPid = $pid > 0 ? $pid : 0;
}
/**
 * Setter for $this->limit, called by __set()
 *
 * The value is cast to integer, negative values become 0.
 *
 * @access protected
 *
 * @param integer $value: The max number of results
 *
 * @return void
 */
protected function _setLimit($value) {
    $limit = intval($value);
    $this->limit = $limit > 0 ? $limit : 0;
}
/**
 * Setter for $this->params, called by __set()
 *
 * The given array replaces the stored parameters completely.
 *
 * @access protected
 *
 * @param array $value: The query parameters
 *
 * @return void
 */
protected function _setParams(array $value) {
    $this->params = $value;
}
/**
 * This magic method is called each time an invisible property is read from the object
 *
 * The call is forwarded to the method "_get<Property>" if both the property
 * and this method exist. Otherwise a warning is logged and nothing is returned.
 *
 * @access public
 *
 * @param string $var: Name of variable to get
 *
 * @return mixed Value of $this->$var
 */
public function __get($var) {
    $getter = '_get'.ucfirst($var);
    if (property_exists($this, $var)
        && method_exists($this, $getter)) {
        return $this->$getter();
    }
    Helper::devLog('There is no getter function for property "'.$var.'"', DEVLOG_SEVERITY_WARNING);
    return;
}
2011-03-09 16:36:27 +01:00
/**
 * This magic method is called each time an invisible property is written to the object
 *
 * The call is forwarded to the method "_set<Property>" if both the property
 * and this method exist. Otherwise a warning is logged and the value is discarded.
 *
 * @access public
 *
 * @param string $var: Name of variable to set
 * @param mixed $value: New value of variable
 *
 * @return void
 */
public function __set($var, $value) {
    $setter = '_set'.ucfirst($var);
    if (property_exists($this, $var)
        && method_exists($this, $setter)) {
        $this->$setter($value);
        return;
    }
    Helper::devLog('There is no setter function for property "'.$var.'"', DEVLOG_SEVERITY_WARNING);
}
2011-03-09 16:36:27 +01:00
/**
 * This is a singleton class, thus the constructor should be private/protected
 *
 * Creates the Solarium client for the given core and pings the server.
 * Core name and ready flag are only set if the ping did not throw.
 *
 * @access protected
 *
 * @param string $core: The name of the core to use
 *
 * @return void
 */
protected function __construct($core) {
    $connection = self::getSolrConnectionInfo();
    // Settings of the one and only endpoint.
    $endpoint = [
        'scheme' => $connection['scheme'],
        'host' => $connection['host'],
        'port' => $connection['port'],
        'path' => '/'.$connection['path'].'/',
        'core' => $core,
        'username' => $connection['username'],
        'password' => $connection['password'],
        'timeout' => $connection['timeout']
    ];
    // Instantiate Solarium\Client class.
    $this->service = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(
        \Solarium\Client::class,
        ['endpoint' => ['dlf' => $endpoint]]
    );
    // Check if connection is established.
    $ping = $this->service->createPing();
    try {
        $this->service->ping($ping);
        // The ping succeeded: remember the core name and mark the instance as usable.
        $this->core = $core;
        $this->ready = TRUE;
    } catch (\Exception $e) {
        // Nothing to do here: $this->ready stays FALSE.
    }
}
2011-03-09 16:36:27 +01:00
}