We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
| 1 | <?php |
||||
| 2 | |||||
| 3 | namespace Kitodo\Dlf\Common\Solr; |
||||
| 4 | |||||
| 5 | use Exception; |
||||
| 6 | use Kitodo\Dlf\Common\AbstractDocument; |
||||
| 7 | use Kitodo\Dlf\Common\Helper; |
||||
| 8 | use Kitodo\Dlf\Common\Indexer; |
||||
| 9 | use Kitodo\Dlf\Common\Solr\SearchResult\ResultDocument; |
||||
| 10 | use Kitodo\Dlf\Domain\Repository\DocumentRepository; |
||||
| 11 | use Solarium\QueryType\Select\Result\Document; |
||||
| 12 | use TYPO3\CMS\Core\Cache\CacheManager; |
||||
| 13 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||||
| 14 | use TYPO3\CMS\Core\Utility\MathUtility; |
||||
| 15 | use TYPO3\CMS\Extbase\Persistence\Generic\QueryResult; |
||||
| 16 | use TYPO3\CMS\Extbase\Persistence\QueryResultInterface; |
||||
| 17 | use TYPO3\CMS\Extbase\Persistence\QueryInterface; |
||||
| 18 | |||||
| 19 | /** |
||||
| 20 | * Targeted towards being used in ``PaginateController`` (``<f:widget.paginate>``). |
||||
| 21 | * |
||||
| 22 | * Notes on implementation: |
||||
| 23 | * - `Countable`: `count()` returns the number of toplevel documents. |
||||
| 24 | * - `getNumLoadedDocuments()`: Number of toplevel documents that have been fetched from Solr. |
||||
| 25 | * - `ArrayAccess`/`Iterator`: Access *fetched* toplevel documents indexed in order of their ranking. |
||||
| 26 | * |
||||
| 27 | * @package TYPO3 |
||||
| 28 | * @subpackage dlf |
||||
| 29 | * |
||||
| 30 | * @access public |
||||
| 31 | */ |
||||
| 32 | class SolrSearch implements \Countable, \Iterator, \ArrayAccess, QueryResultInterface |
||||
| 33 | { |
||||
/**
 * Repository used by submit() to load the document records (and their children)
 * for the UIDs returned from Solr.
 *
 * @access private
 * @var DocumentRepository
 */
private DocumentRepository $documentRepository;

/**
 * Collections the search is restricted to. When not empty, prepare() turns them
 * into a filter query.
 *
 * @access private
 * @var array|QueryResultInterface
 */
private $collections;

/**
 * Settings array; this class reads the keys 'storagePid' and 'solrcore' from it.
 *
 * @access private
 * @var array
 */
private array $settings;

/**
 * Search parameters. Keys read by this class include 'query', 'fulltext',
 * 'extQuery', 'extOperator', 'extField', 'dateFrom', 'dateTo', 'fq',
 * 'documentId', 'orderBy' and 'order'.
 *
 * @access private
 * @var array
 */
private array $searchParams;

/**
 * Metadata records whose Solr fields are added to the requested field list.
 *
 * @access private
 * @var QueryResult|null
 */
private ?QueryResult $listedMetadata;

/**
 * Metadata records used by searchSolr() to build the eDisMax query fields
 * (including their boost).
 *
 * @access private
 * @var QueryResult|null
 */
private ?QueryResult $indexedMetadata;

/**
 * Solr query parameters assembled by prepare() and reused by submit().
 * Not initialized before prepare() has been called.
 *
 * @access private
 * @var array
 */
private array $params;

/**
 * Result of the last submit() call; null until a search has been submitted.
 * submit() stores the keys 'solrResults', 'numberOfToplevels', 'documents',
 * 'document_keys' and 'numFound'.
 *
 * @access private
 * @var array|null
 */
private $result;

/**
 * Current position of the Iterator implementation.
 *
 * @access protected
 * @var int
 */
protected int $position = 0;
||||
| 87 | |||||
/**
 * Constructs SolrSearch instance.
 *
 * The arguments are only stored; no Solr request is sent until prepare()
 * or submit() is called.
 *
 * @access public
 *
 * @param DocumentRepository $documentRepository
 * @param array|QueryResultInterface $collections can contain 0, 1 or many Collection objects
 * @param array $settings
 * @param array $searchParams
 * @param QueryResult|null $listedMetadata
 * @param QueryResult|null $indexedMetadata
 *
 * @return void
 */
public function __construct(DocumentRepository $documentRepository, $collections, array $settings, array $searchParams, ?QueryResult $listedMetadata = null, ?QueryResult $indexedMetadata = null)
{
    // The nullable types are spelled out explicitly: relying on "= null" to make
    // a typed parameter nullable is deprecated as of PHP 8.4.
    $this->documentRepository = $documentRepository;
    $this->collections = $collections;
    $this->settings = $settings;
    $this->searchParams = $searchParams;
    $this->listedMetadata = $listedMetadata;
    $this->indexedMetadata = $indexedMetadata;
}
||||
| 111 | |||||
/**
 * Gets amount of loaded documents.
 *
 * Returns 0 when no search has been submitted yet, mirroring count().
 *
 * @access public
 *
 * @return int
 */
public function getNumLoadedDocuments(): int
{
    // $this->result is null until submit() has run; count(null) would throw a TypeError.
    return count($this->result['documents'] ?? []);
}
||||
| 123 | |||||
/**
 * Count results.
 *
 * Returns the number of toplevel documents reported by the last submitted
 * search, or 0 when no search has been submitted or the stored result does
 * not carry that number.
 *
 * @access public
 *
 * @return int
 */
public function count(): int
{
    // A stored result may lack 'numberOfToplevels' (e.g. the "nothing found" result);
    // returning null from an int-typed method would raise a TypeError.
    return (int) ($this->result['numberOfToplevels'] ?? 0);
}
||||
| 139 | |||||
/**
 * Returns the document at the current iterator position.
 *
 * @access public
 *
 * @return array
 */
public function current(): array
{
    // Delegates to the ArrayAccess getter, exactly as `$this[$position]` does.
    return $this->offsetGet($this->position);
}
||||
| 151 | |||||
/**
 * Returns the current iterator position (part of the Iterator implementation).
 *
 * @access public
 *
 * @return int
 */
public function key(): int
{
    return $this->position;
}
||||
| 163 | |||||
/**
 * Advances the iterator position by one.
 *
 * @access public
 *
 * @return void
 */
public function next(): void
{
    ++$this->position;
}
||||
| 175 | |||||
/**
 * Resets the iterator position to the first entry (position 0).
 *
 * @access public
 *
 * @return void
 */
public function rewind(): void
{
    $this->position = 0;
}
||||
| 187 | |||||
/**
 * Tells whether a document exists at the current iterator position.
 *
 * @access public
 *
 * @return bool
 */
public function valid(): bool
{
    // Same check that `isset($this[$position])` performs through ArrayAccess.
    return $this->offsetExists($this->position);
}
||||
| 197 | |||||
/**
 * Checks if the document with given offset exists.
 *
 * The offset is a position in the list of fetched documents; it is mapped
 * to the document key through the 'document_keys' list of the stored result.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return bool
 */
public function offsetExists($offset): bool
{
    // Out-of-range offsets are routine (valid() probes one past the end of the list),
    // so the lookup must not raise an "undefined array key" warning.
    $idx = $this->result['document_keys'][$offset] ?? null;

    return $idx !== null && isset($this->result['documents'][$idx]);
}
||||
| 212 | |||||
/**
 * Gets the document with given offset.
 *
 * Before the document is returned, missing metadata is fetched from Solr and
 * an empty title is replaced by the bracketed title of the superior document.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return mixed the document entry, or null if there is none at this offset
 */
#[\ReturnTypeWillChange]
public function offsetGet($offset)
{
    // Unknown offsets simply yield null instead of raising an "undefined array key" warning.
    $idx = $this->result['document_keys'][$offset] ?? null;
    if ($idx === null) {
        return null;
    }

    $document = $this->result['documents'][$idx] ?? null;
    if ($document === null) {
        return null;
    }

    // It may happen that a Solr group only includes non-toplevel results,
    // in which case metadata of toplevel entry isn't yet filled.
    if (empty($document['metadata'])) {
        $document['metadata'] = $this->fetchToplevelMetadataFromSolr([
            'query' => 'uid:' . $document['uid'],
            'start' => 0,
            'rows' => 1,
            'sort' => ['score' => 'desc'],
        ])[$document['uid']] ?? [];
    }

    // get title of parent/grandparent/... if empty
    if (empty($document['title']) && ($document['partOf'] ?? 0) > 0) {
        $superiorTitle = AbstractDocument::getTitle($document['partOf'], true);
        if (!empty($superiorTitle)) {
            $document['title'] = '[' . $superiorTitle . ']';
        }
    }

    return $document;
}
||||
| 251 | |||||
/**
 * Not supported: the result list is read-only.
 *
 * @access public
 *
 * @param int $offset
 * @param int $value
 *
 * @return void
 *
 * @throws \Exception always
 */
public function offsetSet($offset, $value): void
{
    $message = "SolrSearch: Modifying result list is not supported";

    throw new Exception($message);
}
||||
| 268 | |||||
/**
 * Not supported: the result list is read-only.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return void
 *
 * @throws \Exception always
 */
public function offsetUnset($offset): void
{
    $message = "SolrSearch: Modifying result list is not supported";

    throw new Exception($message);
}
||||
| 284 | |||||
/**
 * Gets SOLR results.
 *
 * Returns the 'solrResults' entry of the result stored by submit(), i.e. the
 * result set produced by searchSolr() (or an empty array for the
 * "nothing found" result).
 *
 * @access public
 *
 * @return mixed
 */
public function getSolrResults()
{
    return $this->result['solrResults'];
}
||||
| 296 | |||||
/**
 * Gets by UID.
 *
 * Looks the document up in the documents loaded by the last submit() call.
 *
 * @access public
 *
 * @param int $uid
 *
 * @return mixed the document entry, or null if no document with this UID is loaded
 */
public function getByUid($uid)
{
    // Unknown UIDs (or a not yet submitted search) yield null without an "undefined array key" warning.
    return $this->result['documents'][$uid] ?? null;
}
||||
| 310 | |||||
/**
 * Gets query.
 *
 * A new SolrSearchQuery wrapping this search is created on every call.
 *
 * @access public
 *
 * @return SolrSearchQuery
 */
public function getQuery()
{
    $searchQuery = new SolrSearchQuery($this);

    return $searchQuery;
}
||||
| 322 | |||||
/**
 * Sets query. Not supported on this class.
 *
 * @access public
 *
 * @param QueryInterface $query the query
 *
 * @throws Exception always, as the operation is not implemented
 *
 * @return void
 */
public function setQuery(QueryInterface $query): void
{
    $message = "setQuery not supported on SolrSearch instance";

    throw new Exception($message);
}
||||
| 338 | |||||
/**
 * Gets first.
 *
 * Returns the first fetched document, resolved the same way as `$search[0]`.
 *
 * @access public
 *
 * @return mixed the first document entry, or null if there is none
 */
public function getFirst()
{
    return $this->offsetGet(0);
}
||||
| 350 | |||||
/**
 * Parses results to array.
 *
 * Returns the loaded documents as a list (keys are re-indexed from 0).
 * An empty array is returned when no search has been submitted yet.
 *
 * @access public
 *
 * @return array
 */
public function toArray()
{
    // $this->result is null until submit() has run; array_values(null) would throw a TypeError.
    return array_values($this->result['documents'] ?? []);
}
||||
| 362 | |||||
/**
 * Get total number of hits.
 *
 * This can be accessed in Fluid template using `.numFound`.
 * Returns 0 when no search has been submitted yet.
 *
 * @access public
 *
 * @return int
 */
public function getNumFound()
{
    // Keep the documented int contract even before submit() has stored a result.
    return (int) ($this->result['numFound'] ?? 0);
}
||||
| 376 | |||||
/**
 * Prepares SOLR search.
 *
 * Builds the Solr query parameters (main query, filter queries, sorting and
 * field list) from the search parameters, stores them for later submit()
 * calls and sends a first one-row request so that the total number of
 * results is known in advance.
 *
 * @access public
 *
 * @return void
 */
public function prepare()
{
    // Prepare query parameters.
    $params = [];
    $matches = [];
    $fields = Solr::getFields();
    $query = '';

    // Set search query.
    if (
        !empty($this->searchParams['fulltext'])
        // The field name is quoted so that it is always matched literally inside the pattern.
        || preg_match('/' . preg_quote($fields['fulltext'], '/') . ':\((.*)\)/', trim($this->searchParams['query'] ?? ''), $matches)
    ) {
        // If the query already is a fulltext query e.g using the facets
        // ('query' may be absent when only the fulltext flag is set, hence the fallback.)
        $this->searchParams['query'] = empty($matches[1]) ? ($this->searchParams['query'] ?? '') : $matches[1];
        // Search in fulltext field if applicable. Query must not be empty!
        if (!empty($this->searchParams['query'])) {
            $query = $fields['fulltext'] . ':(' . Solr::escapeQuery(trim($this->searchParams['query'])) . ')';
        }
        $params['fulltext'] = true;
    } else {
        // Retain given search field if valid.
        if (!empty($this->searchParams['query'])) {
            $query = Solr::escapeQueryKeepField(trim($this->searchParams['query']), $this->settings['storagePid']);
        }
    }

    // Add extended search query.
    if (
        !empty($this->searchParams['extQuery'])
        && is_array($this->searchParams['extQuery'])
    ) {
        $allowedOperators = ['AND', 'OR', 'NOT'];
        $numberOfExtQueries = count($this->searchParams['extQuery']);
        for ($i = 0; $i < $numberOfExtQueries; $i++) {
            if (empty($this->searchParams['extQuery'][$i])) {
                continue;
            }
            // A missing operator is treated like a disallowed one; the comparison is strict
            // so that only the exact operator strings are accepted.
            $operator = $this->searchParams['extOperator'][$i] ?? null;
            if (!in_array($operator, $allowedOperators, true)) {
                continue;
            }
            if (!empty($query)) {
                $query .= ' ' . $operator . ' ';
            }
            $query .= Indexer::getIndexFieldName($this->searchParams['extField'][$i], $this->settings['storagePid']) . ':(' . Solr::escapeQuery($this->searchParams['extQuery'][$i]) . ')';
        }
    }

    // Add filter query for date search
    if (!empty($this->searchParams['dateFrom']) && !empty($this->searchParams['dateTo'])) {
        // combine dateFrom and dateTo into range search
        // NOTE(review): both values are inserted into the filter query without escaping or
        // validation - confirm that callers only pass well-formed date values.
        $params['filterquery'][]['query'] = '{!join from=' . $fields['uid'] . ' to=' . $fields['uid'] . '}' . $fields['date'] . ':[' . $this->searchParams['dateFrom'] . ' TO ' . $this->searchParams['dateTo'] . ']';
    }

    // Add filter query for faceting.
    if (isset($this->searchParams['fq']) && is_array($this->searchParams['fq'])) {
        foreach ($this->searchParams['fq'] as $filterQuery) {
            $params['filterquery'][]['query'] = $filterQuery;
        }
    }

    // Add filter query for in-document searching.
    if (
        !empty($this->searchParams['documentId'])
        && MathUtility::canBeInterpretedAsInteger($this->searchParams['documentId'])
    ) {
        // Search in document and all subordinates (valid for up to three levels of hierarchy).
        $params['filterquery'][]['query'] = '_query_:"{!join from='
            . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':{!join from=' . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':' . $this->searchParams['documentId'] . '"' . ' OR {!join from='
            . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':' . $this->searchParams['documentId'] . ' OR '
            . $fields['uid'] . ':' . $this->searchParams['documentId'];
    }

    // if collections are given, we prepare the collection query string
    if (!empty($this->collections)) {
        $params['filterquery'][]['query'] = $this->getCollectionFilterQuery($query);
    }

    // Set some query parameters.
    $params['query'] = !empty($query) ? $query : '*';

    $params['sort'] = $this->getSort();
    $params['listMetadataRecords'] = [];

    // Restrict the fields to the required ones.
    $params['fields'] = 'uid,id,page,title,thumbnail,partof,toplevel,type';

    if ($this->listedMetadata) {
        foreach ($this->listedMetadata as $metadata) {
            if ($metadata->getIndexStored() || $metadata->getIndexIndexed()) {
                // Solr field name: index name plus a suffix encoding tokenized/stored/indexed.
                $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u');
                $params['fields'] .= ',' . $listMetadataRecord;
                $params['listMetadataRecords'][$metadata->getIndexName()] = $listMetadataRecord;
            }
        }
    }

    $this->params = $params;

    // Send off query to get total number of search results in advance
    $this->submit(0, 1, false);
}
||||
| 489 | |||||
/**
 * Submits SOLR search.
 *
 * Runs the query prepared by prepare() for the given window and stores the
 * outcome in the result of this object. With $processResults enabled, the
 * Solr hits are combined with the document records from the repository:
 * non-toplevel hits that carry a page become 'searchResults' of their
 * document, toplevel hits contribute list metadata and, outside of fulltext
 * searches, the children of the document.
 *
 * @access public
 *
 * @param int $start
 * @param int $rows
 * @param bool $processResults default value is true
 *
 * @return void
 */
public function submit($start, $rows, $processResults = true)
{
    $params = $this->params;
    $params['start'] = $start;
    $params['rows'] = $rows;

    // Perform search.
    $result = $this->searchSolr($params, true);

    // Initialize values
    $documents = [];

    if ($processResults && $result['numFound'] > 0) {
        // flat array with uids from Solr search
        $documentSet = array_unique(array_column($result['documents'], 'uid'));

        if (empty($documentSet)) {
            // return nothing found
            // ('numberOfToplevels' is included so that count() always finds an integer.)
            $this->result = ['solrResults' => [], 'numberOfToplevels' => 0, 'documents' => [], 'document_keys' => [], 'numFound' => 0];
            return;
        }

        // The fulltext flag may be absent from the search parameters; absent means "no fulltext search".
        $isFulltextSearch = ($this->searchParams['fulltext'] ?? '') == '1';

        // get the Extbase document objects for all uids
        $allDocuments = $this->documentRepository->findAllByUids($documentSet);
        $childrenOf = $this->documentRepository->findChildrenOfEach($documentSet);

        foreach ($result['documents'] as $doc) {
            if (empty($documents[$doc['uid']]) && isset($allDocuments[$doc['uid']])) {
                $documents[$doc['uid']] = $allDocuments[$doc['uid']];
            }
            if (!isset($documents[$doc['uid']])) {
                // Solr hit without a matching document record: nothing to attach it to.
                continue;
            }

            $this->translateLanguageCode($doc);

            if ($doc['toplevel'] === false) {
                // this maybe a chapter, article, ..., year
                if ($doc['type'] === 'year') {
                    continue;
                }
                if (!empty($doc['page'])) {
                    // it's probably a fulltext or metadata search
                    $searchResult = [];
                    $searchResult['page'] = $doc['page'];
                    $searchResult['thumbnail'] = $doc['thumbnail'];
                    $searchResult['structure'] = $doc['type'];
                    $searchResult['title'] = $doc['title'];
                    foreach (array_keys($params['listMetadataRecords']) as $indexName) {
                        if (isset($doc['metadata'][$indexName])) {
                            $searchResult['metadata'][$indexName] = $doc['metadata'][$indexName];
                        }
                    }
                    if ($isFulltextSearch) {
                        $searchResult['snippet'] = $doc['snippet'];
                        $searchResult['highlight'] = $doc['highlight'];
                        $searchResult['highlight_word'] = preg_replace('/^;|;$/', '', // remove ; at beginning or end
                            preg_replace('/;+/', ';', // replace any multiple of ; with a single ;
                                preg_replace('/[{~\d*}{\s+}{^=*\d+.*\d*}`~!@#$%\^&*()_|+-=?;:\'",.<>\{\}\[\]\\\]/', ';', $this->searchParams['query'] ?? ''))); // replace search operators and special characters with ;
                    }
                    $documents[$doc['uid']]['searchResults'][] = $searchResult;
                }
            } elseif ($doc['toplevel'] === true) {
                foreach (array_keys($params['listMetadataRecords']) as $indexName) {
                    if (isset($doc['metadata'][$indexName])) {
                        $documents[$doc['uid']]['metadata'][$indexName] = $doc['metadata'][$indexName];
                    }
                }
                if (!$isFulltextSearch) {
                    $documents[$doc['uid']]['page'] = 1;
                    $children = $childrenOf[$doc['uid']] ?? [];

                    if (!empty($children)) {
                        $batchSize = 100;
                        $totalChildren = count($children);

                        for ($start = 0; $start < $totalChildren; $start += $batchSize) {
                            $batch = array_slice($children, $start, $batchSize, true);

                            // Fetch metadata for the current batch
                            $metadataOf = $this->fetchToplevelMetadataFromSolr([
                                'query' => 'partof:' . $doc['uid'],
                                'start' => $start,
                                'rows' => min($batchSize, $totalChildren - $start),
                            ]);

                            foreach ($batch as $docChild) {
                                // We need only a few fields from the children, but we need them as an array.
                                $childDocument = [
                                    'thumbnail' => $docChild['thumbnail'],
                                    'title' => $docChild['title'],
                                    'structure' => $docChild['structure'],
                                    'metsOrderlabel' => $docChild['metsOrderlabel'],
                                    'uid' => $docChild['uid'],
                                    // The Solr batch is not guaranteed to contain every child of this slice.
                                    'metadata' => $metadataOf[$docChild['uid']] ?? [],
                                ];
                                $documents[$doc['uid']]['children'][$docChild['uid']] = $childDocument;
                            }
                        }
                    }
                }
            }
        }
    }

    $this->result = ['solrResults' => $result, 'numberOfToplevels' => $result['numberOfToplevels'], 'documents' => $documents, 'document_keys' => array_keys($documents), 'numFound' => $result['numFound']];
}
||||
| 605 | |||||
/**
 * Find all listed metadata using specified query params.
 *
 * The given parameters are extended by the field list of the listed metadata
 * and by a filter query restricting the search to toplevel documents.
 *
 * @access protected
 *
 * @param array $queryParams
 *
 * @return array metadata of each returned Solr document, keyed by document UID
 */
protected function fetchToplevelMetadataFromSolr(array $queryParams): array
{
    // The keys are added in a fixed order because the parameter array is part of the cache identifier.
    $solrParams = $queryParams;
    $solrParams['listMetadataRecords'] = [];
    // Restrict the fields to the required ones.
    $solrParams['fields'] = 'uid,toplevel';

    if ($this->listedMetadata !== null) {
        foreach ($this->listedMetadata as $listed) {
            if (!$listed->getIndexStored() && !$listed->getIndexIndexed()) {
                continue;
            }
            // Solr field name: index name plus a suffix encoding tokenized/stored/indexed.
            $suffix = ($listed->getIndexTokenized() ? 't' : 'u')
                . ($listed->getIndexStored() ? 's' : 'u')
                . ($listed->getIndexIndexed() ? 'i' : 'u');
            $solrField = $listed->getIndexName() . '_' . $suffix;

            $solrParams['fields'] .= ',' . $solrField;
            $solrParams['listMetadataRecords'][$listed->getIndexName()] = $solrField;
        }
    }

    // Only toplevel documents are of interest here.
    $solrParams['filterquery'][] = ['query' => 'toplevel:true'];

    $searchResult = $this->searchSolr($solrParams, true);

    $metadataByUid = [];
    foreach ($searchResult['documents'] as $doc) {
        $this->translateLanguageCode($doc);
        $metadataByUid[$doc['uid']] = $doc['metadata'];
    }

    return $metadataByUid;
}
||||
| 649 | |||||
/**
 * Processes a search request
 *
 * Sends a select query, grouped by `uid`, to the configured Solr core and
 * converts every record of every group into a document via getDocument().
 * With caching enabled, a previously stored result set for the same core and
 * parameters is returned instead; only non-empty result sets are stored.
 * An empty result set is returned when Solr is not available or answers
 * with HTTP status 400.
 *
 * @access protected
 *
 * @param array $parameters Additional search parameters
 * @param boolean $enableCache Enable caching of Solr requests
 *
 * @return array Result set with the keys 'documents', 'numberOfToplevels' and 'numFound'
 */
protected function searchSolr($parameters = [], $enableCache = true)
{
    // Set query.
    $parameters['query'] = isset($parameters['query']) ? $parameters['query'] : '*';
    $parameters['filterquery'] = isset($parameters['filterquery']) ? $parameters['filterquery'] : [];

    // Perform Solr query.
    // Instantiate search object.
    $solr = Solr::getInstance($this->settings['solrcore']);
    if (!$solr->ready) {
        Helper::log('Apache Solr not available', LOG_SEVERITY_ERROR);
        return [
            'documents' => [],
            'numberOfToplevels' => 0,
            'numFound' => 0,
        ];
    }

    // Only a strict `true` enables the cache; any other value runs the query uncached
    // instead of dereferencing a cache object that was never created.
    $useCache = $enableCache === true;

    $cacheIdentifier = '';
    $cache = null;
    $entry = false;
    // Calculate cache identifier.
    if ($useCache) {
        $cacheIdentifier = Helper::digest($solr->core . print_r($parameters, true));
        $cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('tx_dlf_solr');
        $entry = $cache->get($cacheIdentifier);
    }
    $resultSet = [
        'documents' => [],
        'numberOfToplevels' => 0,
        'numFound' => 0,
    ];
    if (!$useCache || $entry === false) {
        $selectQuery = $solr->service->createSelect($parameters);

        $edismax = $selectQuery->getEDisMax();

        $queryFields = '';

        if ($this->indexedMetadata) {
            foreach ($this->indexedMetadata as $metadata) {
                if ($metadata->getIndexIndexed()) {
                    $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . 'i';
                    $queryFields .= $listMetadataRecord . '^' . $metadata->getIndexBoost() . ' ';
                }
            }
        }

        $edismax->setQueryFields($queryFields);

        $grouping = $selectQuery->getGrouping();
        $grouping->addField('uid');
        $grouping->setLimit(100); // Results in group (TODO: check)
        $grouping->setNumberOfGroups(true);

        $fulltextExists = $parameters['fulltext'] ?? false;
        if ($fulltextExists === true) {
            // get highlighting component and apply settings
            $selectQuery->getHighlighting();
        }

        $solrRequest = $solr->service->createRequest($selectQuery);

        if ($fulltextExists === true) {
            // If it is a fulltext search, enable highlighting.
            // field for which highlighting is going to be performed,
            // is required if you want to have OCR highlighting
            $solrRequest->addParam('hl.ocr.fl', 'fulltext');
            // return the coordinates of highlighted search as absolute coordinates
            $solrRequest->addParam('hl.ocr.absoluteHighlights', 'on');
            // max amount of snippets for a single page
            $solrRequest->addParam('hl.snippets', '20');
            // we store the fulltext on page level and can disable this option
            $solrRequest->addParam('hl.ocr.trackPages', 'off');
        }

        // Perform search for all documents with the same uid that either fit to the search or marked as toplevel.
        $response = $solr->service->executeRequest($solrRequest);
        // return empty resultSet on error-response
        if ($response->getStatusCode() == 400) {
            return $resultSet;
        }
        $result = $solr->service->createResult($selectQuery, $response);

        // TODO: Call to an undefined method Solarium\Core\Query\Result\ResultInterface::getGrouping().
        // @phpstan-ignore-next-line
        $uidGroup = $result->getGrouping()->getGroup('uid');
        $resultSet['numberOfToplevels'] = $uidGroup->getNumberOfGroups();
        $resultSet['numFound'] = $uidGroup->getMatches();
        $highlighting = [];
        if ($fulltextExists === true) {
            $data = $result->getData();
            // The response may not contain an OCR highlighting section; fall back to "no highlights".
            $highlighting = $data['ocrHighlighting'] ?? [];
        }
        $fields = Solr::getFields();

        foreach ($uidGroup as $group) {
            foreach ($group as $record) {
                $resultSet['documents'][] = $this->getDocument($record, $highlighting, $fields, $parameters);
            }
        }

        // Save value in cache.
        if (!empty($resultSet['documents']) && $useCache) {
            $cache->set($cacheIdentifier, $resultSet);
        }
    } else {
        // Return cache hit.
        $resultSet = $entry;
    }
    return $resultSet;
}
||||
| 770 | |||||
/**
 * Get collection filter query for search.
 *
 * Collections that define their own index search ("virtual collections")
 * contribute that search query; all other collections are matched by their
 * index name in `collection_faceting`. Both parts are joined with OR.
 *
 * @access private
 *
 * @param string $query the main search query; when empty, regular collections are
 *                      additionally restricted to `toplevel:true AND partof:0`
 *
 * @return string
 */
private function getCollectionFilterQuery(string $query) : string
{
    $quotedIndexNames = [];
    $virtualQueries = [];

    foreach ($this->collections as $collection) {
        // check for virtual collections query string
        $indexSearch = $collection->getIndexSearch();
        if ($indexSearch) {
            $virtualQueries[] = '(' . $indexSearch . ')';
            continue;
        }
        $quotedIndexNames[] = '"' . $collection->getIndexName() . '"';
    }

    // distinguish between simple collection browsing and actual searching within the collection(s)
    $collectionsQueryString = '';
    if ($quotedIndexNames !== []) {
        $facetQuery = 'collection_faceting:(' . implode(' OR ', $quotedIndexNames) . ')';
        $collectionsQueryString = empty($query)
            ? '(' . $facetQuery . ' AND toplevel:true AND partof:0)'
            : '(' . $facetQuery . ')';
    }

    // virtual collections might query documents that are neither toplevel:true nor partof:0 and need to be searched separately
    $virtualCollectionsQueryString = '';
    if ($virtualQueries !== []) {
        $virtualCollectionsQueryString = '(' . implode(' OR ', $virtualQueries) . ')';
    }

    // combine both query strings into a single filterquery via OR if both are given, otherwise pass either of those
    $queryParts = array_filter([$collectionsQueryString, $virtualCollectionsQueryString]);

    return implode(' OR ', $queryParts);
}
||||
| 810 | |||||
/**
 * Get sort order of the results as given or by relevance as default.
 *
 * If `$this->searchParams['orderBy']` is set, the results are sorted by that
 * single field in the direction given by `$this->searchParams['order']`
 * (ascending if no direction is given). Otherwise the default order is
 * score (descending), then year, title and volume (ascending).
 *
 * @access private
 *
 * @return array map of sort field name to sort direction
 */
private function getSort() : array
{
    if (!empty($this->searchParams['orderBy'])) {
        // A sort field may be given without a direction; fall back to ascending
        // instead of reading an undefined key and handing null on as direction.
        $order = !empty($this->searchParams['order']) ? $this->searchParams['order'] : 'asc';

        return [
            $this->searchParams['orderBy'] => $order,
        ];
    }

    return [
        'score' => 'desc',
        'year_sorting' => 'asc',
        'title_sorting' => 'asc',
        'volume_sorting' => 'asc'
    ];
}
||||
| 833 | |||||
/**
 * Converts a single Solr record into the plain array used in result sets.
 *
 * The record is wrapped in a `ResultDocument`, whose getters supply the fixed
 * keys of the returned array. A `metadata` key is added only when at least
 * one of the requested Solr fields is non-empty on the record.
 *
 * @access private
 *
 * @param Document $record the record fetched from Solr
 * @param array $highlighting highlighting data passed on to `ResultDocument`
 * @param array $fields Solr field configuration passed on to `ResultDocument`
 * @param array $parameters may contain `uid` (fallback if the record has no uid)
 *                          and `listMetadataRecords` (index name => Solr field name)
 *
 * @return array the document with the keys `id`, `page`, `snippet`, `thumbnail`,
 *               `title`, `toplevel`, `type`, `uid`, `highlight` and optionally `metadata`
 */
private function getDocument(Document $record, array $highlighting, array $fields, $parameters) : array
{
    $resultDocument = new ResultDocument($record, $highlighting, $fields);

    $document = [
        'id' => $resultDocument->getId(),
        'page' => $resultDocument->getPage(),
        'snippet' => $resultDocument->getSnippets(),
        'thumbnail' => $resultDocument->getThumbnail(),
        'title' => $resultDocument->getTitle(),
        'toplevel' => $resultDocument->getToplevel(),
        'type' => $resultDocument->getType(),
        // Use the uid from the parameters only if the record itself has none.
        'uid' => $resultDocument->getUid() ?: ($parameters['uid'] ?? null),
        'highlight' => $resultDocument->getHighlightsIds(),
    ];

    // Both parameter keys are optional; a missing metadata list yields no metadata.
    foreach (($parameters['listMetadataRecords'] ?? []) as $indexName => $solrField) {
        if (!empty($record->$solrField)) {
            $document['metadata'][$indexName] = $record->$solrField;
        }
    }

    return $document;
}
||||
| 869 | |||||
/**
 * Translate language code if applicable.
 *
 * Replaces every entry of `$doc['metadata']['language']` in place with the
 * result of `Helper::getLanguageName()`. Documents without a `metadata`
 * array, or whose `language` entry is not an array, are left unchanged.
 *
 * @access private
 *
 * @param array &$doc document array, modified in place
 *
 * @return void
 */
private function translateLanguageCode(&$doc): void
{
    // The 'metadata' key is optional, so check for it before reading.
    if (!isset($doc['metadata']) || !is_array($doc['metadata'])) {
        return;
    }

    // Only a list of language codes can be iterated.
    if (!isset($doc['metadata']['language']) || !is_array($doc['metadata']['language'])) {
        return;
    }

    foreach ($doc['metadata']['language'] as $indexName => $language) {
        $doc['metadata']['language'][$indexName] = Helper::getLanguageName($language);
    }
}
||||
| 887 | } |
||||
| 888 |