We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
1 | <?php |
||||
2 | |||||
3 | namespace Kitodo\Dlf\Common\Solr; |
||||
4 | |||||
5 | use Exception; |
||||
6 | use Kitodo\Dlf\Common\AbstractDocument; |
||||
7 | use Kitodo\Dlf\Common\Helper; |
||||
8 | use Kitodo\Dlf\Common\Indexer; |
||||
9 | use Kitodo\Dlf\Common\Solr\SearchResult\ResultDocument; |
||||
10 | use Kitodo\Dlf\Domain\Repository\DocumentRepository; |
||||
11 | use Solarium\QueryType\Select\Result\Document; |
||||
12 | use TYPO3\CMS\Core\Cache\CacheManager; |
||||
13 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||||
14 | use TYPO3\CMS\Core\Utility\MathUtility; |
||||
15 | use TYPO3\CMS\Extbase\Persistence\Generic\QueryResult; |
||||
16 | use TYPO3\CMS\Extbase\Persistence\QueryResultInterface; |
||||
17 | use TYPO3\CMS\Extbase\Persistence\QueryInterface; |
||||
18 | |||||
19 | /** |
||||
20 | * Targeted towards being used in ``PaginateController`` (``<f:widget.paginate>``). |
||||
21 | * |
||||
22 | * Notes on implementation: |
||||
23 | * - `Countable`: `count()` returns the number of toplevel documents. |
||||
24 | * - `getNumLoadedDocuments()`: Number of toplevel documents that have been fetched from Solr. |
||||
25 | * - `ArrayAccess`/`Iterator`: Access *fetched* toplevel documents indexed in order of their ranking. |
||||
26 | * |
||||
27 | * @package TYPO3 |
||||
28 | * @subpackage dlf |
||||
29 | * |
||||
30 | * @access public |
||||
31 | */ |
||||
32 | class SolrSearch implements \Countable, \Iterator, \ArrayAccess, QueryResultInterface |
||||
33 | { |
||||
/**
 * Repository used to load the document records that belong to the Solr hits.
 *
 * @access private
 * @var DocumentRepository
 */
private DocumentRepository $documentRepository;

/**
 * Collections the search is restricted to; may be empty.
 *
 * @access private
 * @var array|QueryResultInterface
 */
private $collections;

/**
 * Settings of the calling plugin. This class reads the keys `storagePid` and `solrcore`.
 *
 * @access private
 * @var array
 */
private array $settings;

/**
 * Search parameters of the request (e.g. `query`, `fulltext`, `extQuery`, `fq`, `orderBy`).
 *
 * @access private
 * @var array
 */
private array $searchParams;

/**
 * Metadata records whose Solr fields are requested together with each hit.
 *
 * @access private
 * @var QueryResult|null
 */
private ?QueryResult $listedMetadata;

/**
 * Metadata records used to build the query fields of the Solr request.
 *
 * @access private
 * @var QueryResult|null
 */
private ?QueryResult $indexedMetadata;

/**
 * Query parameters built by `prepare()` and reused by every `submit()` call.
 *
 * @access private
 * @var array
 */
private array $params;

/**
 * Result of the last `submit()` call; stays null until the first call.
 *
 * @access private
 * @var array|null
 */
private ?array $result = null;

/**
 * Cursor of the `Iterator` implementation.
 *
 * @access protected
 * @var int
 */
protected int $position = 0;
||||
87 | |||||
/**
 * Constructs SolrSearch instance.
 *
 * Only stores the given dependencies; no Solr request is sent here.
 *
 * @access public
 *
 * @param DocumentRepository $documentRepository
 * @param array|QueryResultInterface $collections can contain 0, 1 or many Collection objects
 * @param array $settings
 * @param array $searchParams
 * @param QueryResult|null $listedMetadata
 * @param QueryResult|null $indexedMetadata
 *
 * @return void
 */
public function __construct(
    DocumentRepository $documentRepository,
    $collections,
    array $settings,
    array $searchParams,
    ?QueryResult $listedMetadata = null,
    ?QueryResult $indexedMetadata = null
) {
    // The nullable types are spelled out: relying on "= null" to make a typed
    // parameter nullable is deprecated as of PHP 8.4.
    $this->documentRepository = $documentRepository;
    $this->collections = $collections;
    $this->settings = $settings;
    $this->searchParams = $searchParams;
    $this->listedMetadata = $listedMetadata;
    $this->indexedMetadata = $indexedMetadata;
}
||||
111 | |||||
/**
 * Gets amount of loaded documents.
 *
 * Returns 0 as long as no result is available, instead of passing null to `count()`.
 *
 * @access public
 *
 * @return int
 */
public function getNumLoadedDocuments(): int
{
    return count($this->result['documents'] ?? []);
}
||||
123 | |||||
/**
 * Count results.
 *
 * Returns the number of toplevel documents reported by the last search, or 0
 * when no result is available or the result does not carry that number.
 *
 * @access public
 *
 * @return int
 */
public function count(): int
{
    // The "nothing found" result written by submit() has no 'numberOfToplevels'
    // entry, so the key must not be assumed to exist.
    return (int) ($this->result['numberOfToplevels'] ?? 0);
}
||||
139 | |||||
/**
 * Returns the document at the current iterator position.
 *
 * @access public
 *
 * @return array
 */
public function current(): array
{
    // Array access on $this is routed through offsetGet().
    return $this->offsetGet($this->position);
}
||||
151 | |||||
/**
 * Returns the current iterator position.
 *
 * @access public
 *
 * @return int
 */
public function key(): int
{
    $currentPosition = $this->position;

    return $currentPosition;
}
||||
163 | |||||
/**
 * Advances the iterator by one position.
 *
 * @access public
 *
 * @return void
 */
public function next(): void
{
    $this->position += 1;
}
||||
175 | |||||
/**
 * Moves the iterator back to the first position.
 *
 * @access public
 *
 * @return void
 */
public function rewind(): void
{
    $this->position = 0;
}
||||
187 | |||||
/**
 * Tells whether a document exists at the current iterator position.
 *
 * @access public
 *
 * @return bool
 */
public function valid(): bool
{
    // isset() on $this is routed through offsetExists().
    return $this->offsetExists($this->position);
}
||||
197 | |||||
/**
 * Checks if the document with given offset exists.
 *
 * The offset is a position in the ranked list of loaded documents; it is
 * mapped to the document key before the lookup. Offsets outside of the
 * loaded range (as reached at the end of every iteration) yield false.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return bool
 */
public function offsetExists($offset): bool
{
    $idx = $this->result['document_keys'][$offset] ?? null;

    return $idx !== null && isset($this->result['documents'][$idx]);
}
||||
212 | |||||
/**
 * Gets the document with given offset.
 *
 * The offset is a position in the ranked list of loaded documents. Missing
 * metadata and a missing title are filled in on the returned copy.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return mixed the document, or null if there is none at the given offset
 */
#[\ReturnTypeWillChange]
public function offsetGet($offset)
{
    $idx = $this->result['document_keys'][$offset] ?? null;
    if ($idx === null) {
        return null;
    }

    $document = $this->result['documents'][$idx] ?? null;
    if ($document === null) {
        return null;
    }

    // It may happen that a Solr group only includes non-toplevel results,
    // in which case metadata of toplevel entry isn't yet filled.
    if (empty($document['metadata'])) {
        $document['metadata'] = $this->fetchToplevelMetadataFromSolr([
            'query' => 'uid:' . $document['uid'],
            'start' => 0,
            'rows' => 1,
            'sort' => ['score' => 'desc'],
        ])[$document['uid']] ?? [];
    }

    // get title of parent/grandparent/... if empty
    if (empty($document['title']) && ($document['partOf'] ?? 0) > 0) {
        $superiorTitle = AbstractDocument::getTitle($document['partOf'], true);
        if (!empty($superiorTitle)) {
            $document['title'] = '[' . $superiorTitle . ']';
        }
    }

    return $document;
}
||||
251 | |||||
/**
 * Not supported: the result list is read-only.
 *
 * @access public
 *
 * @param int $offset
 * @param int $value
 *
 * @return void
 *
 * @throws \Exception always
 */
public function offsetSet($offset, $value): void
{
    $message = "SolrSearch: Modifying result list is not supported";

    throw new Exception($message);
}
||||
268 | |||||
/**
 * Not supported: the result list is read-only.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return void
 *
 * @throws \Exception always
 */
public function offsetUnset($offset): void
{
    $message = "SolrSearch: Modifying result list is not supported";

    throw new Exception($message);
}
||||
284 | |||||
/**
 * Gets SOLR results.
 *
 * Returns the `solrResults` entry stored by the last `submit()` call.
 *
 * @access public
 *
 * @return mixed null if no result is available
 */
public function getSolrResults()
{
    return $this->result['solrResults'] ?? null;
}
||||
296 | |||||
/**
 * Gets by UID.
 *
 * Looks the document up directly by its key in the loaded documents. Unlike
 * array access, the stored entry is returned as is.
 *
 * @access public
 *
 * @param int $uid
 *
 * @return mixed null if no document with this UID has been loaded
 */
public function getByUid($uid)
{
    return $this->result['documents'][$uid] ?? null;
}
||||
310 | |||||
/**
 * Creates a query object wrapping this search.
 *
 * @access public
 *
 * @return SolrSearchQuery
 */
public function getQuery()
{
    $query = new SolrSearchQuery($this);

    return $query;
}
||||
322 | |||||
/**
 * Not supported: the query of a SolrSearch cannot be replaced.
 *
 * @access public
 *
 * @param QueryInterface $query the query
 *
 * @throws Exception always
 *
 * @return void
 */
public function setQuery(QueryInterface $query): void
{
    $message = "setQuery not supported on SolrSearch instance";

    throw new Exception($message);
}
||||
338 | |||||
/**
 * Gets the first loaded document.
 *
 * @access public
 *
 * @return mixed whatever array access returns for offset 0
 */
public function getFirst()
{
    // Array access on $this is routed through offsetGet().
    return $this->offsetGet(0);
}
||||
350 | |||||
/**
 * Parses results to array.
 *
 * Returns the loaded documents re-indexed from 0, or an empty array as long
 * as no result is available.
 *
 * @access public
 *
 * @return array
 */
public function toArray()
{
    return array_values($this->result['documents'] ?? []);
}
||||
362 | |||||
/**
 * Get total number of hits.
 *
 * This can be accessed in Fluid template using `.numFound`.
 * Returns 0 as long as no result is available.
 *
 * @access public
 *
 * @return int
 */
public function getNumFound()
{
    return $this->result['numFound'] ?? 0;
}
||||
376 | |||||
/**
 * Prepares SOLR search.
 *
 * Builds the query, the filter queries, the sorting and the field list from
 * the search parameters, stores them for later `submit()` calls and sends a
 * first request without result processing to obtain the totals.
 *
 * @access public
 *
 * @return void
 */
public function prepare()
{
    // Prepare query parameters.
    $params = [];
    $matches = [];
    $fields = Solr::getFields();
    $query = '';

    // Set search query.
    if (
        !empty($this->searchParams['fulltext'])
        || preg_match(
            '/' . preg_quote($fields['fulltext'], '/') . ':\((.*)\)/',
            trim($this->searchParams['query'] ?? ''),
            $matches
        )
    ) {
        // If the query already is a fulltext query e.g using the facets
        $this->searchParams['query'] = empty($matches[1]) ? ($this->searchParams['query'] ?? '') : $matches[1];
        // Search in fulltext field if applicable. Query must not be empty!
        if (!empty($this->searchParams['query'])) {
            $query = $fields['fulltext'] . ':(' . Solr::escapeQuery(trim($this->searchParams['query'])) . ')';
        }
        $params['fulltext'] = true;
    } elseif (!empty($this->searchParams['query'])) {
        // Retain given search field if valid.
        $query = Solr::escapeQueryKeepField(trim($this->searchParams['query']), $this->settings['storagePid']);
    }

    // Add extended search query.
    if (
        !empty($this->searchParams['extQuery'])
        && is_array($this->searchParams['extQuery'])
    ) {
        $allowedOperators = ['AND', 'OR', 'NOT'];
        $numberOfExtQueries = count($this->searchParams['extQuery']);
        for ($i = 0; $i < $numberOfExtQueries; $i++) {
            if (empty($this->searchParams['extQuery'][$i])) {
                continue;
            }
            // The operator may be missing for this row; the strict comparison
            // makes sure only the literal operator strings are accepted.
            $operator = $this->searchParams['extOperator'][$i] ?? '';
            if (!in_array($operator, $allowedOperators, true)) {
                continue;
            }
            if (!empty($query)) {
                $query .= ' ' . $operator . ' ';
            }
            $query .= Indexer::getIndexFieldName($this->searchParams['extField'][$i], $this->settings['storagePid']) . ':(' . Solr::escapeQuery($this->searchParams['extQuery'][$i]) . ')';
        }
    }

    // Add filter query for date search
    if (!empty($this->searchParams['dateFrom']) && !empty($this->searchParams['dateTo'])) {
        // combine dateFrom and dateTo into range search
        // NOTE(review): both values are inserted verbatim - confirm they are validated by the caller.
        $params['filterquery'][]['query'] = '{!join from=' . $fields['uid'] . ' to=' . $fields['uid'] . '}' . $fields['date'] . ':[' . $this->searchParams['dateFrom'] . ' TO ' . $this->searchParams['dateTo'] . ']';
    }

    // Add filter query for faceting.
    if (isset($this->searchParams['fq']) && is_array($this->searchParams['fq'])) {
        foreach ($this->searchParams['fq'] as $filterQuery) {
            $params['filterquery'][]['query'] = $filterQuery;
        }
    }

    // Add filter query for in-document searching.
    if (
        !empty($this->searchParams['documentId'])
        && MathUtility::canBeInterpretedAsInteger($this->searchParams['documentId'])
    ) {
        // Search in document and all subordinates (valid for up to three levels of hierarchy).
        $params['filterquery'][]['query'] = '_query_:"{!join from='
            . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':{!join from=' . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':' . $this->searchParams['documentId'] . '"' . ' OR {!join from='
            . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':' . $this->searchParams['documentId'] . ' OR '
            . $fields['uid'] . ':' . $this->searchParams['documentId'];
    }

    // if collections are given, we prepare the collection query string
    if (!empty($this->collections)) {
        $params['filterquery'][]['query'] = $this->getCollectionFilterQuery($query);
    }

    // Set some query parameters.
    $params['query'] = !empty($query) ? $query : '*';

    $params['sort'] = $this->getSort();
    $params['listMetadataRecords'] = [];

    // Restrict the fields to the required ones.
    $params['fields'] = 'uid,id,page,title,thumbnail,partof,toplevel,type';

    if ($this->listedMetadata) {
        foreach ($this->listedMetadata as $metadata) {
            if ($metadata->getIndexStored() || $metadata->getIndexIndexed()) {
                // Solr field name: index name plus one flag letter each for tokenized/stored/indexed.
                $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u');
                $params['fields'] .= ',' . $listMetadataRecord;
                $params['listMetadataRecords'][$metadata->getIndexName()] = $listMetadataRecord;
            }
        }
    }

    $this->params = $params;

    // Send off query to get total number of search results in advance
    $this->submit(0, 1, false);
}
||||
489 | |||||
/**
 * Submits SOLR search.
 *
 * Runs the query built by `prepare()` for the given window and stores the
 * outcome in `$this->result`. Unless `$processResults` is false, the Solr
 * hits are merged with the document records from the repository:
 * non-toplevel hits become `searchResults` of their document, toplevel hits
 * contribute metadata and (outside of fulltext searches) the children.
 *
 * @access public
 *
 * @param int $start
 * @param int $rows
 * @param bool $processResults default value is true
 *
 * @return void
 */
public function submit($start, $rows, $processResults = true)
{
    $params = $this->params;
    $params['start'] = $start;
    $params['rows'] = $rows;

    // Perform search.
    $result = $this->searchSolr($params, true);

    // Initialize values
    $documents = [];

    if ($processResults && $result['numFound'] > 0) {
        // flat array with uids from Solr search
        $documentSet = array_unique(array_column($result['documents'], 'uid'));

        if (empty($documentSet)) {
            // return nothing found; carries the same keys as the regular result below
            $this->result = ['solrResults' => [], 'numberOfToplevels' => 0, 'documents' => [], 'document_keys' => [], 'numFound' => 0];
            return;
        }

        // get the Extbase document objects for all uids
        $allDocuments = $this->documentRepository->findAllByUids($documentSet);
        $childrenOf = $this->documentRepository->findChildrenOfEach($documentSet);

        // The "fulltext" flag may be missing from the search parameters.
        $isFulltextSearch = ($this->searchParams['fulltext'] ?? null) == '1';
        // Derived from the query only, hence computed at most once (see below).
        $highlightWord = null;

        foreach ($result['documents'] as $doc) {
            $uid = $doc['uid'];

            if (empty($documents[$uid]) && isset($allDocuments[$uid])) {
                $documents[$uid] = $allDocuments[$uid];
            }
            if (!isset($documents[$uid])) {
                continue;
            }

            $this->translateLanguageCode($doc);

            if ($doc['toplevel'] === false) {
                // this maybe a chapter, article, ..., year
                if ($doc['type'] === 'year' || empty($doc['page'])) {
                    continue;
                }

                // it's probably a fulltext or metadata search
                $searchResult = [];
                $searchResult['page'] = $doc['page'];
                $searchResult['thumbnail'] = $doc['thumbnail'];
                $searchResult['structure'] = $doc['type'];
                $searchResult['title'] = $doc['title'];
                foreach ($params['listMetadataRecords'] as $indexName => $solrField) {
                    if (isset($doc['metadata'][$indexName])) {
                        $searchResult['metadata'][$indexName] = $doc['metadata'][$indexName];
                    }
                }
                if ($isFulltextSearch) {
                    $highlightWord ??= preg_replace('/^;|;$/', '', // remove ; at beginning or end
                        preg_replace('/;+/', ';', // replace any multiple of ; with a single ;
                            preg_replace('/[{~\d*}{\s+}{^=*\d+.*\d*}`~!@#$%\^&*()_|+-=?;:\'",.<>\{\}\[\]\\\]/', ';', $this->searchParams['query'] ?? ''))); // replace search operators and special characters with ;

                    $searchResult['snippet'] = $doc['snippet'];
                    $searchResult['highlight'] = $doc['highlight'];
                    $searchResult['highlight_word'] = $highlightWord;
                }
                $documents[$uid]['searchResults'][] = $searchResult;
            } elseif ($doc['toplevel'] === true) {
                foreach ($params['listMetadataRecords'] as $indexName => $solrField) {
                    if (isset($doc['metadata'][$indexName])) {
                        $documents[$uid]['metadata'][$indexName] = $doc['metadata'][$indexName];
                    }
                }

                if ($isFulltextSearch) {
                    continue;
                }

                $documents[$uid]['page'] = 1;
                $children = $childrenOf[$uid] ?? [];

                if (!empty($children)) {
                    $batchSize = 100;
                    $totalChildren = count($children);

                    for ($batchStart = 0; $batchStart < $totalChildren; $batchStart += $batchSize) {
                        $batch = array_slice($children, $batchStart, $batchSize, true);

                        // Fetch metadata for the current batch
                        // NOTE(review): the Solr window is assumed to cover the same children as $batch - confirm ordering.
                        $metadataOf = $this->fetchToplevelMetadataFromSolr([
                            'query' => 'partof:' . $uid,
                            'start' => $batchStart,
                            'rows' => min($batchSize, $totalChildren - $batchStart),
                        ]);

                        foreach ($batch as $docChild) {
                            // We need only a few fields from the children, but we need them as an array.
                            $childDocument = [
                                'thumbnail' => $docChild['thumbnail'],
                                'title' => $docChild['title'],
                                'structure' => $docChild['structure'],
                                'metsOrderlabel' => $docChild['metsOrderlabel'],
                                'uid' => $docChild['uid'],
                                // Solr may not have returned metadata for every child.
                                'metadata' => $metadataOf[$docChild['uid']] ?? [],
                            ];
                            $documents[$uid]['children'][$docChild['uid']] = $childDocument;
                        }
                    }
                }
            }
        }
    }

    $this->result = ['solrResults' => $result, 'numberOfToplevels' => $result['numberOfToplevels'], 'documents' => $documents, 'document_keys' => array_keys($documents), 'numFound' => $result['numFound']];
}
||||
605 | |||||
/**
 * Find all listed metadata using specified query params.
 *
 * Sends the given query restricted to toplevel documents and to the fields
 * of the listed metadata, and returns the metadata of each hit keyed by the
 * document UID. Hits without any listed metadata yield an empty array.
 *
 * @access protected
 *
 * @param array $queryParams
 *
 * @return array
 */
protected function fetchToplevelMetadataFromSolr(array $queryParams): array
{
    // Prepare query parameters.
    $params = $queryParams;
    $metadataArray = [];

    // Set some query parameters.
    $params['listMetadataRecords'] = [];

    // Restrict the fields to the required ones.
    $params['fields'] = 'uid,toplevel';

    if ($this->listedMetadata) {
        foreach ($this->listedMetadata as $metadata) {
            if ($metadata->getIndexStored() || $metadata->getIndexIndexed()) {
                // Solr field name: index name plus one flag letter each for tokenized/stored/indexed.
                $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u');
                $params['fields'] .= ',' . $listMetadataRecord;
                $params['listMetadataRecords'][$metadata->getIndexName()] = $listMetadataRecord;
            }
        }
    }
    // Set filter query to just get toplevel documents.
    $params['filterquery'][] = ['query' => 'toplevel:true'];

    // Perform search.
    $result = $this->searchSolr($params, true);

    foreach ($result['documents'] as $doc) {
        // getDocument() only adds the 'metadata' key when at least one listed field has a value.
        $doc['metadata'] = $doc['metadata'] ?? [];
        $this->translateLanguageCode($doc);
        $metadataArray[$doc['uid']] = $doc['metadata'];
    }

    return $metadataArray;
}
||||
649 | |||||
/**
 * Processes a search request
 *
 * Sends the given parameters as a select query grouped by `uid` and returns
 * the found records together with the number of groups and matches. An
 * empty result set is returned if Solr is not ready or answers with HTTP
 * status 400. Non-empty result sets are cached when caching is enabled.
 *
 * @access protected
 *
 * @param array $parameters Additional search parameters
 * @param bool $enableCache Enable caching of Solr requests
 *
 * @return array The Apache Solr Documents that were fetched
 */
protected function searchSolr($parameters = [], $enableCache = true)
{
    // Set query.
    $parameters['query'] = $parameters['query'] ?? '*';
    $parameters['filterquery'] = $parameters['filterquery'] ?? [];

    // Returned as is whenever no documents can be fetched.
    $resultSet = [
        'documents' => [],
        'numberOfToplevels' => 0,
        'numFound' => 0,
    ];

    // Perform Solr query.
    // Instantiate search object.
    $solr = Solr::getInstance($this->settings['solrcore']);
    if (!$solr->ready) {
        Helper::log('Apache Solr not available', LOG_SEVERITY_ERROR);
        return $resultSet;
    }

    $cacheIdentifier = '';
    $cache = null;
    if ($enableCache === true) {
        // Calculate cache identifier.
        $cacheIdentifier = Helper::digest($solr->core . print_r($parameters, true));
        $cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('tx_dlf_solr');

        $entry = $cache->get($cacheIdentifier);
        if ($entry !== false) {
            // Return cache hit.
            return $entry;
        }
    }

    $selectQuery = $solr->service->createSelect($parameters);

    $edismax = $selectQuery->getEDisMax();

    $queryFields = '';

    if ($this->indexedMetadata) {
        foreach ($this->indexedMetadata as $metadata) {
            if ($metadata->getIndexIndexed()) {
                $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . 'i';
                $queryFields .= $listMetadataRecord . '^' . $metadata->getIndexBoost() . ' ';
            }
        }
    }

    $edismax->setQueryFields($queryFields);

    $grouping = $selectQuery->getGrouping();
    $grouping->addField('uid');
    $grouping->setLimit(100); // Results in group (TODO: check)
    $grouping->setNumberOfGroups(true);

    $fulltextExists = $parameters['fulltext'] ?? false;
    if ($fulltextExists === true) {
        // get highlighting component and apply settings
        $selectQuery->getHighlighting();
    }

    $solrRequest = $solr->service->createRequest($selectQuery);

    if ($fulltextExists === true) {
        // If it is a fulltext search, enable highlighting.
        // field for which highlighting is going to be performed,
        // is required if you want to have OCR highlighting
        $solrRequest->addParam('hl.ocr.fl', 'fulltext');
        // return the coordinates of highlighted search as absolute coordinates
        $solrRequest->addParam('hl.ocr.absoluteHighlights', 'on');
        // max amount of snippets for a single page
        $solrRequest->addParam('hl.snippets', '20');
        // we store the fulltext on page level and can disable this option
        $solrRequest->addParam('hl.ocr.trackPages', 'off');
    }

    // Perform search for all documents with the same uid that either fit to the search or marked as toplevel.
    $response = $solr->service->executeRequest($solrRequest);
    // return empty resultSet on error-response
    if ($response->getStatusCode() == 400) {
        return $resultSet;
    }
    $result = $solr->service->createResult($selectQuery, $response);

    // TODO: Call to an undefined method Solarium\Core\Query\Result\ResultInterface::getGrouping().
    // @phpstan-ignore-next-line
    $uidGroup = $result->getGrouping()->getGroup('uid');
    $resultSet['numberOfToplevels'] = $uidGroup->getNumberOfGroups();
    $resultSet['numFound'] = $uidGroup->getMatches();
    $highlighting = [];
    if ($fulltextExists === true) {
        $data = $result->getData();
        // The response may come without the highlighting section; getDocument() requires an array.
        $highlighting = $data['ocrHighlighting'] ?? [];
    }
    $fields = Solr::getFields();

    foreach ($uidGroup as $group) {
        foreach ($group as $record) {
            $resultSet['documents'][] = $this->getDocument($record, $highlighting, $fields, $parameters);
        }
    }

    // Save value in cache.
    if (!empty($resultSet['documents']) && $enableCache === true) {
        $cache->set($cacheIdentifier, $resultSet);
    }

    return $resultSet;
}
||||
770 | |||||
/**
 * Builds the filter query restricting the search to the given collections.
 *
 * Collections providing an index search string contribute that string;
 * all others are matched by their index name in `collection_faceting`.
 *
 * @access private
 *
 * @param string $query the search query built so far; decides between browsing and searching
 *
 * @return string
 */
private function getCollectionFilterQuery(string $query) : string
{
    $indexNames = [];
    $virtualQueries = [];

    foreach ($this->collections as $collection) {
        // check for virtual collections query string
        if ($collection->getIndexSearch()) {
            $virtualQueries[] = '(' . $collection->getIndexSearch() . ')';
        } else {
            $indexNames[] = '"' . $collection->getIndexName() . '"';
        }
    }

    $filterParts = [];

    if ($indexNames !== []) {
        $collectionList = implode(' OR ', $indexNames);
        // distinguish between simple collection browsing and actual searching within the collection(s)
        $filterParts[] = empty($query)
            ? '(collection_faceting:(' . $collectionList . ') AND toplevel:true AND partof:0)'
            : '(collection_faceting:(' . $collectionList . '))';
    }

    // virtual collections might query documents that are neither toplevel:true nor partof:0 and need to be searched separately
    if ($virtualQueries !== []) {
        $filterParts[] = '(' . implode(' OR ', $virtualQueries) . ')';
    }

    // combine both query strings into a single filterquery via OR if both are given, otherwise pass either of those
    return implode(' OR ', $filterParts);
}
||||
810 | |||||
/**
 * Get sort order of the results as given or the default order.
 *
 * If `orderBy` is given without a direction, ascending order is used.
 * Without `orderBy`, results are sorted by score, then year, title and volume.
 *
 * @access private
 *
 * @return array
 */
private function getSort() : array
{
    if (!empty($this->searchParams['orderBy'])) {
        $direction = !empty($this->searchParams['order']) ? $this->searchParams['order'] : 'asc';

        return [
            $this->searchParams['orderBy'] => $direction,
        ];
    }

    return [
        'score' => 'desc',
        'year_sorting' => 'asc',
        'title_sorting' => 'asc',
        'volume_sorting' => 'asc'
    ];
}
||||
833 | |||||
/**
 * Gets a document
 *
 * Converts a Solr record into the array structure used by this class. The
 * `metadata` key is only present if at least one listed field has a value.
 *
 * @access private
 *
 * @param Document $record
 * @param array $highlighting
 * @param array $fields
 * @param array $parameters search parameters; `uid` and `listMetadataRecords` are read if present
 *
 * @return array The Apache Solr Documents that were fetched
 */
private function getDocument(Document $record, array $highlighting, array $fields, $parameters): array
{
    $resultDocument = new ResultDocument($record, $highlighting, $fields);

    $uid = $resultDocument->getUid();
    if (empty($uid)) {
        // Fall back to the uid handed in with the parameters, if there is one.
        $uid = $parameters['uid'] ?? null;
    }

    $document = [
        'id' => $resultDocument->getId(),
        'page' => $resultDocument->getPage(),
        'snippet' => $resultDocument->getSnippets(),
        'thumbnail' => $resultDocument->getThumbnail(),
        'title' => $resultDocument->getTitle(),
        'toplevel' => $resultDocument->getToplevel(),
        'type' => $resultDocument->getType(),
        'uid' => $uid,
        'highlight' => $resultDocument->getHighlightsIds(),
    ];

    foreach ($parameters['listMetadataRecords'] ?? [] as $indexName => $solrField) {
        if (!empty($record->$solrField)) {
            $document['metadata'][$indexName] = $record->$solrField;
        }
    }

    return $document;
}
||||
869 | |||||
/**
 * Translate language code if applicable.
 *
 * Replaces every entry of `$doc['metadata']['language']` by its language
 * name. Documents without metadata or without a list of languages are left
 * untouched.
 *
 * @access private
 *
 * @param &$doc document array
 *
 * @return void
 */
private function translateLanguageCode(&$doc): void
{
    $languages = $doc['metadata']['language'] ?? null;
    if (!is_array($languages)) {
        return;
    }

    foreach ($languages as $indexName => $language) {
        $doc['metadata']['language'][$indexName] = Helper::getLanguageName($language);
    }
}
||||
887 | } |
||||
888 |