We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
1 | <?php |
||||
2 | |||||
3 | namespace Kitodo\Dlf\Common\Solr; |
||||
4 | |||||
5 | use Kitodo\Dlf\Common\AbstractDocument; |
||||
6 | use Kitodo\Dlf\Common\Helper; |
||||
7 | use Kitodo\Dlf\Common\Indexer; |
||||
8 | use Kitodo\Dlf\Common\Solr\SearchResult\ResultDocument; |
||||
9 | use Kitodo\Dlf\Domain\Repository\DocumentRepository; |
||||
10 | use Solarium\QueryType\Select\Result\Document; |
||||
11 | use TYPO3\CMS\Core\Cache\CacheManager; |
||||
12 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||||
13 | use TYPO3\CMS\Core\Utility\MathUtility; |
||||
14 | use TYPO3\CMS\Extbase\Persistence\Generic\QueryResult; |
||||
15 | use TYPO3\CMS\Extbase\Persistence\QueryResultInterface; |
||||
16 | |||||
17 | /** |
||||
18 | * Targeted towards being used in ``PaginateController`` (``<f:widget.paginate>``). |
||||
19 | * |
||||
20 | * Notes on implementation: |
||||
21 | * - `Countable`: `count()` returns the number of toplevel documents. |
||||
22 | * - `getNumLoadedDocuments()`: Number of toplevel documents that have been fetched from Solr. |
||||
23 | * - `ArrayAccess`/`Iterator`: Access *fetched* toplevel documents indexed in order of their ranking. |
||||
24 | * |
||||
25 | * @package TYPO3 |
||||
26 | * @subpackage dlf |
||||
27 | * |
||||
28 | * @access public |
||||
29 | */ |
||||
30 | class SolrSearch implements \Countable, \Iterator, \ArrayAccess, QueryResultInterface |
||||
31 | { |
||||
/**
 * Repository used by submit() to load document records and their children.
 *
 * @access private
 * @var DocumentRepository
 */
private DocumentRepository $documentRepository;

/**
 * Collections the search is restricted to; iterated when building the collection filter query.
 *
 * @access private
 * @var array|QueryResultInterface
 */
private $collections;

/**
 * Plugin settings; this class reads the 'storagePid' and 'solrcore' entries.
 *
 * @access private
 * @var array
 */
private array $settings;

/**
 * Search parameters (e.g. 'query', 'fulltext', 'extQuery', 'fq', 'orderBy').
 *
 * @access private
 * @var array
 */
private array $searchParams;

/**
 * Metadata records whose index fields are requested from Solr, if any.
 *
 * @access private
 * @var QueryResult|null
 */
private ?QueryResult $listedMetadata;

/**
 * Solr query parameters built by prepare() and reused by submit().
 *
 * @access private
 * @var array
 */
private array $params;

/**
 * Result of the last submit() call; null as long as no search has been submitted.
 *
 * @access private
 * @var array|null
 */
private ?array $result = null;

/**
 * Iterator cursor into the list of loaded documents.
 *
 * @access protected
 * @var int
 */
protected int $position = 0;
||||
79 | |||||
/**
 * Constructs SolrSearch instance.
 *
 * Only stores the given dependencies and parameters; no Solr request is sent
 * from the constructor.
 *
 * @access public
 *
 * @param DocumentRepository $documentRepository
 * @param array|QueryResultInterface $collections can contain 0, 1 or many Collection objects
 * @param array $settings
 * @param array $searchParams
 * @param QueryResult|null $listedMetadata
 *
 * @return void
 */
public function __construct(DocumentRepository $documentRepository, $collections, array $settings, array $searchParams, ?QueryResult $listedMetadata = null)
{
    $this->documentRepository = $documentRepository;
    $this->collections = $collections;
    $this->settings = $settings;
    $this->searchParams = $searchParams;
    // Explicitly nullable: matches the ?QueryResult property and avoids the
    // implicit-nullable parameter form that newer PHP versions deprecate.
    $this->listedMetadata = $listedMetadata;
}
||||
101 | |||||
/**
 * Gets amount of loaded documents.
 *
 * Returns 0 when no search has been submitted yet instead of failing on
 * count(null).
 *
 * @access public
 *
 * @return int
 */
public function getNumLoadedDocuments(): int
{
    return count($this->result['documents'] ?? []);
}
||||
113 | |||||
/**
 * Count results.
 *
 * Returns the number of toplevel documents reported by the last search, or 0
 * when no search has been submitted or the stored result carries no
 * 'numberOfToplevels' entry.
 *
 * @access public
 *
 * @return int
 */
public function count(): int
{
    // The null-coalescing read covers both a null result and a result array
    // without the key, so the int return type is always satisfied.
    return (int) ($this->result['numberOfToplevels'] ?? 0);
}
||||
129 | |||||
/**
 * Returns the document at the current iterator position.
 *
 * @access public
 *
 * @return array
 */
public function current(): array
{
    // Same lookup as array access on $this, spelled out explicitly.
    return $this->offsetGet($this->position);
}
||||
141 | |||||
/**
 * Returns the current iterator position.
 *
 * @access public
 *
 * @return int
 */
public function key(): int
{
    $currentPosition = $this->position;

    return $currentPosition;
}
||||
153 | |||||
/**
 * Advances the iterator position by one.
 *
 * @access public
 *
 * @return void
 */
public function next(): void
{
    $this->position += 1;
}
||||
165 | |||||
/**
 * Resets the iterator position to the first entry.
 *
 * @access public
 *
 * @return void
 */
public function rewind(): void
{
    $firstPosition = 0;
    $this->position = $firstPosition;
}
||||
177 | |||||
/**
 * Tells whether a document exists at the current iterator position.
 *
 * @access public
 *
 * @return bool
 */
public function valid(): bool
{
    // isset() on $this dispatches to offsetExists(); call it directly.
    return $this->offsetExists($this->position);
}
||||
187 | |||||
/**
 * Checks if the document with given offset exists.
 *
 * The offset is a position in the list of loaded document keys, not a
 * document uid. Unknown offsets (including the position one past the last
 * entry, which is probed at the end of every iteration) yield false without
 * raising an "undefined array key" warning.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return bool
 */
public function offsetExists($offset): bool
{
    $idx = $this->result['document_keys'][$offset] ?? null;
    if ($idx === null) {
        return false;
    }

    return isset($this->result['documents'][$idx]);
}
||||
202 | |||||
/**
 * Gets the document with given offset.
 *
 * The offset is a position in the list of loaded document keys. Returns null
 * when no document is loaded at that position. Missing metadata and a missing
 * title are filled in on the returned copy only; the stored result is not
 * modified.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return mixed
 */
#[\ReturnTypeWillChange]
public function offsetGet($offset)
{
    // Null-safe lookup: an unknown offset must not raise a warning.
    $idx = $this->result['document_keys'][$offset] ?? null;
    if ($idx === null) {
        return null;
    }

    $document = $this->result['documents'][$idx] ?? null;
    if ($document === null) {
        return null;
    }

    // It may happen that a Solr group only includes non-toplevel results,
    // in which case metadata of toplevel entry isn't yet filled.
    if (empty($document['metadata'])) {
        $metadataByUid = $this->fetchToplevelMetadataFromSolr([
            'query' => 'uid:' . $document['uid'],
            'start' => 0,
            'rows' => 1,
            'sort' => ['score' => 'desc'],
        ]);
        $document['metadata'] = $metadataByUid[$document['uid']] ?? [];
    }

    // get title of parent/grandparent/... if empty
    $parentUid = $document['partOf'] ?? 0;
    if (empty($document['title']) && $parentUid > 0) {
        $superiorTitle = AbstractDocument::getTitle($parentUid, true);
        if (!empty($superiorTitle)) {
            $document['title'] = '[' . $superiorTitle . ']';
        }
    }

    return $document;
}
||||
241 | |||||
/**
 * Not supported: the result list is read-only.
 *
 * @access public
 *
 * @param int $offset
 * @param int $value
 *
 * @return void
 *
 * @throws \Exception always
 */
public function offsetSet($offset, $value): void
{
    $message = "SolrSearch: Modifying result list is not supported";

    throw new \Exception($message);
}
||||
258 | |||||
/**
 * Not supported: the result list is read-only.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return void
 *
 * @throws \Exception always
 */
public function offsetUnset($offset): void
{
    $message = "SolrSearch: Modifying result list is not supported";

    throw new \Exception($message);
}
||||
274 | |||||
/**
 * Returns the 'solrResults' entry of the last submitted search.
 *
 * @access public
 *
 * @return mixed
 */
public function getSolrResults()
{
    $solrResults = $this->result['solrResults'];

    return $solrResults;
}
||||
286 | |||||
/**
 * Gets by UID.
 *
 * Looks the document up by its uid (the key of the loaded documents), not by
 * its position. Returns null when no document with that uid is loaded; the
 * lookup no longer raises an "undefined array key" warning in that case.
 *
 * @access public
 *
 * @param int $uid
 *
 * @return mixed
 */
public function getByUid($uid)
{
    return $this->result['documents'][$uid] ?? null;
}
||||
300 | |||||
/**
 * Creates a query object that wraps this search.
 *
 * @access public
 *
 * @return SolrSearchQuery
 */
public function getQuery()
{
    $searchQuery = new SolrSearchQuery($this);

    return $searchQuery;
}
||||
312 | |||||
/**
 * Gets first.
 *
 * Returns the first loaded document (the same value array access with offset
 * 0 yields), not a SolrSearch instance as previously documented.
 *
 * @access public
 *
 * @return mixed the first document, or null if none is loaded
 */
public function getFirst()
{
    return $this->offsetGet(0);
}
||||
324 | |||||
/**
 * Parses results to array.
 *
 * Returns the loaded documents as a re-indexed list. Yields an empty array
 * when no search has been submitted yet instead of failing on
 * array_values(null).
 *
 * @access public
 *
 * @return array
 */
public function toArray()
{
    return array_values($this->result['documents'] ?? []);
}
||||
336 | |||||
/**
 * Returns the total number of hits of the last submitted search.
 *
 * This can be accessed in Fluid template using `.numFound`.
 *
 * @access public
 *
 * @return int
 */
public function getNumFound()
{
    $numFound = $this->result['numFound'];

    return $numFound;
}
||||
350 | |||||
/**
 * Prepares SOLR search.
 *
 * Builds the Solr query parameters (main query, filter queries, sort order
 * and field list) from the search parameters, stores them for later submit()
 * calls and sends a first request with processing disabled to obtain the
 * result counts in advance.
 *
 * @access public
 *
 * @return void
 */
public function prepare()
{
    // Prepare query parameters.
    $params = [];
    $matches = [];
    $fields = Solr::getFields();
    $query = '';

    // A missing 'query' parameter is treated as an empty string so that trim()
    // and preg_match() never receive null.
    $trimmedQuery = trim((string) ($this->searchParams['query'] ?? ''));
    // The field name is configuration data; quote it so it is matched literally.
    $fulltextPattern = '/' . preg_quote((string) $fields['fulltext'], '/') . ':\((.*)\)/';

    // Set search query.
    if (
        !empty($this->searchParams['fulltext'])
        || preg_match($fulltextPattern, $trimmedQuery, $matches)
    ) {
        // If the query already is a fulltext query e.g using the facets
        if (!empty($matches[1])) {
            $this->searchParams['query'] = $matches[1];
        }
        // Search in fulltext field if applicable. Query must not be empty!
        if (!empty($this->searchParams['query'])) {
            $query = $fields['fulltext'] . ':(' . Solr::escapeQuery(trim($this->searchParams['query'])) . ')';
        }
        $params['fulltext'] = true;
    } elseif (!empty($this->searchParams['query'])) {
        // Retain given search field if valid.
        $query = Solr::escapeQueryKeepField(trim($this->searchParams['query']), $this->settings['storagePid']);
    }

    // Add extended search query.
    if (
        !empty($this->searchParams['extQuery'])
        && is_array($this->searchParams['extQuery'])
    ) {
        $allowedOperators = ['AND', 'OR', 'NOT'];
        $numberOfExtQueries = count($this->searchParams['extQuery']);
        for ($i = 0; $i < $numberOfExtQueries; $i++) {
            if (empty($this->searchParams['extQuery'][$i])) {
                continue;
            }
            // Strict whitelist check: with loose comparison a boolean true (and,
            // on older PHP versions, integer 0) would pass as an operator.
            $operator = $this->searchParams['extOperator'][$i] ?? null;
            if (!in_array($operator, $allowedOperators, true)) {
                continue;
            }
            if (!empty($query)) {
                $query .= ' ' . $operator . ' ';
            }
            $query .= Indexer::getIndexFieldName($this->searchParams['extField'][$i], $this->settings['storagePid']) . ':(' . Solr::escapeQuery($this->searchParams['extQuery'][$i]) . ')';
        }
    }

    // Add filter query for date search
    if (!empty($this->searchParams['dateFrom']) && !empty($this->searchParams['dateTo'])) {
        // combine dateFrom and dateTo into range search
        // NOTE(review): both values are inserted into the filter query unescaped;
        // confirm they are validated before they reach this class.
        $params['filterquery'][]['query'] = '{!join from=' . $fields['uid'] . ' to=' . $fields['uid'] . '}'. $fields['date'] . ':[' . $this->searchParams['dateFrom'] . ' TO ' . $this->searchParams['dateTo'] . ']';
    }

    // Add filter query for faceting.
    if (isset($this->searchParams['fq']) && is_array($this->searchParams['fq'])) {
        foreach ($this->searchParams['fq'] as $filterQuery) {
            $params['filterquery'][]['query'] = $filterQuery;
        }
    }

    // Add filter query for in-document searching.
    if (
        !empty($this->searchParams['documentId'])
        && MathUtility::canBeInterpretedAsInteger($this->searchParams['documentId'])
    ) {
        $documentId = $this->searchParams['documentId'];
        // One hierarchy step: "documents whose partof field matches the uid of ...".
        $joinToParent = '{!join from=' . $fields['uid'] . ' to=' . $fields['partof'] . '}' . $fields['uid'] . ':';
        // Search in document and all subordinates (valid for up to three levels of hierarchy).
        $params['filterquery'][]['query'] = '_query_:"' . $joinToParent . $joinToParent . $documentId . '"'
            . ' OR ' . $joinToParent . $documentId
            . ' OR ' . $fields['uid'] . ':' . $documentId;
    }

    // if collections are given, we prepare the collection query string
    if (!empty($this->collections)) {
        $params['filterquery'][]['query'] = $this->getCollectionFilterQuery($query);
    }

    // Set some query parameters.
    $params['query'] = !empty($query) ? $query : '*';

    $params['sort'] = $this->getSort();
    $params['listMetadataRecords'] = [];

    // Restrict the fields to the required ones.
    $params['fields'] = 'uid,id,page,title,thumbnail,partof,toplevel,type';

    if ($this->listedMetadata) {
        foreach ($this->listedMetadata as $metadata) {
            if ($metadata->getIndexStored() || $metadata->getIndexIndexed()) {
                // Solr field name: index name plus a tokenized/stored/indexed suffix.
                $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u');
                $params['fields'] .= ',' . $listMetadataRecord;
                $params['listMetadataRecords'][$metadata->getIndexName()] = $listMetadataRecord;
            }
        }
    }

    $this->params = $params;

    // Send off query to get total number of search results in advance
    $this->submit(0, 1, false);
}
||||
463 | |||||
/**
 * Submits SOLR search.
 *
 * Runs the prepared query for the given window and stores the outcome in the
 * result property. With $processResults enabled, the Solr hits are merged
 * with the document records from the repository: non-toplevel hits with a
 * page become 'searchResults' entries of their document, toplevel hits
 * contribute listed metadata and (outside fulltext search) their children.
 *
 * @access public
 *
 * @param int $start
 * @param int $rows
 * @param bool $processResults default value is true
 *
 * @return void
 */
public function submit($start, $rows, $processResults = true)
{
    $params = $this->params;
    $params['start'] = $start;
    $params['rows'] = $rows;

    // Perform search.
    $result = $this->searchSolr($params, true);

    // Initialize values
    $documents = [];

    if ($processResults && $result['numFound'] > 0) {
        // flat array with uids from Solr search
        $documentSet = array_unique(array_column($result['documents'], 'uid'));

        if (empty($documentSet)) {
            // return nothing found; 'numberOfToplevels' is included so that
            // count() finds the key it reads
            $this->result = ['solrResults' => [], 'numberOfToplevels' => 0, 'documents' => [], 'document_keys' => [], 'numFound' => 0];
            return;
        }

        // get the Extbase document objects for all uids
        $allDocuments = $this->documentRepository->findAllByUids($documentSet);
        $childrenOf = $this->documentRepository->findChildrenOfEach($documentSet);

        $listMetadataRecords = $params['listMetadataRecords'] ?? [];
        // Loose comparison kept on purpose: the flag may arrive as '1', 1 or true.
        $isFulltextSearch = ($this->searchParams['fulltext'] ?? null) == '1';

        // The highlight word only depends on the query, so compute it once.
        $highlightWord = null;
        if ($isFulltextSearch) {
            $highlightWord = preg_replace('/^;|;$/', '', // remove ; at beginning or end
                preg_replace('/;+/', ';', // replace any multiple of ; with a single ;
                    preg_replace('/[{~\d*}{\s+}{^=*\d+.*\d*}`~!@#$%\^&*()_|+-=?;:\'",.<>\{\}\[\]\\\]/', ';', (string) ($this->searchParams['query'] ?? '')))); // replace search operators and special characters with ;
        }

        foreach ($result['documents'] as $doc) {
            $uid = $doc['uid'];

            if (empty($documents[$uid]) && !empty($allDocuments[$uid])) {
                $documents[$uid] = $allDocuments[$uid];
            }
            if (empty($documents[$uid])) {
                // No document record for this Solr hit.
                continue;
            }

            $this->translateLanguageCode($doc);

            if ($doc['toplevel'] === false) {
                // this maybe a chapter, article, ..., year
                if ($doc['type'] === 'year' || empty($doc['page'])) {
                    continue;
                }

                // it's probably a fulltext or metadata search
                $searchResult = [];
                $searchResult['page'] = $doc['page'];
                $searchResult['thumbnail'] = $doc['thumbnail'];
                $searchResult['structure'] = $doc['type'];
                $searchResult['title'] = $doc['title'];
                foreach (array_keys($listMetadataRecords) as $indexName) {
                    if (isset($doc['metadata'][$indexName])) {
                        $searchResult['metadata'][$indexName] = $doc['metadata'][$indexName];
                    }
                }
                if ($isFulltextSearch) {
                    $searchResult['snippet'] = $doc['snippet'];
                    $searchResult['highlight'] = $doc['highlight'];
                    $searchResult['highlight_word'] = $highlightWord;
                }
                $documents[$uid]['searchResults'][] = $searchResult;
            } elseif ($doc['toplevel'] === true) {
                foreach (array_keys($listMetadataRecords) as $indexName) {
                    if (isset($doc['metadata'][$indexName])) {
                        $documents[$uid]['metadata'][$indexName] = $doc['metadata'][$indexName];
                    }
                }

                if ($isFulltextSearch) {
                    continue;
                }

                $documents[$uid]['page'] = 1;
                $children = $childrenOf[$uid] ?? [];
                if (empty($children)) {
                    continue;
                }

                $batchSize = 100;
                $totalChildren = count($children);

                // A dedicated loop variable keeps the $start parameter untouched.
                for ($batchStart = 0; $batchStart < $totalChildren; $batchStart += $batchSize) {
                    $batch = array_slice($children, $batchStart, $batchSize, true);

                    // Fetch metadata for the current batch
                    $metadataOf = $this->fetchToplevelMetadataFromSolr([
                        'query' => 'partof:' . $uid,
                        'start' => $batchStart,
                        'rows' => min($batchSize, $totalChildren - $batchStart),
                    ]);

                    foreach ($batch as $docChild) {
                        // We need only a few fields from the children, but we need them as an array.
                        $documents[$uid]['children'][$docChild['uid']] = [
                            'thumbnail' => $docChild['thumbnail'],
                            'title' => $docChild['title'],
                            'structure' => $docChild['structure'],
                            'metsOrderlabel' => $docChild['metsOrderlabel'],
                            'uid' => $docChild['uid'],
                            // A child without an entry in the fetched metadata gets an empty set.
                            'metadata' => $metadataOf[$docChild['uid']] ?? [],
                        ];
                    }
                }
            }
        }
    }

    $this->result = ['solrResults' => $result, 'numberOfToplevels' => $result['numberOfToplevels'], 'documents' => $documents, 'document_keys' => array_keys($documents), 'numFound' => $result['numFound']];
}
||||
579 | |||||
/**
 * Find all listed metadata using specified query params.
 *
 * Runs the given query restricted to toplevel documents and to the index
 * fields of the listed metadata, and returns the metadata of each hit keyed
 * by document uid. Hits without any listed metadata map to an empty array.
 *
 * @access protected
 *
 * @param array $queryParams
 *
 * @return array
 */
protected function fetchToplevelMetadataFromSolr(array $queryParams): array
{
    // Prepare query parameters.
    $params = $queryParams;
    $metadataArray = [];

    // Set some query parameters.
    $params['listMetadataRecords'] = [];

    // Restrict the fields to the required ones.
    $params['fields'] = 'uid,toplevel';

    if ($this->listedMetadata) {
        foreach ($this->listedMetadata as $metadata) {
            if ($metadata->getIndexStored() || $metadata->getIndexIndexed()) {
                // Solr field name: index name plus a tokenized/stored/indexed suffix.
                $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u');
                $params['fields'] .= ',' . $listMetadataRecord;
                $params['listMetadataRecords'][$metadata->getIndexName()] = $listMetadataRecord;
            }
        }
    }
    // Set filter query to just get toplevel documents.
    $params['filterquery'][] = ['query' => 'toplevel:true'];

    // Perform search.
    $result = $this->searchSolr($params, true);

    foreach ($result['documents'] as $doc) {
        $this->translateLanguageCode($doc);
        // getDocument() only adds a 'metadata' key when at least one listed
        // field has a value, so fall back to an empty set.
        $metadataArray[$doc['uid']] = $doc['metadata'] ?? [];
    }

    return $metadataArray;
}
||||
623 | |||||
/**
 * Processes a search request
 *
 * Sends a select query grouped by uid to Solr and flattens all group members
 * into one document list. With caching enabled, a non-empty result is stored
 * in the 'tx_dlf_solr' cache under a digest of the core and the parameters,
 * and later identical requests are answered from that cache.
 *
 * @access protected
 *
 * @param array $parameters Additional search parameters
 * @param boolean $enableCache Enable caching of Solr requests
 *
 * @return array Result set with the keys 'documents', 'numberOfToplevels' and 'numFound'
 */
protected function searchSolr($parameters = [], $enableCache = true)
{
    // Set query.
    $parameters['query'] ??= '*';
    $parameters['filterquery'] ??= [];

    $emptyResultSet = [
        'documents' => [],
        'numberOfToplevels' => 0,
        'numFound' => 0,
    ];

    // Perform Solr query.
    // Instantiate search object.
    $solr = Solr::getInstance($this->settings['solrcore']);
    if (!$solr->ready) {
        Helper::log('Apache Solr not available', LOG_SEVERITY_ERROR);
        return $emptyResultSet;
    }

    $cacheIdentifier = '';
    $cache = null;
    if ($enableCache === true) {
        // Calculate cache identifier.
        $cacheIdentifier = Helper::digest($solr->core . print_r($parameters, true));
        $cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('tx_dlf_solr');

        // Return cache hit.
        $entry = $cache->get($cacheIdentifier);
        if ($entry !== false) {
            return $entry;
        }
    }

    // 'fulltext' is only present for fulltext searches; read it once, null-safe.
    $isFulltext = ($parameters['fulltext'] ?? false) === true;

    $resultSet = $emptyResultSet;
    $selectQuery = $solr->service->createSelect($parameters);

    $grouping = $selectQuery->getGrouping();
    $grouping->addField('uid');
    $grouping->setLimit(100); // Results in group (TODO: check)
    $grouping->setNumberOfGroups(true);

    if ($isFulltext) {
        // get highlighting component and apply settings
        $selectQuery->getHighlighting();
    }

    $solrRequest = $solr->service->createRequest($selectQuery);

    if ($isFulltext) {
        // If it is a fulltext search, enable highlighting.
        // field for which highlighting is going to be performed,
        // is required if you want to have OCR highlighting
        $solrRequest->addParam('hl.ocr.fl', 'fulltext');
        // return the coordinates of highlighted search as absolute coordinates
        $solrRequest->addParam('hl.ocr.absoluteHighlights', 'on');
        // max amount of snippets for a single page
        $solrRequest->addParam('hl.snippets', '20');
        // we store the fulltext on page level and can disable this option
        $solrRequest->addParam('hl.ocr.trackPages', 'off');
    }

    // Perform search for all documents with the same uid that either fit to the search or marked as toplevel.
    $response = $solr->service->executeRequest($solrRequest);
    // return empty resultSet on error-response
    if ($response->getStatusCode() == 400) {
        return $resultSet;
    }
    $result = $solr->service->createResult($selectQuery, $response);

    // TODO: Call to an undefined method Solarium\Core\Query\Result\ResultInterface::getGrouping().
    // @phpstan-ignore-next-line
    $uidGroup = $result->getGrouping()->getGroup('uid');
    $resultSet['numberOfToplevels'] = $uidGroup->getNumberOfGroups();
    $resultSet['numFound'] = $uidGroup->getMatches();

    $highlighting = [];
    if ($isFulltext) {
        $data = $result->getData();
        // The response may lack the OCR highlighting section; getDocument()
        // requires an array, so fall back to an empty one.
        $highlighting = $data['ocrHighlighting'] ?? [];
    }
    $fields = Solr::getFields();

    foreach ($uidGroup as $group) {
        foreach ($group as $record) {
            $resultSet['documents'][] = $this->getDocument($record, $highlighting, $fields, $parameters);
        }
    }

    // Save value in cache.
    if ($cache !== null && !empty($resultSet['documents'])) {
        $cache->set($cacheIdentifier, $resultSet);
    }

    return $resultSet;
}
||||
728 | |||||
/**
 * Builds the filter query that restricts a search to the configured collections.
 *
 * Collections with an index search string ("virtual" collections) contribute
 * that string; all others contribute their quoted index name to a
 * collection_faceting clause.
 *
 * @access private
 *
 * @param string $query the main search query; if empty, plain collection browsing is assumed
 *
 * @return string
 */
private function getCollectionFilterQuery(string $query): string
{
    $quotedIndexNames = [];
    $virtualQueries = [];

    foreach ($this->collections as $collection) {
        $indexSearch = $collection->getIndexSearch();
        // check for virtual collections query string
        if ($indexSearch) {
            $virtualQueries[] = '(' . $indexSearch . ')';
            continue;
        }
        $quotedIndexNames[] = '"' . $collection->getIndexName() . '"';
    }

    $clauses = [];

    // distinguish between simple collection browsing and actual searching within the collection(s)
    if ($quotedIndexNames !== []) {
        $faceting = 'collection_faceting:(' . implode(' OR ', $quotedIndexNames) . ')';
        if (empty($query)) {
            $clauses[] = '(' . $faceting . ' AND toplevel:true AND partof:0)';
        } else {
            $clauses[] = '(' . $faceting . ')';
        }
    }

    // virtual collections might query documents that are neither toplevel:true nor partof:0 and need to be searched separately
    if ($virtualQueries !== []) {
        $clauses[] = '(' . implode(' OR ', $virtualQueries) . ')';
    }

    // combine both query strings into a single filterquery via OR if both are given, otherwise pass either of those
    return implode(' OR ', $clauses);
}
||||
768 | |||||
/**
 * Returns the sort order: the requested 'orderBy' field with its 'order', or
 * the default ranking (score, year, title, volume) when none is requested.
 *
 * @access private
 *
 * @return array
 */
private function getSort(): array
{
    if (empty($this->searchParams['orderBy'])) {
        $defaultSort = [
            'score' => 'desc',
            'year_sorting' => 'asc',
            'title_sorting' => 'asc',
            'volume' => 'asc'
        ];

        return $defaultSort;
    }

    $sortField = $this->searchParams['orderBy'];

    return [$sortField => $this->searchParams['order']];
}
||||
791 | |||||
/**
 * Converts a Solr record into the plain document array used by this class.
 *
 * @access private
 *
 * @param Document $record
 * @param array $highlighting
 * @param array $fields
 * @param array $parameters search parameters; 'listMetadataRecords' maps index names to Solr fields
 *
 * @return array the document data, with a 'metadata' key only if a listed field has a value
 */
private function getDocument(Document $record, array $highlighting, array $fields, $parameters)
{
    $resultDocument = new ResultDocument($record, $highlighting, $fields);

    // Fall back to the uid from the parameters when the record has none.
    $uid = $resultDocument->getUid();
    if (empty($uid)) {
        $uid = $parameters['uid'];
    }

    $document = [];
    $document['id'] = $resultDocument->getId();
    $document['page'] = $resultDocument->getPage();
    $document['snippet'] = $resultDocument->getSnippets();
    $document['thumbnail'] = $resultDocument->getThumbnail();
    $document['title'] = $resultDocument->getTitle();
    $document['toplevel'] = $resultDocument->getToplevel();
    $document['type'] = $resultDocument->getType();
    $document['uid'] = $uid;
    $document['highlight'] = $resultDocument->getHighlightsIds();

    foreach ($parameters['listMetadataRecords'] as $indexName => $solrField) {
        if (empty($record->$solrField)) {
            continue;
        }
        $document['metadata'][$indexName] = $record->$solrField;
    }

    return $document;
}
||||
827 | |||||
/**
 * Translate language code if applicable.
 *
 * Replaces every entry of $doc['metadata']['language'] in place with the
 * value returned by Helper::getLanguageName(). Documents without language
 * metadata are left untouched and no longer trigger "undefined array key"
 * warnings.
 *
 * @access private
 *
 * @param &$doc document array
 *
 * @return void
 */
private function translateLanguageCode(&$doc): void
{
    $languages = $doc['metadata']['language'] ?? null;
    // Nothing to translate, or not a list of codes that can be iterated.
    if (empty($languages) || !is_iterable($languages)) {
        return;
    }

    foreach ($languages as $indexName => $language) {
        $doc['metadata']['language'][$indexName] = Helper::getLanguageName($language);
    }
}
||||
845 | } |
||||
846 |