We could not synchronize checks via GitHub's checks API since Scrutinizer's GitHub App is not installed for this repository.
1 | <?php |
||
2 | |||
3 | namespace Kitodo\Dlf\Common\Solr; |
||
4 | |||
5 | use Kitodo\Dlf\Common\AbstractDocument; |
||
6 | use Kitodo\Dlf\Common\Helper; |
||
7 | use Kitodo\Dlf\Common\Indexer; |
||
8 | use Kitodo\Dlf\Common\Solr\SearchResult\ResultDocument; |
||
9 | use Kitodo\Dlf\Domain\Repository\DocumentRepository; |
||
10 | use Solarium\QueryType\Select\Result\Document; |
||
11 | use TYPO3\CMS\Core\Cache\CacheManager; |
||
12 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||
13 | use TYPO3\CMS\Core\Utility\MathUtility; |
||
14 | use TYPO3\CMS\Extbase\Persistence\Generic\QueryResult; |
||
15 | use TYPO3\CMS\Extbase\Persistence\QueryResultInterface; |
||
16 | |||
17 | /** |
||
18 | * Targeted towards being used in ``PaginateController`` (``<f:widget.paginate>``). |
||
19 | * |
||
20 | * Notes on implementation: |
||
21 | * - `Countable`: `count()` returns the number of toplevel documents. |
||
22 | * - `getNumLoadedDocuments()`: Number of toplevel documents that have been fetched from Solr. |
||
23 | * - `ArrayAccess`/`Iterator`: Access *fetched* toplevel documents indexed in order of their ranking. |
||
24 | * |
||
25 | * @package TYPO3 |
||
26 | * @subpackage dlf |
||
27 | * |
||
28 | * @access public |
||
29 | */ |
||
30 | class SolrSearch implements \Countable, \Iterator, \ArrayAccess, QueryResultInterface |
||
31 | { |
||
32 | /** |
||
33 | * @access private |
||
34 | * @var DocumentRepository |
||
35 | */ |
||
36 | private DocumentRepository $documentRepository; |
||
37 | |||
38 | /** |
||
39 | * @access private |
||
40 | * @var array|QueryResultInterface |
||
41 | */ |
||
42 | private $collections; |
||
43 | |||
44 | /** |
||
45 | * @access private |
||
46 | * @var array |
||
47 | */ |
||
48 | private array $settings; |
||
49 | |||
50 | /** |
||
51 | * @access private |
||
52 | * @var array |
||
53 | */ |
||
54 | private array $searchParams; |
||
55 | |||
56 | /** |
||
57 | * @access private |
||
58 | * @var QueryResult|null |
||
59 | */ |
||
60 | private ?QueryResult $listedMetadata; |
||
61 | |||
62 | /** |
||
63 | * @access private |
||
64 | * @var array |
||
65 | */ |
||
66 | private array $params; |
||
67 | |||
68 | /** |
||
69 | * @access private |
||
70 | * @var array |
||
71 | */ |
||
72 | private $result; |
||
73 | |||
74 | /** |
||
75 | * @access private |
||
76 | * @var int |
||
77 | */ |
||
78 | protected int $position = 0; |
||
79 | |||
/**
 * Constructs SolrSearch instance.
 *
 * The constructor only stores its arguments; it does not contact Solr.
 *
 * @access public
 *
 * @param DocumentRepository $documentRepository repository used by submit() to load documents for the Solr hits
 * @param array|QueryResultInterface $collections can contain 0, 1 or many Collection objects
 * @param array $settings plugin settings; this class reads `storagePid` and `solrcore`
 * @param array $searchParams search parameters such as `query`, `fulltext`, `extQuery`, `fq`, `orderBy`
 * @param QueryResult|null $listedMetadata metadata records whose index fields are requested from Solr
 */
public function __construct(DocumentRepository $documentRepository, $collections, array $settings, array $searchParams, ?QueryResult $listedMetadata = null)
{
    // The nullable type is spelled out explicitly: relying on "= null" to make
    // a typed parameter nullable is deprecated as of PHP 8.4.
    $this->documentRepository = $documentRepository;
    $this->collections = $collections;
    $this->settings = $settings;
    $this->searchParams = $searchParams;
    $this->listedMetadata = $listedMetadata;
}
||
101 | |||
/**
 * Gets amount of loaded documents.
 *
 * Counts the entries of the `documents` list built by the last submit().
 * Returns 0 when no result has been stored yet instead of failing on
 * `count(null)`.
 *
 * @access public
 *
 * @return int
 */
public function getNumLoadedDocuments(): int
{
    return count($this->result['documents'] ?? []);
}
||
113 | |||
/**
 * Count results.
 *
 * Returns the `numberOfToplevels` value of the current result. Falls back
 * to 0 when there is no result yet or when the stored result does not carry
 * that key, so the declared `int` return type is always honoured.
 *
 * @access public
 *
 * @return int
 */
public function count(): int
{
    return (int) ($this->result['numberOfToplevels'] ?? 0);
}
||
129 | |||
/**
 * Returns the document at the current iterator position.
 *
 * The lookup is delegated to offsetGet(), so the same lazy completion of
 * metadata and title applies.
 *
 * @access public
 *
 * @return array
 */
public function current(): array
{
    return $this->offsetGet($this->position);
}
||
141 | |||
/**
 * Returns the current iterator position.
 *
 * @access public
 *
 * @return int zero-based position within the loaded documents
 */
public function key(): int
{
    return $this->position;
}
||
153 | |||
/**
 * Advances the iterator to the next position.
 *
 * @access public
 *
 * @return void
 */
public function next(): void
{
    ++$this->position;
}
||
165 | |||
/**
 * Resets the iterator to the first position.
 *
 * @access public
 *
 * @return void
 */
public function rewind(): void
{
    $this->position = 0;
}
||
177 | |||
/**
 * Tells whether a document exists at the current iterator position.
 *
 * @access public
 *
 * @return bool
 */
public function valid(): bool
{
    return $this->offsetExists($this->position);
}
||
187 | |||
/**
 * Checks if the document with given offset exists.
 *
 * The offset is a position in the `document_keys` list, which maps ranking
 * positions to keys of the `documents` list. An offset outside that list
 * (or a missing result) yields false without raising an undefined-key
 * warning; this matters because valid() probes one position past the end
 * on every full iteration.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return bool
 */
public function offsetExists($offset): bool
{
    $idx = $this->result['document_keys'][$offset] ?? null;

    return $idx !== null && isset($this->result['documents'][$idx]);
}
||
202 | |||
/**
 * Gets the document with given offset.
 *
 * The offset is a position in the `document_keys` list. If the document has
 * no metadata yet, it is fetched from Solr; if it has no title but a parent
 * (`partOf` > 0), the parent's title is used in square brackets. These
 * completions are applied to the returned copy only, not to the stored
 * result.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return mixed the document array, or null if nothing is loaded at this offset
 */
#[\ReturnTypeWillChange]
public function offsetGet($offset)
{
    // Resolve the position defensively: out-of-range offsets must simply yield null.
    $idx = $this->result['document_keys'][$offset] ?? null;
    if ($idx === null) {
        return null;
    }

    $document = $this->result['documents'][$idx] ?? null;
    if ($document === null) {
        return null;
    }

    // It may happen that a Solr group only includes non-toplevel results,
    // in which case metadata of toplevel entry isn't yet filled.
    if (empty($document['metadata'])) {
        $metadataByUid = $this->fetchToplevelMetadataFromSolr([
            'query' => 'uid:' . $document['uid'],
            'start' => 0,
            'rows' => 1,
            'sort' => ['score' => 'desc'],
        ]);
        $document['metadata'] = $metadataByUid[$document['uid']] ?? [];
    }

    // get title of parent/grandparent/... if empty
    if (empty($document['title']) && ($document['partOf'] ?? 0) > 0) {
        $superiorTitle = AbstractDocument::getTitle($document['partOf'], true);
        if (!empty($superiorTitle)) {
            $document['title'] = '[' . $superiorTitle . ']';
        }
    }

    return $document;
}
||
241 | |||
/**
 * Not supported: the result list is read-only.
 *
 * @access public
 *
 * @param int $offset
 * @param mixed $value
 *
 * @return void
 *
 * @throws \Exception always
 */
public function offsetSet($offset, $value): void
{
    throw new \Exception('SolrSearch: Modifying result list is not supported');
}
||
258 | |||
/**
 * Not supported: the result list is read-only.
 *
 * @access public
 *
 * @param int $offset
 *
 * @return void
 *
 * @throws \Exception always
 */
public function offsetUnset($offset): void
{
    throw new \Exception('SolrSearch: Modifying result list is not supported');
}
||
274 | |||
/**
 * Gets SOLR results.
 *
 * Returns the `solrResults` entry stored by the last submit(), i.e. the
 * result set as returned by searchSolr(). Returns null, without a warning,
 * if no search has been submitted yet.
 *
 * @access public
 *
 * @return mixed
 */
public function getSolrResults()
{
    return $this->result['solrResults'] ?? null;
}
||
286 | |||
/**
 * Gets by UID.
 *
 * Looks the document up directly in the `documents` list, which submit()
 * keys by document uid. Unlike offsetGet(), no metadata or title completion
 * is applied.
 *
 * @access public
 *
 * @param int $uid
 *
 * @return mixed the document, or null if no document with this uid is loaded
 */
public function getByUid($uid)
{
    return $this->result['documents'][$uid] ?? null;
}
||
300 | |||
/**
 * Creates a query object wrapping this search.
 *
 * @access public
 *
 * @return SolrSearchQuery a new instance on every call
 */
public function getQuery()
{
    $searchQuery = new SolrSearchQuery($this);

    return $searchQuery;
}
||
312 | |||
/**
 * Gets the first loaded document.
 *
 * Equivalent to reading offset 0 of this object.
 *
 * @access public
 *
 * @return mixed the value offsetGet() yields for offset 0
 */
public function getFirst()
{
    return $this->offsetGet(0);
}
||
324 | |||
/**
 * Parses results to array.
 *
 * Returns the loaded documents as a zero-indexed list (the uid keys are
 * dropped). Returns an empty array when no result has been stored yet
 * instead of passing null to `array_values()`.
 *
 * @access public
 *
 * @return array
 */
public function toArray()
{
    return array_values($this->result['documents'] ?? []);
}
||
336 | |||
/**
 * Get total number of hits.
 *
 * This can be accessed in Fluid template using `.numFound`.
 *
 * Returns 0 when no search has been submitted yet.
 *
 * @access public
 *
 * @return int
 */
public function getNumFound()
{
    return $this->result['numFound'] ?? 0;
}
||
350 | |||
/**
 * Prepares SOLR search.
 *
 * Translates the search parameters into Solr query parameters (main query,
 * filter queries, sorting, field list), stores them for later submit()
 * calls and sends a first request for a single row so that the total
 * counts are known in advance.
 *
 * As a side effect `searchParams['query']` is always defined afterwards;
 * for a query of the form `<fulltextField>:(...)` it is replaced by the
 * inner expression.
 *
 * @access public
 *
 * @return void
 */
public function prepare()
{
    // Prepare query parameters.
    $params = [];
    $matches = [];
    $fields = Solr::getFields();
    $query = '';

    // Make sure the key exists so that the reads below never hit an undefined index.
    $this->searchParams['query'] ??= '';

    // The field name is quoted so that it is always matched literally.
    $fulltextPattern = '/' . preg_quote($fields['fulltext'], '/') . ':\((.*)\)/';

    // Set search query.
    if (
        !empty($this->searchParams['fulltext'])
        || preg_match($fulltextPattern, trim($this->searchParams['query']), $matches)
    ) {
        // If the query already is a fulltext query e.g using the facets
        if (!empty($matches[1])) {
            $this->searchParams['query'] = $matches[1];
        }
        // Search in fulltext field if applicable. Query must not be empty!
        if (!empty($this->searchParams['query'])) {
            $query = $fields['fulltext'] . ':(' . Solr::escapeQuery(trim($this->searchParams['query'])) . ')';
        }
        $params['fulltext'] = true;
    } elseif (!empty($this->searchParams['query'])) {
        // Retain given search field if valid.
        $query = Solr::escapeQueryKeepField(trim($this->searchParams['query']), $this->settings['storagePid']);
    }

    // Add extended search query.
    if (
        !empty($this->searchParams['extQuery'])
        && is_array($this->searchParams['extQuery'])
    ) {
        $allowedOperators = ['AND', 'OR', 'NOT'];
        // Iterate by key so that the operator/field belonging to each query
        // is found even if the submitted indexes are not consecutive.
        foreach ($this->searchParams['extQuery'] as $i => $extQuery) {
            $operator = $this->searchParams['extOperator'][$i] ?? null;
            $field = $this->searchParams['extField'][$i] ?? null;

            // Skip incomplete rows. The strict comparison prevents values
            // such as int 0 from loosely matching an operator name.
            if (
                empty($extQuery)
                || empty($field)
                || !in_array($operator, $allowedOperators, true)
            ) {
                continue;
            }

            if (!empty($query)) {
                $query .= ' ' . $operator . ' ';
            }
            $query .= Indexer::getIndexFieldName($field, $this->settings['storagePid']) . ':(' . Solr::escapeQuery($extQuery) . ')';
        }
    }

    // Add filter query for date search
    if (!empty($this->searchParams['dateFrom']) && !empty($this->searchParams['dateTo'])) {
        // combine dateFrom and dateTo into range search
        // NOTE(review): both values are inserted unescaped; confirm that callers validate them.
        $params['filterquery'][]['query'] = '{!join from=' . $fields['uid'] . ' to=' . $fields['uid'] . '}' . $fields['date'] . ':[' . $this->searchParams['dateFrom'] . ' TO ' . $this->searchParams['dateTo'] . ']';
    }

    // Add filter query for faceting.
    if (isset($this->searchParams['fq']) && is_array($this->searchParams['fq'])) {
        foreach ($this->searchParams['fq'] as $filterQuery) {
            $params['filterquery'][]['query'] = $filterQuery;
        }
    }

    // Add filter query for in-document searching.
    if (
        !empty($this->searchParams['documentId'])
        && MathUtility::canBeInterpretedAsInteger($this->searchParams['documentId'])
    ) {
        // Search in document and all subordinates (valid for up to three levels of hierarchy).
        $params['filterquery'][]['query'] = '_query_:"{!join from='
            . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':{!join from=' . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':' . $this->searchParams['documentId'] . '"' . ' OR {!join from='
            . $fields['uid'] . ' to=' . $fields['partof'] . '}'
            . $fields['uid'] . ':' . $this->searchParams['documentId'] . ' OR '
            . $fields['uid'] . ':' . $this->searchParams['documentId'];
    }

    // if collections are given, we prepare the collection query string
    if (!empty($this->collections)) {
        $params['filterquery'][]['query'] = $this->getCollectionFilterQuery($query);
    }

    // Set some query parameters.
    $params['query'] = !empty($query) ? $query : '*';

    $params['sort'] = $this->getSort();
    $params['listMetadataRecords'] = [];

    // Restrict the fields to the required ones.
    $params['fields'] = 'uid,id,page,title,thumbnail,partof,toplevel,type';

    if ($this->listedMetadata) {
        foreach ($this->listedMetadata as $metadata) {
            if ($metadata->getIndexStored() || $metadata->getIndexIndexed()) {
                // Solr field name: <indexName>_<t|u><s|u><i|u> (tokenized / stored / indexed flags).
                $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u');
                $params['fields'] .= ',' . $listMetadataRecord;
                $params['listMetadataRecords'][$metadata->getIndexName()] = $listMetadataRecord;
            }
        }
    }

    $this->params = $params;

    // Send off query to get total number of search results in advance
    $this->submit(0, 1, false);
}
||
463 | |||
/**
 * Submits SOLR search.
 *
 * Runs the query prepared by prepare() for the given window and stores the
 * outcome in `$this->result` with the keys `solrResults`,
 * `numberOfToplevels`, `documents` (keyed by uid), `document_keys` and
 * `numFound`.
 *
 * When `$processResults` is true, each Solr record is merged into the
 * matching repository document: non-toplevel records with a page become
 * entries of `searchResults`, toplevel records contribute listed metadata
 * and, outside fulltext searches, `page` and `children`.
 *
 * @access public
 *
 * @param int $start
 * @param int $rows
 * @param bool $processResults default value is true
 *
 * @return void
 */
public function submit($start, $rows, $processResults = true)
{
    $params = $this->params;
    $params['start'] = $start;
    $params['rows'] = $rows;

    // Perform search.
    $result = $this->searchSolr($params, true);

    // Initialize values
    $documents = [];

    if ($processResults && $result['numFound'] > 0) {
        // flat array with uids from Solr search
        $documentSet = array_unique(array_column($result['documents'], 'uid'));

        if (empty($documentSet)) {
            // return nothing found; the result keeps the same shape as the
            // regular one so that count() can always read numberOfToplevels
            $this->result = [
                'solrResults' => [],
                'numberOfToplevels' => 0,
                'documents' => [],
                'document_keys' => [],
                'numFound' => 0,
            ];
            return;
        }

        // get the Extbase document objects for all uids
        $allDocuments = $this->documentRepository->findAllByUids($documentSet);
        $childrenOf = $this->documentRepository->findChildrenOfEach($documentSet);

        $isFulltextSearch = ($this->searchParams['fulltext'] ?? '') == '1';
        $listedIndexNames = array_keys($params['listMetadataRecords'] ?? []);

        foreach ($result['documents'] as $doc) {
            $uid = $doc['uid'];

            if (empty($documents[$uid]) && !empty($allDocuments[$uid])) {
                $documents[$uid] = $allDocuments[$uid];
            }
            if (empty($documents[$uid])) {
                // Solr knows the record but the repository returned no document for it.
                continue;
            }

            $this->translateLanguageCode($doc);

            if ($doc['toplevel'] === false) {
                // this maybe a chapter, article, ..., year
                if ($doc['type'] === 'year' || empty($doc['page'])) {
                    continue;
                }
                // it's probably a fulltext or metadata search
                $documents[$uid]['searchResults'][] = $this->buildPageSearchResult($doc, $listedIndexNames, $isFulltextSearch);
            } elseif ($doc['toplevel'] === true) {
                foreach ($listedIndexNames as $indexName) {
                    if (isset($doc['metadata'][$indexName])) {
                        $documents[$uid]['metadata'][$indexName] = $doc['metadata'][$indexName];
                    }
                }
                if (!$isFulltextSearch) {
                    $documents[$uid]['page'] = 1;
                    $children = $childrenOf[$uid] ?? [];
                    if (!empty($children)) {
                        foreach ($this->buildChildDocuments($uid, $children) as $childUid => $childDocument) {
                            $documents[$uid]['children'][$childUid] = $childDocument;
                        }
                    }
                }
            }
        }
    }

    $this->result = [
        'solrResults' => $result,
        'numberOfToplevels' => $result['numberOfToplevels'],
        'documents' => $documents,
        'document_keys' => array_keys($documents),
        'numFound' => $result['numFound'],
    ];
}

/**
 * Builds one `searchResults` entry from a non-toplevel Solr record.
 *
 * @access private
 *
 * @param array $doc Solr record as produced by getDocument()
 * @param array $listedIndexNames index names of the listed metadata
 * @param bool $isFulltextSearch whether snippet and highlight data should be added
 *
 * @return array
 */
private function buildPageSearchResult(array $doc, array $listedIndexNames, bool $isFulltextSearch): array
{
    $searchResult = [
        'page' => $doc['page'],
        'thumbnail' => $doc['thumbnail'],
        'structure' => $doc['type'],
        'title' => $doc['title'],
    ];

    foreach ($listedIndexNames as $indexName) {
        if (isset($doc['metadata'][$indexName])) {
            $searchResult['metadata'][$indexName] = $doc['metadata'][$indexName];
        }
    }

    if ($isFulltextSearch) {
        $searchResult['snippet'] = $doc['snippet'];
        $searchResult['highlight'] = $doc['highlight'];
        // replace search operators and special characters with ;
        $highlightWord = preg_replace('/[{~\d*}{\s+}{^=*\d+.*\d*}`~!@#$%\^&*()_|+-=?;:\'",.<>\{\}\[\]\\\]/', ';', (string) ($this->searchParams['query'] ?? ''));
        // replace any multiple of ; with a single ;
        $highlightWord = preg_replace('/;+/', ';', $highlightWord);
        // remove ; at beginning or end
        $searchResult['highlight_word'] = preg_replace('/^;|;$/', '', $highlightWord);
    }

    return $searchResult;
}

/**
 * Builds the reduced child documents of a toplevel document.
 *
 * Metadata is fetched from Solr in batches of 100 children.
 *
 * NOTE(review): each batch of repository children is paired with the Solr
 * page at the same start/rows window; this presumably relies on both
 * sources yielding the same children in that window - confirm.
 *
 * @access private
 *
 * @param int|string $parentUid uid of the toplevel document
 * @param array $children child documents of the parent
 *
 * @return array child documents keyed by their uid
 */
private function buildChildDocuments($parentUid, $children): array
{
    $childDocuments = [];
    $batchSize = 100;
    $totalChildren = count($children);

    for ($start = 0; $start < $totalChildren; $start += $batchSize) {
        $batch = array_slice($children, $start, $batchSize, true);

        // Fetch metadata for the current batch
        $metadataOf = $this->fetchToplevelMetadataFromSolr([
            'query' => 'partof:' . $parentUid,
            'start' => $start,
            'rows' => min($batchSize, $totalChildren - $start),
        ]);

        foreach ($batch as $docChild) {
            // We need only a few fields from the children, but we need them as an array.
            $childDocuments[$docChild['uid']] = [
                'thumbnail' => $docChild['thumbnail'],
                'title' => $docChild['title'],
                'structure' => $docChild['structure'],
                'metsOrderlabel' => $docChild['metsOrderlabel'],
                'uid' => $docChild['uid'],
                'metadata' => $metadataOf[$docChild['uid']] ?? [],
            ];
        }
    }

    return $childDocuments;
}
||
579 | |||
/**
 * Find all listed metadata using specified query params.
 *
 * Adds the index fields of the listed metadata to the requested field list,
 * restricts the search to `toplevel:true` and returns the metadata of every
 * record found, with language codes translated.
 *
 * @access protected
 *
 * @param array $queryParams Solr parameters such as `query`, `start`, `rows`, `sort`
 *
 * @return array metadata arrays keyed by document uid
 */
protected function fetchToplevelMetadataFromSolr(array $queryParams): array
{
    // Prepare query parameters.
    $params = $queryParams;
    $metadataArray = [];

    // Set some query parameters.
    $params['listMetadataRecords'] = [];

    // Restrict the fields to the required ones.
    $params['fields'] = 'uid,toplevel';

    if ($this->listedMetadata) {
        foreach ($this->listedMetadata as $metadata) {
            if ($metadata->getIndexStored() || $metadata->getIndexIndexed()) {
                // Solr field name: <indexName>_<t|u><s|u><i|u> (tokenized / stored / indexed flags).
                $listMetadataRecord = $metadata->getIndexName() . '_' . ($metadata->getIndexTokenized() ? 't' : 'u') . ($metadata->getIndexStored() ? 's' : 'u') . ($metadata->getIndexIndexed() ? 'i' : 'u');
                $params['fields'] .= ',' . $listMetadataRecord;
                $params['listMetadataRecords'][$metadata->getIndexName()] = $listMetadataRecord;
            }
        }
    }
    // Set filter query to just get toplevel documents.
    $params['filterquery'][] = ['query' => 'toplevel:true'];

    // Perform search.
    $result = $this->searchSolr($params, true);

    foreach ($result['documents'] as $doc) {
        $this->translateLanguageCode($doc);
        // getDocument() only sets `metadata` when at least one listed field
        // has a value, so fall back to an empty array.
        $metadataArray[$doc['uid']] = $doc['metadata'] ?? [];
    }

    return $metadataArray;
}
||
623 | |||
/**
 * Processes a search request
 *
 * Sends a select query, grouped by `uid`, to the configured Solr core and
 * flattens all records of all groups into one list. For fulltext searches
 * (`$parameters['fulltext'] === true`) OCR highlighting is requested as
 * well. Non-empty result sets are stored in the `tx_dlf_solr` cache when
 * caching is enabled.
 *
 * An empty result set is returned if Solr is not ready or answers with
 * HTTP status 400.
 *
 * @access protected
 *
 * @param array $parameters Additional search parameters
 * @param boolean $enableCache Enable caching of Solr requests
 *
 * @return array result set with the keys `documents`, `numberOfToplevels` and `numFound`
 */
protected function searchSolr($parameters = [], $enableCache = true)
{
    // Set query.
    $parameters['query'] = $parameters['query'] ?? '*';
    $parameters['filterquery'] = $parameters['filterquery'] ?? [];

    // Read the flag once; the key is absent for non-fulltext searches.
    $isFulltext = ($parameters['fulltext'] ?? false) === true;

    $resultSet = [
        'documents' => [],
        'numberOfToplevels' => 0,
        'numFound' => 0,
    ];

    // Perform Solr query.
    // Instantiate search object.
    $solr = Solr::getInstance($this->settings['solrcore']);
    if (!$solr->ready) {
        Helper::log('Apache Solr not available', LOG_SEVERITY_ERROR);
        return $resultSet;
    }

    $cacheIdentifier = '';
    $cache = null;
    if ($enableCache) {
        // Calculate cache identifier.
        $cacheIdentifier = Helper::digest($solr->core . print_r($parameters, true));
        $cache = GeneralUtility::makeInstance(CacheManager::class)->getCache('tx_dlf_solr');

        // Return cache hit.
        $entry = $cache->get($cacheIdentifier);
        if ($entry !== false) {
            return $entry;
        }
    }

    $selectQuery = $solr->service->createSelect($parameters);

    $grouping = $selectQuery->getGrouping();
    $grouping->addField('uid');
    $grouping->setLimit(100); // Results in group (TODO: check)
    $grouping->setNumberOfGroups(true);

    if ($isFulltext) {
        // get highlighting component and apply settings
        $selectQuery->getHighlighting();
    }

    $solrRequest = $solr->service->createRequest($selectQuery);

    if ($isFulltext) {
        // If it is a fulltext search, enable highlighting.
        // field for which highlighting is going to be performed,
        // is required if you want to have OCR highlighting
        $solrRequest->addParam('hl.ocr.fl', 'fulltext');
        // return the coordinates of highlighted search as absolute coordinates
        $solrRequest->addParam('hl.ocr.absoluteHighlights', 'on');
        // max amount of snippets for a single page
        $solrRequest->addParam('hl.snippets', '20');
        // we store the fulltext on page level and can disable this option
        $solrRequest->addParam('hl.ocr.trackPages', 'off');
    }

    // Perform search for all documents with the same uid that either fit to the search or marked as toplevel.
    $response = $solr->service->executeRequest($solrRequest);
    // return empty resultSet on error-response
    if ($response->getStatusCode() == 400) {
        return $resultSet;
    }
    $result = $solr->service->createResult($selectQuery, $response);

    // TODO: Call to an undefined method Solarium\Core\Query\Result\ResultInterface::getGrouping().
    // @phpstan-ignore-next-line
    $uidGroup = $result->getGrouping()->getGroup('uid');
    $resultSet['numberOfToplevels'] = $uidGroup->getNumberOfGroups();
    $resultSet['numFound'] = $uidGroup->getMatches();

    $highlighting = [];
    if ($isFulltext) {
        $data = $result->getData();
        // The key may be missing from the response; getDocument() requires an array.
        $highlighting = $data['ocrHighlighting'] ?? [];
    }
    $fields = Solr::getFields();

    foreach ($uidGroup as $group) {
        foreach ($group as $record) {
            $resultSet['documents'][] = $this->getDocument($record, $highlighting, $fields, $parameters);
        }
    }

    // Save value in cache.
    if ($cache !== null && !empty($resultSet['documents'])) {
        $cache->set($cacheIdentifier, $resultSet);
    }

    return $resultSet;
}
||
728 | |||
/**
 * Get collection filter query for search.
 *
 * Collections that define an index search string ("virtual" collections)
 * contribute that string; all others contribute their quoted index name to
 * a `collection_faceting` clause. Without a search query the faceting
 * clause is additionally limited to `toplevel:true AND partof:0`.
 *
 * @access private
 *
 * @param string $query
 *
 * @return string both clauses joined by OR, or an empty string if there is none
 */
private function getCollectionFilterQuery(string $query) : string
{
    $quotedIndexNames = [];
    $virtualQueries = [];

    foreach ($this->collections as $collection) {
        // check for virtual collections query string
        if ($collection->getIndexSearch()) {
            $virtualQueries[] = '(' . $collection->getIndexSearch() . ')';
            continue;
        }
        $quotedIndexNames[] = '"' . $collection->getIndexName() . '"';
    }

    $filterParts = [];

    // distinguish between simple collection browsing and actual searching within the collection(s)
    if ($quotedIndexNames !== []) {
        $facetingClause = 'collection_faceting:(' . implode(' OR ', $quotedIndexNames) . ')';
        if (empty($query)) {
            $facetingClause .= ' AND toplevel:true AND partof:0';
        }
        $filterParts[] = '(' . $facetingClause . ')';
    }

    // virtual collections might query documents that are neither toplevel:true nor partof:0 and need to be searched separately
    if ($virtualQueries !== []) {
        $filterParts[] = '(' . implode(' OR ', $virtualQueries) . ')';
    }

    // combine both query strings into a single filterquery via OR if both are given, otherwise pass either of those
    return implode(' OR ', $filterParts);
}
||
768 | |||
/**
 * Get sort order of the results as given, or the default order otherwise.
 *
 * With `searchParams['orderBy']` set, results are sorted by that field in
 * the direction given by `searchParams['order']`; a missing direction
 * falls back to ascending instead of passing null on as the direction.
 * Without `orderBy` the default is score (descending), then year, title
 * and volume (ascending).
 *
 * @access private
 *
 * @return array sort fields mapped to their direction
 */
private function getSort() : array
{
    if (!empty($this->searchParams['orderBy'])) {
        return [
            $this->searchParams['orderBy'] => $this->searchParams['order'] ?? 'asc',
        ];
    }

    return [
        'score' => 'desc',
        'year_sorting' => 'asc',
        'title_sorting' => 'asc',
        'volume' => 'asc'
    ];
}
||
791 | |||
/**
 * Gets a document
 *
 * Converts one Solr record into a plain array. The `metadata` key is only
 * present if at least one of the listed metadata fields has a non-empty
 * value in the record.
 *
 * @access private
 *
 * @param Document $record
 * @param array $highlighting
 * @param array $fields
 * @param array $parameters search parameters; `listMetadataRecords` and the optional `uid` fallback are read
 *
 * @return array The Apache Solr Documents that were fetched
 */
private function getDocument(Document $record, array $highlighting, array $fields, $parameters): array
{
    $resultDocument = new ResultDocument($record, $highlighting, $fields);

    $uid = $resultDocument->getUid();
    if (empty($uid)) {
        // Fall back to a uid supplied by the caller, if any.
        $uid = $parameters['uid'] ?? null;
    }

    $document = [
        'id' => $resultDocument->getId(),
        'page' => $resultDocument->getPage(),
        'snippet' => $resultDocument->getSnippets(),
        'thumbnail' => $resultDocument->getThumbnail(),
        'title' => $resultDocument->getTitle(),
        'toplevel' => $resultDocument->getToplevel(),
        'type' => $resultDocument->getType(),
        'uid' => $uid,
        'highlight' => $resultDocument->getHighlightsIds(),
    ];

    foreach ($parameters['listMetadataRecords'] ?? [] as $indexName => $solrField) {
        if (!empty($record->$solrField)) {
            $document['metadata'][$indexName] = $record->$solrField;
        }
    }

    return $document;
}
||
827 | |||
/**
 * Translate language code if applicable.
 *
 * Replaces every entry of `$doc['metadata']['language']` in place with the
 * value returned by Helper::getLanguageName(). Documents without language
 * metadata, or with a non-array value there, are left untouched.
 *
 * @access private
 *
 * @param array &$doc document array
 *
 * @return void
 */
private function translateLanguageCode(&$doc): void
{
    $languages = $doc['metadata']['language'] ?? null;
    if (empty($languages) || !is_array($languages)) {
        return;
    }

    foreach ($languages as $indexName => $language) {
        $doc['metadata']['language'][$indexName] = Helper::getLanguageName($language);
    }
}
||
845 | } |
||
846 |