Passed
Pull Request — release-11.5.x (#3206)
by Michael
40:59 queued 01:38
created

Typo3PageIndexer::getPageAccessRootline()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 3
ccs 0
cts 2
cp 0
rs 10
c 0
b 0
f 0
cc 1
eloc 1
nc 1
nop 0
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the TYPO3 CMS project.
7
 *
8
 * It is free software; you can redistribute it and/or modify it under
9
 * the terms of the GNU General Public License, either version 2
10
 * of the License, or any later version.
11
 *
12
 * For the full copyright and license information, please read the
13
 * LICENSE.txt file that was distributed with this source code.
14
 *
15
 * The TYPO3 project - inspiring people to share!
16
 */
17
18
namespace ApacheSolrForTypo3\Solr;
19
20
use ApacheSolrForTypo3\Solr\Access\Rootline;
21
use ApacheSolrForTypo3\Solr\Domain\Search\ApacheSolrDocument\Builder;
22
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
23
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
24
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
25
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
26
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
27
use ApacheSolrForTypo3\Solr\System\Solr\Document\Document;
28
use ApacheSolrForTypo3\Solr\System\Solr\SolrConnection;
29
use Doctrine\DBAL\Driver\Exception as DBALDriverException;
30
use Doctrine\DBAL\Exception as DBALException;
31
use Exception;
32
use RuntimeException;
33
use Throwable;
34
use TYPO3\CMS\Core\Context\Exception\AspectNotFoundException;
35
use TYPO3\CMS\Core\Utility\GeneralUtility;
36
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
37
use UnexpectedValueException;
38
39
/**
40
 * Page Indexer to index TYPO3 pages used by the Index Queue.
41
 *
42
 * @author Ingo Renner <[email protected]>
43
 * @author Daniel Poetzinger <[email protected]>
44
 * @author Timo Schmidt <[email protected]>
45
 */
46
class Typo3PageIndexer
47
{
48
    /**
49
     * ID of the current page's Solr document.
50
     *
51
     * @var string
52
     */
53
    protected static string $pageSolrDocumentId = '';
54
55
    /**
56
     * The Solr document generated for the current page.
57
     *
58
     * @var Document
59
     */
60
    protected static Document $pageSolrDocument;
61
62
    /**
63
     * The mount point parameter used in the Frontend controller.
64
     *
65
     * @var string
66
     */
67
    protected string $mountPointParameter = '';
68
69
    /**
70
     * Solr server connection.
71
     *
72
     * @var SolrConnection|null
73
     */
74
    protected ?SolrConnection $solrConnection = null;
75
76
    /**
77
     * Frontend page object (TSFE).
78
     *
79
     * @var TypoScriptFrontendController
80
     */
81
    protected TypoScriptFrontendController $page;
82
83
    /**
84
     * Content extractor to extract content from TYPO3 pages
85
     *
86
     * @var Typo3PageContentExtractor
87
     */
88
    protected Typo3PageContentExtractor $contentExtractor;
89
90
    /**
91
     * URL to be indexed as the page's URL
92
     *
93
     * @var string
94
     */
95
    protected string $pageUrl = '';
96
97
    /**
98
     * The page's access rootline
99
     *
100
     * @var Rootline
101
     */
102
    protected Rootline $pageAccessRootline;
103
104
    /**
105
     * Documents that have been sent to Solr
106
     *
107
     * @var array
108
     */
109
    protected array $documentsSentToSolr = [];
110
111
    /**
112
     * @var TypoScriptConfiguration
113
     */
114
    protected TypoScriptConfiguration $configuration;
115
116
    /**
117
     * @var Item
118
     */
119
    protected Item $indexQueueItem;
120
121
    /**
122
     * @var SolrLogManager
123
     */
124
    protected SolrLogManager $logger;
125
126
    /**
127
     * Constructor
128
     *
129
     * @param TypoScriptFrontendController $page The page to index
130
     */
131 68
    public function __construct(TypoScriptFrontendController $page)
132
    {
133 68
        $this->logger = GeneralUtility::makeInstance(SolrLogManager::class, /** @scrutinizer ignore-type */ __CLASS__);
134
135 68
        $this->page = $page;
136 68
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
137 68
        $this->configuration = Util::getSolrConfiguration();
138
139
        try {
140 68
            $this->initializeSolrConnection();
141
        } catch (Throwable $e) {
142
            $this->logger->log(
143
                SolrLogManager::ERROR,
144
                $e->getMessage() . ' Error code: ' . $e->getCode()
145
            );
146
147
            // TODO extract to a class "ExceptionLogger"
148
            if ($this->configuration->getLoggingExceptions()) {
149
                $this->logger->log(
150
                    SolrLogManager::ERROR,
151
                    'Exception while trying to index a page',
152
                    [
153
                        $e->__toString(),
154
                    ]
155
                );
156
            }
157
        }
158
159 68
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, /** @scrutinizer ignore-type */ '');
160
    }
161
162
    /**
     * Stores the Index Queue item this indexer is working on.
     *
     * The item is later consulted to determine the indexing configuration
     * name for the current page.
     *
     * @param Item $indexQueueItem The Index Queue item to remember
     */
    public function setIndexQueueItem(Item $indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }
169
170
    /**
     * Looks up the Solr connection for the current page and language and
     * stores it, provided its write service answers a ping.
     *
     * @throws AspectNotFoundException
     * @throws DBALDriverException
     * @throws NoSolrConnectionFoundException
     * @throws Exception if the write service of the connection cannot be pinged
     */
    protected function initializeSolrConnection()
    {
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $solr = $connectionManager->getConnectionByPageId($this->page->id, Util::getLanguageUid());

        if ($solr->getWriteService()->ping()) {
            $this->solrConnection = $solr;
            return;
        }

        // do not continue if no server is available
        throw new Exception(
            'No Solr instance available while trying to index a page.',
            1234790825
        );
    }
192
193
    /**
     * Returns the Solr document ID recorded for the current page.
     *
     * The value is written whenever a page document is built; until then
     * the property holds its default, an empty string.
     *
     * @return string The page's Solr document ID or an empty string if no document was generated yet.
     */
    public static function getPageSolrDocumentId(): string
    {
        return self::$pageSolrDocumentId;
    }
202
203
    /**
     * Gets the Solr document generated for the current page.
     *
     * The backing static property is typed and has no default value, so it
     * stays uninitialized until indexPage() assigns it. Reading an
     * uninitialized typed property directly raises an Error; the null
     * coalescing operator turns that state into the documented NULL result.
     *
     * @return Document|null The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument(): ?Document
    {
        return self::$pageSolrDocument ?? null;
    }
212
213
    /**
     * Replaces the Solr connection set up by the constructor with the given
     * one, after verifying that its write service answers a ping.
     *
     * @param SolrConnection $solrConnection Solr connection
     * @throws Exception if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrConnection $solrConnection)
    {
        $isReachable = $solrConnection->getWriteService()->ping();
        if ($isReachable) {
            $this->solrConnection = $solrConnection;
            return;
        }

        throw new Exception(
            'Could not connect to Solr server.',
            1323946472
        );
    }
231
232
    /**
     * Indexes the current page.
     *
     * Builds the page document, lets substitute indexers and post processors
     * work on it, collects additional documents, runs the field processing
     * and finally sends everything to Solr. The documents handed to Solr are
     * remembered and can be read via getDocumentsSentToSolr().
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     *              or when no Solr connection is available
     * @throws AspectNotFoundException
     * @throws DBALDriverException
     * @throws DBALException
     */
    public function indexPage(): bool
    {
        if ($this->solrConnection === null) {
            // Intended early return: without a Solr connection there is no
            // point in spending processing time on building documents.
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($pageDocument);
        self::$pageSolrDocument = $pageDocument;

        // The list starts with the page document itself; this will become
        // useful once individual records are indexed instead of whole pages.
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }
267
268
    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     * to the page document.
     *
     * Each class reference registered under
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument']
     * is instantiated and must implement PageDocumentPostProcessor. Nothing
     * happens if no such registration array exists.
     *
     * @param Document $pageDocument The page document handed to each post processor
     * @throws UnexpectedValueException if a registered class does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors(Document $pageDocument)
    {
        $classReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] ?? null;
        if (!is_array($classReferences)) {
            return;
        }

        foreach ($classReferences as $classReference) {
            $postProcessor = GeneralUtility::makeInstance($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // Name the offending post processor class, not the document class.
                throw new UnexpectedValueException(
                    get_class($postProcessor) . ' must implement interface ' . PageDocumentPostProcessor::class,
                    1397739154
                );
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }
288
289
    /**
     * Builds the Solr document for the current page and records its ID.
     *
     * The document is created by the document builder from the page, the
     * page URL, the access rootline and the mount point parameter. Its "id"
     * field is stored in the static document ID property as a side effect.
     *
     * @return Document A document representing the page
     * @throws AspectNotFoundException
     */
    protected function getPageDocument(): Document
    {
        $pageDocument = GeneralUtility::makeInstance(Builder::class)->fromPage(
            $this->page,
            $this->pageUrl,
            $this->pageAccessRootline,
            $this->mountPointParameter
        );

        self::$pageSolrDocumentId = $pageDocument['id'];

        return $pageDocument;
    }
304
305
    // Logging
306
    // TODO replace by a central logger
307
308
    /**
     * Returns the mount point parameter currently set on this indexer.
     *
     * @return string The mount point parameter; an empty string unless one has been set
     */
    public function getMountPointParameter(): string
    {
        return $this->mountPointParameter;
    }
317
318
    // Misc
319
320
    /**
     * Stores the mount point parameter that is passed on to the document
     * builder when the page document is created.
     *
     * @param string $mountPointParameter The mount point parameter to use
     */
    public function setMountPointParameter(string $mountPointParameter)
    {
        $this->mountPointParameter = $mountPointParameter;
    }
329
330
    /**
     * Gives third party extensions the chance to replace or modify the page
     * document created by this indexer.
     *
     * Every class reference registered under
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument']
     * is instantiated in turn; each one receives the document returned by
     * its predecessor. PageFieldMappingIndexer instances are additionally
     * told the indexing configuration name of the current page.
     *
     * @param Document $pageDocument The page document created by this indexer.
     * @return Document An Apache Solr document representing the currently indexed page
     * @throws UnexpectedValueException if a registered class does not implement SubstitutePageIndexer
     */
    protected function substitutePageDocument(Document $pageDocument): Document
    {
        $classReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'] ?? null;
        if (!is_array($classReferences)) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($classReferences as $classReference) {
            $substituteIndexer = GeneralUtility::makeInstance($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                throw new UnexpectedValueException(
                    get_class($substituteIndexer) . ' must implement interface ' . SubstitutePageIndexer::class,
                    1310491001
                );
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            // The result of one substitute indexer is the input of the next.
            $pageDocument = $substituteIndexer->getPageDocument($pageDocument);
        }

        return $pageDocument;
    }
362
363
    /**
     * Determines the indexing configuration name for the current page.
     *
     * Uses the name provided by the Index Queue item when one has been set,
     * and falls back to "pages" otherwise.
     *
     * @return string The indexing configuration name
     */
    protected function getIndexConfigurationNameForCurrentPage(): string
    {
        // isset() is safe on the typed property even while it is uninitialized.
        if (isset($this->indexQueueItem)) {
            return $this->indexQueueItem->getIndexingConfigurationName();
        }

        return 'pages';
    }
372
373
    /**
     * Collects additional documents that third party extensions want to
     * have indexed along with the current page.
     *
     * Every class reference registered under
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments']
     * is instantiated and asked for additional documents; non-empty results
     * are appended to the list, which is also passed to the next indexer.
     *
     * @param Document $pageDocument The main document representing this page.
     * @param Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by all additional Document objects to index
     * @throws UnexpectedValueException if a registered class does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(Document $pageDocument, array $existingDocuments): array
    {
        $classReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] ?? null;
        if (!is_array($classReferences)) {
            return $existingDocuments;
        }

        $documents = $existingDocuments;
        foreach ($classReferences as $classReference) {
            $additionalIndexer = GeneralUtility::makeInstance($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                throw new UnexpectedValueException(
                    get_class($additionalIndexer) . ' must implement interface ' . AdditionalPageIndexer::class,
                    1310491024
                );
            }

            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (empty($additionalDocuments)) {
                continue;
            }

            $documents = array_merge($documents, $additionalDocuments);
        }

        return $documents;
    }
405
406
    /**
     * Hands the given documents to the field processing service together
     * with the configured field processing instructions.
     *
     * The service is only instantiated and called when at least one
     * processing instruction is configured.
     *
     * @param array $documents An array of documents to manipulate
     * @throws DBALDriverException
     * @throws DBALException
     */
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($processingInstructions) === 0) {
            return;
        }

        GeneralUtility::makeInstance(Service::class)->processDocuments($documents, $processingInstructions);
    }
422
423
    /**
     * Adds the collected documents to the Solr index.
     *
     * Documents are sent to the write service of the Solr connection in
     * chunks of 20. Any failure (including a non-200 HTTP status of a chunk
     * response) is logged and reported through the return value; it is not
     * re-thrown.
     *
     * @param array $documents An array of Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     *              (no documents given, no Solr connection, or a failed request)
     */
    protected function addDocumentsToSolrIndex(array $documents): bool
    {
        if ($documents === []) {
            return false;
        }

        // The connection property is nullable (initialization in the
        // constructor may have failed); report that explicitly instead of
        // calling a method on NULL.
        $connection = $this->solrConnection;
        if ($connection === null) {
            $this->logger->log(
                SolrLogManager::ERROR,
                'No Solr connection available while trying to add documents.'
            );
            return false;
        }

        $documentsAdded = false;

        try {
            $this->logger->log(SolrLogManager::INFO, 'Adding ' . count($documents) . ' documents.', $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $documentChunk) {
                $response = $connection->getWriteService()->addDocuments($documentChunk);
                if ((int)$response->getHttpStatus() !== 200) {
                    throw new RuntimeException('Solr Request failed.', 1331834983);
                }
            }

            $documentsAdded = true;
        } catch (Throwable $e) {
            $this->logger->log(SolrLogManager::ERROR, $e->getMessage() . ' Error code: ' . $e->getCode());

            // The full exception dump is only logged when exception logging is enabled.
            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(SolrLogManager::ERROR, 'Exception while adding documents', [$e->__toString()]);
            }
        }

        return $documentsAdded;
    }
460
461
    /**
     * Returns the URL that is used for the page document.
     *
     * The constructor initializes it from the current request URL; it can
     * be overridden with setPageUrl().
     *
     * @return string URL of the current page.
     */
    public function getPageUrl(): string
    {
        return $this->pageUrl;
    }
470
471
    /**
     * Overrides the URL that is used for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl(string $url)
    {
        $this->pageUrl = $url;
    }
480
481
    /**
     * Returns the access rootline that is passed to the document builder
     * for the current page.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline(): Rootline
    {
        return $this->pageAccessRootline;
    }
490
491
    /**
     * Replaces the access rootline assigned by the constructor.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }
500
501
    /**
     * Returns the documents that indexPage() handed to Solr.
     *
     * The list is empty until indexPage() has reached the point where the
     * documents are sent.
     *
     * @return array An array of Document objects
     */
    public function getDocumentsSentToSolr(): array
    {
        return $this->documentsSentToSolr;
    }
510
}
511