Passed
Push — release-11.5.x ( 385fe8...cd49eb )
by Rafael
53:22 queued 14:05
created

Typo3PageIndexer::setSolrConnection()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2.7462

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 10
ccs 3
cts 7
cp 0.4286
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2.7462
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the TYPO3 CMS project.
7
 *
8
 * It is free software; you can redistribute it and/or modify it under
9
 * the terms of the GNU General Public License, either version 2
10
 * of the License, or any later version.
11
 *
12
 * For the full copyright and license information, please read the
13
 * LICENSE.txt file that was distributed with this source code.
14
 *
15
 * The TYPO3 project - inspiring people to share!
16
 */
17
18
namespace ApacheSolrForTypo3\Solr;
19
20
use ApacheSolrForTypo3\Solr\Access\Rootline;
21
use ApacheSolrForTypo3\Solr\Domain\Search\ApacheSolrDocument\Builder;
22
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
23
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
24
use ApacheSolrForTypo3\Solr\IndexQueue\Indexer;
25
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
26
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
27
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
28
use ApacheSolrForTypo3\Solr\System\Solr\Document\Document;
29
use ApacheSolrForTypo3\Solr\System\Solr\SolrConnection;
30
use Doctrine\DBAL\Driver\Exception as DBALDriverException;
31
use Doctrine\DBAL\Exception as DBALException;
32
use Exception;
0 ignored issues
show
Bug introduced by
This use statement conflicts with another class in this namespace, ApacheSolrForTypo3\Solr\Exception. Consider defining an alias.

Let's assume that you have a directory layout like this:

.
|-- OtherDir
|   |-- Bar.php
|   `-- Foo.php
`-- SomeDir
    `-- Foo.php

and let's assume the following content of Bar.php:

// Bar.php
namespace OtherDir;

use SomeDir\Foo; // This now conflicts with the class OtherDir\Foo

If both files OtherDir/Foo.php and SomeDir/Foo.php are loaded in the same runtime, you will see a PHP error such as the following:

PHP Fatal error:  Cannot use SomeDir\Foo as Foo because the name is already in use in OtherDir/Foo.php

However, as OtherDir/Foo.php does not necessarily have to be loaded and the error is only triggered if it is loaded before OtherDir/Bar.php, this problem might go unnoticed for a while. In order to prevent this error from surfacing, you must import the namespace with a different alias:

// Bar.php
namespace OtherDir;

use SomeDir\Foo as SomeDirFoo; // There is no conflict anymore.
Loading history...
33
use RuntimeException;
34
use Throwable;
35
use TYPO3\CMS\Core\Context\Exception\AspectNotFoundException;
36
use TYPO3\CMS\Core\Utility\GeneralUtility;
37
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
38
use UnexpectedValueException;
39
40
/**
41
 * Page Indexer to index TYPO3 pages used by the Index Queue.
42
 *
43
 * @author Ingo Renner <[email protected]>
44
 * @author Daniel Poetzinger <[email protected]>
45
 * @author Timo Schmidt <[email protected]>
46
 */
47
class Typo3PageIndexer
48
{
49
    /** ID of the current page's Solr document; empty string until a document has been built. */
    protected static string $pageSolrDocumentId = '';

    /**
     * The Solr document generated for the current page.
     *
     * Declared without a default value, so it is uninitialized until
     * indexPage() assigns it.
     */
    protected static Document $pageSolrDocument;

    /** The mount point parameter used in the Frontend controller. */
    protected string $mountPointParameter = '';

    /** Solr server connection; NULL as long as no connection has been set. */
    protected ?SolrConnection $solrConnection = null;

    /** Frontend page object (TSFE). */
    protected TypoScriptFrontendController $page;

    /** Content extractor to extract content from TYPO3 pages. */
    protected Typo3PageContentExtractor $contentExtractor;

    /** URL to be indexed as the page's URL. */
    protected string $pageUrl = '';

    /** The page's access rootline. */
    protected Rootline $pageAccessRootline;

    /**
     * Documents that have been sent to Solr.
     *
     * @var Document[]
     */
    protected array $documentsSentToSolr = [];

    /** Solr configuration obtained from Util::getSolrConfiguration() in the constructor. */
    protected TypoScriptConfiguration $configuration;

    /** Index Queue item assigned through setIndexQueueItem(); uninitialized until then. */
    protected Item $indexQueueItem;

    /** Logger used to report indexing problems. */
    protected SolrLogManager $logger;
126
127
    /**
     * Creates a page indexer for the given frontend page.
     *
     * Sets up the logger, stores the page, takes the current request URL as
     * the URL to index, loads the Solr configuration and tries to open the
     * Solr connection. A failure while connecting is only logged; the
     * connection property then keeps its NULL default.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->logger = GeneralUtility::makeInstance(SolrLogManager::class, /** @scrutinizer ignore-type */ __CLASS__);
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (Throwable $connectionError) {
            $summary = $connectionError->getMessage() . ' Error code: ' . $connectionError->getCode();
            $this->logger->log(SolrLogManager::ERROR, $summary);

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                $details = [(string)$connectionError];
                $this->logger->log(SolrLogManager::ERROR, 'Exception while trying to index a page', $details);
            }
        }

        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, /** @scrutinizer ignore-type */ '');
    }
162
163
    /**
     * Assigns the Index Queue item this indexer works on.
     *
     * @param Item $indexQueueItem The Index Queue item to use
     */
    public function setIndexQueueItem(Item $indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }
170
171
    /**
     * Looks up the Solr connection for the current page and language and
     * stores it once its write service answers a ping.
     *
     * @throws AspectNotFoundException
     * @throws DBALDriverException
     * @throws NoSolrConnectionFoundException
     * @throws Exception if the write service does not respond to the ping
     */
    protected function initializeSolrConnection()
    {
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $connection = $connectionManager->getConnectionByPageId($this->page->id, Util::getLanguageUid());

        if ($connection->getWriteService()->ping()) {
            $this->solrConnection = $connection;

            return;
        }

        // do not continue if no server is available
        throw new Exception(
            'No Solr instance available while trying to index a page.',
            1234790825
        );
    }
193
194
    /**
     * Returns the ID of the Solr document of the current page.
     *
     * @return string The document ID, or an empty string while no document has been generated yet.
     */
    public static function getPageSolrDocumentId(): string
    {
        return self::$pageSolrDocumentId;
    }
203
204
    /**
     * Gets the Solr document generated for the current page.
     *
     * The backing static property is typed but has no default value, so a
     * plain read before indexPage() has assigned it would raise an Error
     * ("must not be accessed before initialization"). The null coalescing
     * read turns that uninitialized state into the documented NULL result.
     *
     * @return Document|null The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument(): ?Document
    {
        return self::$pageSolrDocument ?? null;
    }
213
214
    /**
     * Replaces the Solr connection set up by the constructor with the given
     * one, provided its write service answers a ping.
     *
     * @param SolrConnection $solrConnection Solr connection
     * @throws Exception if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrConnection $solrConnection)
    {
        if ($solrConnection->getWriteService()->ping()) {
            $this->solrConnection = $solrConnection;

            return;
        }

        throw new Exception(
            'Could not connect to Solr server.',
            1323946472
        );
    }
232
233
    /**
     * Indexes a page.
     *
     * Builds the page document, lets the substitute, post-processor and
     * additional-document hooks run, applies field processing and the
     * preAddModifyDocuments step, and finally sends the documents to Solr.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws AspectNotFoundException
     * @throws DBALDriverException
     * @throws DBALException
     */
    public function indexPage(): bool
    {
        if ($this->solrConnection === null) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the solr server isn't available
            // anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        /** @scrutinizer ignore-deprecated */ $this->applyIndexPagePostProcessors($pageDocument);
        self::$pageSolrDocument = $pageDocument;

        // the list will become useful as soon as individual records are indexed instead of whole pages
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);
        $documents = Indexer::preAddModifyDocuments(
            $this->indexQueueItem,
            $this->page->getLanguage()->getLanguageId(),
            $documents
        );

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }
273
274
    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * Does nothing when no post processors are registered. Otherwise a
     * deprecation notice is triggered and every registered class is
     * instantiated and asked to post-process the page document.
     *
     * @deprecated
     *
     * @param Document $pageDocument
     * @throws UnexpectedValueException if a registered class does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors(Document $pageDocument)
    {
        $postProcessorClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] ?? null;
        if (!is_array($postProcessorClasses)) {
            return;
        }

        trigger_error(
            "The hook indexPagePostProcessPageDocument has been superseded by \$GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['IndexQueueIndexer']['preAddModifyDocuments']",
            E_USER_DEPRECATED
        );

        foreach ($postProcessorClasses as $classReference) {
            $postProcessor = GeneralUtility::makeInstance($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // Name the offending post processor class, not the document class.
                throw new UnexpectedValueException(
                    get_class($postProcessor) . ' must implement interface ' . PageDocumentPostProcessor::class,
                    1397739154
                );
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }
301
302
    /**
     * Builds the Solr document for the current page and remembers its ID in
     * the static $pageSolrDocumentId.
     *
     * @return Document A document representing the page
     * @throws AspectNotFoundException
     */
    protected function getPageDocument(): Document
    {
        $pageDocument = GeneralUtility::makeInstance(Builder::class)->fromPage(
            $this->page,
            $this->pageUrl,
            $this->pageAccessRootline,
            $this->mountPointParameter
        );
        self::$pageSolrDocumentId = $pageDocument['id'];

        return $pageDocument;
    }
317
318
    // Logging
319
    // TODO replace by a central logger
320
321
    /**
     * Returns the mount point parameter that is used in the Frontend controller.
     *
     * @return string The mount point parameter; empty string if none has been set
     */
    public function getMountPointParameter(): string
    {
        return $this->mountPointParameter;
    }
330
331
    // Misc
332
333
    /**
     * Stores the mount point parameter that is used in the Frontend controller.
     *
     * @param string $mountPointParameter The mount point parameter to use
     */
    public function setMountPointParameter(string $mountPointParameter)
    {
        $this->mountPointParameter = $mountPointParameter;
    }
342
343
    /**
     * Gives third party extensions the chance to replace or modify the page
     * document created by this indexer.
     *
     * Every class registered under the indexPageSubstitutePageDocument hook
     * is instantiated in turn; each one receives the document returned by
     * its predecessor.
     *
     * @param Document $pageDocument The page document created by this indexer.
     * @return Document An Apache Solr document representing the currently indexed page
     * @throws UnexpectedValueException if a registered class does not implement SubstitutePageIndexer
     */
    protected function substitutePageDocument(Document $pageDocument): Document
    {
        $substituteIndexerClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'] ?? null;
        if (!is_array($substituteIndexerClasses)) {
            return $pageDocument;
        }

        $configurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($substituteIndexerClasses as $classReference) {
            $indexer = GeneralUtility::makeInstance($classReference);

            if (!($indexer instanceof SubstitutePageIndexer)) {
                throw new UnexpectedValueException(
                    get_class($indexer) . ' must implement interface ' . SubstitutePageIndexer::class,
                    1310491001
                );
            }

            // Field mapping indexers need to know which indexing configuration applies.
            if ($indexer instanceof PageFieldMappingIndexer) {
                $indexer->setPageIndexingConfigurationName($configurationName);
            }

            $pageDocument = $indexer->getPageDocument($pageDocument);
        }

        return $pageDocument;
    }
375
376
    /**
     * Returns the indexing configuration name of the assigned queue item,
     * or 'pages' when no queue item has been set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage(): string
    {
        // isset() is also false for a typed property that is still uninitialized.
        if (isset($this->indexQueueItem)) {
            return $this->indexQueueItem->getIndexingConfigurationName();
        }

        return 'pages';
    }
385
386
    /**
     * Collects additional documents that third party extensions want to
     * have indexed for the current page.
     *
     * Every class registered under the indexPageAddDocuments hook is asked
     * for documents; non-empty results are appended to the list.
     *
     * @param Document $pageDocument The main document representing this page.
     * @param Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by any additional Document objects to index
     * @throws UnexpectedValueException if a registered class does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(Document $pageDocument, array $existingDocuments): array
    {
        $additionalIndexerClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] ?? null;
        if (!is_array($additionalIndexerClasses)) {
            return $existingDocuments;
        }

        $collected = $existingDocuments;
        foreach ($additionalIndexerClasses as $classReference) {
            $indexer = GeneralUtility::makeInstance($classReference);

            if (!($indexer instanceof AdditionalPageIndexer)) {
                throw new UnexpectedValueException(
                    get_class($indexer) . ' must implement interface ' . AdditionalPageIndexer::class,
                    1310491024
                );
            }

            $extraDocuments = $indexer->getAdditionalPageDocuments($pageDocument, $collected);
            if (empty($extraDocuments)) {
                continue;
            }

            $collected = array_merge($collected, $extraDocuments);
        }

        return $collected;
    }
418
419
    /**
     * Hands the given documents to the field processing service, which
     * manipulates fields as defined in the field processing instructions.
     * Nothing happens when no processing instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     * @throws DBALDriverException
     * @throws DBALException
     */
    protected function processDocuments(array $documents)
    {
        $instructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($instructions) === 0) {
            return;
        }

        GeneralUtility::makeInstance(Service::class)->processDocuments($documents, $instructions);
    }
435
436
    /**
     * Adds the collected documents to the Solr index.
     *
     * The documents are sent in chunks of 20. Errors raised while sending
     * are logged and reported through the return value instead of being
     * thrown to the caller.
     *
     * @param array $documents An array of Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToSolrIndex(array $documents): bool
    {
        if (!count($documents)) {
            return false;
        }

        // The connection property is nullable, so check it explicitly instead
        // of relying on the catch block below to absorb a "call on null" Error.
        if ($this->solrConnection === null) {
            $this->logger->log(
                SolrLogManager::ERROR,
                'No Solr connection available while trying to add documents.'
            );

            return false;
        }

        try {
            $this->logger->log(SolrLogManager::INFO, 'Adding ' . count($documents) . ' documents.', $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $documentChunk) {
                $response = $this->solrConnection->getWriteService()->addDocuments($documentChunk);
                if ($response->getHttpStatus() != 200) {
                    throw new RuntimeException('Solr Request failed.', 1331834983);
                }
            }

            return true;
        } catch (Throwable $e) {
            $this->logger->log(SolrLogManager::ERROR, $e->getMessage() . ' Error code: ' . $e->getCode());

            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(SolrLogManager::ERROR, 'Exception while adding documents', [$e->__toString()]);
            }
        }

        return false;
    }
473
474
    /**
     * Returns the URL that is indexed as the current page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl(): string
    {
        return $this->pageUrl;
    }
483
484
    /**
     * Overrides the URL to use for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl(string $url)
    {
        $this->pageUrl = $url;
    }
493
494
    /**
     * Returns the access rootline of the page.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline(): Rootline
    {
        return $this->pageAccessRootline;
    }
503
504
    /**
     * Replaces the access rootline of the page.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }
513
514
    /**
     * Returns the documents that have been sent to Solr; the list is empty
     * until indexPage() has stored its documents.
     *
     * @return array An array of Document objects
     */
    public function getDocumentsSentToSolr(): array
    {
        return $this->documentsSentToSolr;
    }
523
}
524