Failed Conditions
Push — task/2976_TYPO3.11_compatibili... ( 38a128...0b41c4 )
by Rafael
25:41 queued 19:10
created

Typo3PageIndexer::getPageSolrDocument()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 1
nc 1
nop 0
crap 1
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 3 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Access\Rootline;
28
use ApacheSolrForTypo3\Solr\Domain\Search\ApacheSolrDocument\Builder;
29
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
30
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
31
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
32
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
33
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
34
use ApacheSolrForTypo3\Solr\System\Solr\Document\Document;
35
use ApacheSolrForTypo3\Solr\System\Solr\SolrConnection;
36
use Exception;
37
use TYPO3\CMS\Core\Utility\GeneralUtility;
38
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
39
40
/**
41
 * Page Indexer to index TYPO3 pages used by the Index Queue.
42
 *
43
 * @author Ingo Renner <[email protected]>
44
 * @author Daniel Poetzinger <[email protected]>
45
 * @author Timo Schmidt <[email protected]>
46
 */
47
class Typo3PageIndexer
48
{
49
50
    /**
51
     * ID of the current page's Solr document.
52
     *
53
     * @var string
54
     */
55
    protected static $pageSolrDocumentId = '';
56
    /**
57
     * The Solr document generated for the current page.
58
     *
59
     * @var Document
60
     */
61
    protected static $pageSolrDocument = null;
62
    /**
63
     * The mount point parameter used in the Frontend controller.
64
     *
65
     * @var string
66
     */
67
    protected $mountPointParameter;
68
    /**
69
     * Solr server connection.
70
     *
71
     * @var SolrConnection
72
     */
73
    protected $solrConnection = null;
74
    /**
75
     * Frontend page object (TSFE).
76
     *
77
     * @var TypoScriptFrontendController
78
     */
79
    protected $page = null;
80
    /**
81
     * Content extractor to extract content from TYPO3 pages
82
     *
83
     * @var Typo3PageContentExtractor
84
     */
85
    protected $contentExtractor = null;
86
    /**
87
     * URL to be indexed as the page's URL
88
     *
89
     * @var string
90
     */
91
    protected $pageUrl = '';
92
    /**
93
     * The page's access rootline
94
     *
95
     * @var Rootline
96
     */
97
    protected $pageAccessRootline = null;
98
    /**
99
     * Documents that have been sent to Solr
100
     *
101
     * @var array
102
     */
103
    protected $documentsSentToSolr = [];
104
105
    /**
106
     * @var TypoScriptConfiguration
107
     */
108
    protected $configuration;
109
110
    /**
111
     * @var Item
112
     */
113
    protected $indexQueueItem;
114
115
    /**
116
     * @var \ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager
117
     */
118
    protected $logger = null;
119
120
    /**
121
     * Constructor
122
     *
123
     * @param TypoScriptFrontendController $page The page to index
124
     */
125 31
    public function __construct(TypoScriptFrontendController $page)
    {
        // Collaborators that do not require a reachable Solr server.
        $this->logger = GeneralUtility::makeInstance(SolrLogManager::class, /** @scrutinizer ignore-type */ __CLASS__);
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (Exception $exception) {
            // A failed connection is only logged here: $this->solrConnection
            // stays null and indexPage() returns FALSE for that case.
            $summary = $exception->getMessage() . ' Error code: ' . $exception->getCode();
            $this->logger->log(SolrLogManager::ERROR, $summary);

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(
                    SolrLogManager::ERROR,
                    'Exception while trying to index a page',
                    [(string)$exception]
                );
            }
        }

        // Default access rootline, built from an empty string; it can be
        // replaced later through setPageAccessRootline().
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, /** @scrutinizer ignore-type */ '');
    }
155
156
    /**
157
     * @param Item $indexQueueItem
158
     */
159 10
    public function setIndexQueueItem($indexQueueItem)
    {
        // The item is read by getIndexConfigurationNameForCurrentPage() to
        // determine the indexing configuration name.
        $this->indexQueueItem = $indexQueueItem;
    }
163
164
    /**
165
     * Initializes the Solr server connection.
166
     *
167
     * @throws Exception when no Solr connection can be established.
168
     */
169 31
    protected function initializeSolrConnection()
    {
        // Resolve the connection for the current page ID and language.
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $connection = $connectionManager->getConnectionByPageId($this->page->id, Util::getLanguageUid());

        // do not continue if no server is available
        $serverIsReachable = $connection->getWriteService()->ping();
        if (!$serverIsReachable) {
            throw new Exception(
                'No Solr instance available while trying to index a page.',
                1234790825
            );
        }

        // Only a connection that answered the ping is kept.
        $this->solrConnection = $connection;
    }
183
184
    /**
185
     * Gets the current page's Solr document ID.
186
     *
187
     * @return string The page's Solr document ID, or an empty string if no document was generated yet.
188
     */
189
    public static function getPageSolrDocumentId()
    {
        // Static state: assigned by getPageDocument() when the page document
        // is built; an empty string until then.
        return self::$pageSolrDocumentId;
    }
193
194
    /**
195
     * Gets the Solr document generated for the current page.
196
     *
197
     * @return Document|NULL The page's Solr document or NULL if it has not been generated yet.
198
     */
199 10
    public static function getPageSolrDocument()
    {
        // Static state: assigned by indexPage() once the page document has
        // passed substitution and post processing; null until then.
        return self::$pageSolrDocument;
    }
203
204
    /**
205
     * Allows to provide a Solr server connection other than the one
206
     * initialized by the constructor.
207
     *
208
     * @param SolrConnection $solrConnection Solr connection
209
     * @throws Exception if the Solr server cannot be reached
210
     */
211 10
    public function setSolrConnection(SolrConnection $solrConnection)
    {
        // Reject connections whose write service does not answer a ping, so
        // the connection stored on this indexer has been checked first.
        $serverIsReachable = $solrConnection->getWriteService()->ping();
        if (!$serverIsReachable) {
            throw new Exception(
                'Could not connect to Solr server.',
                1323946472
            );
        }

        $this->solrConnection = $solrConnection;
    }
222
223
    /**
224
     * Indexes a page.
225
     *
226
     * @return bool TRUE after successfully indexing the page, FALSE on error
227
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
228
     */
229 31
    public function indexPage()
    {
        // Intended early return: it makes no sense to continue and waste
        // processing time if no Solr connection is available.
        // FIXME use an exception
        if ($this->solrConnection === null) {
            return false;
        }

        // Build the page document, let substitute indexers replace it, then
        // run the registered post processors on the result.
        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($pageDocument);
        self::$pageSolrDocument = $pageDocument;

        // The page document always comes first; additional indexers may
        // contribute further documents for the same page.
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);

        // Recorded regardless of whether adding the documents succeeded.
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }
257
258
    /**
259
     * Applies the configured post processors (indexPagePostProcessPageDocument)
260
     *
261
     * @param Document $pageDocument
262
     */
263 31
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        // "?? null" avoids an "undefined array key" notice/warning when no
        // post processor has been registered for this hook.
        $postProcessorClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] ?? null;
        if (!is_array($postProcessorClasses)) {
            return;
        }

        foreach ($postProcessorClasses as $classReference) {
            $postProcessor = GeneralUtility::makeInstance($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // Report the offending post processor class, not the class
                // of the document that was about to be processed.
                throw new \UnexpectedValueException(
                    get_class($postProcessor) . ' must implement interface ' . PageDocumentPostProcessor::class,
                    1397739154
                );
            }

            // The post processor receives the document and the frontend
            // controller of the page being indexed.
            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }
278
279
    /**
280
     * Builds the Solr document for the current page.
281
     *
282
     * @return Document A document representing the page
283
     */
284 31
    protected function getPageDocument()
    {
        // The mount point parameter may still be unset (null), hence the cast.
        $pageDocument = GeneralUtility::makeInstance(Builder::class)->fromPage(
            $this->page,
            $this->pageUrl,
            $this->pageAccessRootline,
            (string)$this->mountPointParameter
        );

        // Remember the document's ID so getPageSolrDocumentId() can return it.
        self::$pageSolrDocumentId = $pageDocument['id'];

        return $pageDocument;
    }
293
294
295
    // Logging
296
    // TODO replace by a central logger
297
298
    /**
299
     * Gets the mount point parameter that is used in the Frontend controller.
300
     *
301
     * @return string
302
     */
303
    public function getMountPointParameter()
    {
        // Returned as stored; the property has no default value, so it is
        // null until setMountPointParameter() has been called.
        return $this->mountPointParameter;
    }
307
308
    // Misc
309
310
    /**
311
     * Sets the mount point parameter that is used in the Frontend controller.
312
     *
313
     * @param string $mountPointParameter
314
     */
315 10
    public function setMountPointParameter($mountPointParameter)
    {
        // Normalise to a string before storing.
        $normalizedParameter = (string)$mountPointParameter;
        $this->mountPointParameter = $normalizedParameter;
    }
319
320
    /**
321
     * Allows third party extensions to replace or modify the page document
322
     * created by this indexer.
323
     *
324
     * @param Document $pageDocument The page document created by this indexer.
325
     * @return Document An Apache Solr document representing the currently indexed page
326
     */
327 31
    protected function substitutePageDocument(Document $pageDocument)
    {
        // "?? null" avoids an "undefined array key" notice/warning when no
        // substitute indexer has been registered for this hook.
        $substituteIndexerClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'] ?? null;
        if (!is_array($substituteIndexerClasses)) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($substituteIndexerClasses as $classReference) {
            $substituteIndexer = GeneralUtility::makeInstance($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                $message = get_class($substituteIndexer) . ' must implement interface ' . SubstitutePageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491001);
            }

            // The field mapping indexer additionally needs to know which
            // indexing configuration applies to the current page.
            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            if (!$substituteDocument instanceof Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Document object.';
                throw new \UnexpectedValueException($message, 1310490952);
            }

            // Indexers are chained: each one receives the document returned
            // by the previous one.
            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }
356
357
    /**
358
     * Retrieves the indexConfigurationName from the related queueItem, or falls back to pages when no queue item set.
359
     *
360
     * @return string
361
     */
362 31
    protected function getIndexConfigurationNameForCurrentPage()
    {
        // Without a queue item, fall back to the "pages" configuration name.
        if (!isset($this->indexQueueItem)) {
            return 'pages';
        }

        return $this->indexQueueItem->getIndexingConfigurationName();
    }
366
367
    /**
368
     * Allows third party extensions to provide additional documents which
369
     * should be indexed for the current page.
370
     *
371
     * @param Document $pageDocument The main document representing this page.
372
     * @param Document[] $existingDocuments An array of documents already created for this page.
373
     * @return array An array of additional Document objects to index
374
     */
375 31
    protected function getAdditionalDocuments(Document $pageDocument, array $existingDocuments)
    {
        // "?? null" avoids an "undefined array key" notice/warning when no
        // additional indexer has been registered for this hook.
        $additionalIndexerClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] ?? null;
        if (!is_array($additionalIndexerClasses)) {
            return $existingDocuments;
        }

        $documents = $existingDocuments;
        foreach ($additionalIndexerClasses as $classReference) {
            $additionalIndexer = GeneralUtility::makeInstance($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                $message = get_class($additionalIndexer) . ' must implement interface ' . AdditionalPageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491024);
            }

            // Each indexer sees every document collected so far, including
            // those contributed by indexers that ran before it.
            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }
399
400
    /**
401
     * Sends the given documents to the field processing service which takes
402
     * care of manipulating fields as defined in the field's configuration.
403
     *
404
     * @param array $documents An array of documents to manipulate
405
     */
406 31
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();

        // Nothing to do when no field processing instructions are configured.
        if (count($processingInstructions) === 0) {
            return;
        }

        $fieldProcessingService = GeneralUtility::makeInstance(Service::class);
        $fieldProcessingService->processDocuments($documents, $processingInstructions);
    }
414
415
    /**
416
     * Adds the collected documents to the Solr index.
417
     *
418
     * @param array $documents An array of Document objects.
419
     * @return bool TRUE if documents were added successfully, FALSE otherwise
420
     */
421 31
    protected function addDocumentsToSolrIndex(array $documents)
    {
        // An empty list counts as "nothing added".
        if (count($documents) === 0) {
            return false;
        }

        try {
            $this->logger->log(SolrLogManager::INFO, 'Adding ' . count($documents) . ' documents.', $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $batch) {
                $response = $this->solrConnection->getWriteService()->addDocuments($batch);
                if ($response->getHttpStatus() != 200) {
                    throw new \RuntimeException('Solr Request failed.', 1331834983);
                }
            }
        } catch (Exception $exception) {
            // Failures are logged and reported through the return value
            // instead of being propagated to the caller.
            $this->logger->log(
                SolrLogManager::ERROR,
                $exception->getMessage() . ' Error code: ' . $exception->getCode()
            );

            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(SolrLogManager::ERROR, 'Exception while adding documents', [(string)$exception]);
            }

            return false;
        }

        return true;
    }
452
453
    /**
454
     * Gets the current page's URL.
455
     *
456
     * @return string URL of the current page.
457
     */
458
    public function getPageUrl()
    {
        // Initialised from the request URL in the constructor unless it was
        // overridden through setPageUrl().
        return $this->pageUrl;
    }
462
463
    /**
464
     * Sets the URL to use for the page document.
465
     *
466
     * @param string $url The page's URL.
467
     */
468 10
    public function setPageUrl($url)
    {
        // Overrides the request URL captured in the constructor; the value
        // is passed to the document builder by getPageDocument().
        $this->pageUrl = $url;
    }
472
473
    /**
474
     * Gets the page's access rootline.
475
     *
476
     * @return Rootline The page's access rootline
477
     */
478
    public function getPageAccessRootline()
    {
        // Either the default rootline created in the constructor or the one
        // supplied through setPageAccessRootline().
        return $this->pageAccessRootline;
    }
482
483
    /**
484
     * Sets the page's access rootline.
485
     *
486
     * @param Rootline $accessRootline The page's access rootline
487
     */
488 24
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        // Replaces the default rootline created in the constructor; the
        // value is passed to the document builder by getPageDocument().
        $this->pageAccessRootline = $accessRootline;
    }
492
493
    /**
494
     * Gets the documents that have been sent to Solr
495
     *
496
     * @return array An array of Document objects
497
     */
498 10
    public function getDocumentsSentToSolr()
    {
        // Populated by indexPage(); an empty array before the first run.
        return $this->documentsSentToSolr;
    }
502
}
503