Passed
Push — master ( 42d2d3...8f9ec7 )
by Timo
69:32 queued 48:37
created

Typo3PageIndexer::indexPage()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 28
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 2.0438

Importance

Changes 0
Metric Value
dl 0
loc 28
ccs 14
cts 18
cp 0.7778
rs 8.8571
c 0
b 0
f 0
cc 2
eloc 15
nc 2
nop 0
crap 2.0438
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use Apache_Solr_Document;
28
use ApacheSolrForTypo3\Solr\Access\Rootline;
29
use ApacheSolrForTypo3\Solr\Domain\Search\ApacheSolrDocument\Builder;
30
use ApacheSolrForTypo3\Solr\Domain\Variants\IdBuilder;
31
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
32
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
33
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
34
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
35
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
36
use ApacheSolrForTypo3\Solr\System\Solr\SolrConnection;
37
use TYPO3\CMS\Core\Utility\GeneralUtility;
38
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
39
40
/**
41
 * Page Indexer to index TYPO3 pages used by the Index Queue.
42
 *
43
 * @author Ingo Renner <[email protected]>
44
 * @author Daniel Poetzinger <[email protected]>
45
 * @author Timo Schmidt <[email protected]>
46
 */
47
class Typo3PageIndexer
48
{
49
50
    /**
51
     * ID of the current page's Solr document.
52
     *
53
     * @var string
54
     */
55
    protected static $pageSolrDocumentId = '';
56
    /**
57
     * The Solr document generated for the current page.
58
     *
59
     * @var \Apache_Solr_Document
60
     */
61
    protected static $pageSolrDocument = null;
62
    /**
63
     * The mount point parameter used in the Frontend controller.
64
     *
65
     * @var string
66
     */
67
    protected $mountPointParameter;
68
    /**
69
     * Solr server connection.
70
     *
71
     * @var SolrConnection
72
     */
73
    protected $solrConnection = null;
74
    /**
75
     * Frontend page object (TSFE).
76
     *
77
     * @var TypoScriptFrontendController
78
     */
79
    protected $page = null;
80
    /**
81
     * Content extractor to extract content from TYPO3 pages
82
     *
83
     * @var Typo3PageContentExtractor
84
     */
85
    protected $contentExtractor = null;
86
    /**
87
     * URL to be indexed as the page's URL
88
     *
89
     * @var string
90
     */
91
    protected $pageUrl = '';
92
    /**
93
     * The page's access rootline
94
     *
95
     * @var Rootline
96
     */
97
    protected $pageAccessRootline = null;
98
    /**
99
     * Documents that have been sent to Solr
100
     *
101
     * @var array
102
     */
103
    protected $documentsSentToSolr = [];
104
105
    /**
106
     * @var TypoScriptConfiguration
107
     */
108
    protected $configuration;
109
110
    /**
111
     * @var Item
112
     */
113
    protected $indexQueueItem;
114
115
    /**
116
     * @var \ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager
117
     */
118
    protected $logger = null;
119
120
    /**
     * Sets up the indexer for the given frontend page.
     *
     * Stores the page, the current request URL and the Solr configuration,
     * then tries to establish the Solr connection. A failure to connect is
     * only logged here; the connection property simply stays unset.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->logger = GeneralUtility::makeInstance(SolrLogManager::class, __CLASS__);
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $exception) {
            // always log a short summary of what went wrong
            $summary = $exception->getMessage() . ' Error code: ' . $exception->getCode();
            $this->logger->log(SolrLogManager::ERROR, $summary);

            // TODO extract to a class "ExceptionLogger"
            // the full exception is only logged when configured to do so
            if ($this->configuration->getLoggingExceptions()) {
                $exceptionDetails = [$exception->__toString()];
                $this->logger->log(
                    SolrLogManager::ERROR,
                    'Exception while trying to index a page',
                    $exceptionDetails
                );
            }
        }

        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
    }
155
156
    /**
     * Sets the Index Queue item this indexer is working on.
     *
     * @param Item $indexQueueItem The Index Queue item of the current page
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }
163
164
    /**
     * Looks up the Solr connection for the current page and language and
     * keeps it, provided the server answers a ping.
     *
     * @throws \Exception (code 1234790825) when the Solr server does not respond to the ping.
     */
    protected function initializeSolrConnection()
    {
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $connection = $connectionManager->getConnectionByPageId(
            $this->page->id,
            $this->page->sys_language_uid
        );

        // do not continue if no server is available
        $serverIsAvailable = $connection->getWriteService()->ping();
        if (!$serverIsAvailable) {
            throw new \Exception(
                'No Solr instance available while trying to index a page.',
                1234790825
            );
        }

        $this->solrConnection = $connection;
    }
183
184
    /**
     * Returns the Solr document ID recorded for the current page.
     *
     * @return string The page's Solr document ID; the property starts out as an empty string and is filled when the page document gets built.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }
193
194
    /**
     * Returns the Solr document that was generated for the current page.
     *
     * @return \Apache_Solr_Document|NULL The page's Solr document, NULL as long as no page has been indexed.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }
203
204
    /**
     * Replaces the Solr connection that was initialized by the constructor.
     *
     * The given connection is only accepted if its write service responds
     * to a ping.
     *
     * @param SolrConnection $solrConnection Solr connection
     * @throws \Exception (code 1323946472) if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrConnection $solrConnection)
    {
        $serverIsReachable = $solrConnection->getWriteService()->ping();
        if (!$serverIsReachable) {
            throw new \Exception('Could not connect to Solr server.', 1323946472);
        }

        $this->solrConnection = $solrConnection;
    }
222
223
    /**
     * Indexes the current page.
     *
     * Builds the page document, lets registered hooks substitute and post
     * process it, collects additional documents, runs the field processing
     * and finally sends everything to Solr. The documents handed to the
     * Solr write step are remembered regardless of the outcome.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error or when no Solr connection is available
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        if ($this->solrConnection === null) {
            // intended early return: continuing would only waste processing
            // time when the Solr server isn't available anyway
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($pageDocument);
        self::$pageSolrDocument = $pageDocument;

        // The list starts with the page document; keeping a list will become
        // useful once individual records are indexed instead of whole pages.
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }
257
258
    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     * to the page document.
     *
     * Every class reference registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument']
     * is instantiated and handed the page document together with the
     * frontend page. Nothing happens when no post processors are registered.
     *
     * @param \Apache_Solr_Document $pageDocument The page document to post process
     * @throws \UnexpectedValueException (code 1397739154) if a registered post processor does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
            return;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
            $postProcessor = GeneralUtility::getUserObj($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // Report the offending post processor, not the document being
                // processed; fall back to the value's type in case no object
                // was returned for the class reference.
                $offender = is_object($postProcessor) ? get_class($postProcessor) : gettype($postProcessor);
                throw new \UnexpectedValueException(
                    $offender . ' must implement interface ' . PageDocumentPostProcessor::class,
                    1397739154
                );
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }
278
279
    /**
     * Creates the Solr document for the current page and records its ID.
     *
     * The document is built from the page, its URL, its access rootline and
     * the mount point parameter. The value of the document's "id" field is
     * stored in the static page document ID property.
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        $builder = GeneralUtility::makeInstance(Builder::class);
        $pageDocument = $builder->fromPage(
            $this->page,
            $this->pageUrl,
            $this->pageAccessRootline,
            (string)$this->mountPointParameter
        );

        // remember the ID of the document that was just built
        $idField = $pageDocument->getField('id');
        self::$pageSolrDocumentId = $idField['value'];

        return $pageDocument;
    }
294
295
296
    // Logging
297
    // TODO replace by a central logger
298
299
    /**
     * Returns the mount point parameter used in the Frontend controller.
     *
     * @return string The mount point parameter as it was set on this indexer
     */
    public function getMountPointParameter()
    {
        return $this->mountPointParameter;
    }
308
309
    // Misc
310
311
    /**
     * Stores the mount point parameter used in the Frontend controller.
     *
     * The given value is cast to a string before it is stored.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }
320
321
    /**
     * Gives third party extensions the chance to replace or modify the page
     * document created by this indexer.
     *
     * The substitute indexers registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument']
     * are called one after another; each one receives the document returned
     * by its predecessor.
     *
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException if a registered class does not implement SubstitutePageIndexer (code 1310491001) or does not return an Apache_Solr_Document (code 1310490952)
     */
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
    {
        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'])) {
            return $pageDocument;
        }

        $configurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'] as $classReference) {
            $indexer = GeneralUtility::getUserObj($classReference);

            if (!($indexer instanceof SubstitutePageIndexer)) {
                throw new \UnexpectedValueException(
                    get_class($indexer) . ' must implement interface ' . SubstitutePageIndexer::class,
                    1310491001
                );
            }

            // field mapping indexers need to know which indexing configuration applies
            if ($indexer instanceof PageFieldMappingIndexer) {
                $indexer->setPageIndexingConfigurationName($configurationName);
            }

            $candidate = $indexer->getPageDocument($pageDocument);
            if (!($candidate instanceof Apache_Solr_Document)) {
                throw new \UnexpectedValueException(
                    'The document returned by ' . get_class($indexer) . ' is not a valid Apache_Solr_Document document.',
                    1310490952
                );
            }

            $pageDocument = $candidate;
        }

        return $pageDocument;
    }
357
358
    /**
     * Determines the indexing configuration name for the current page.
     *
     * The name is taken from the Index Queue item if one has been set,
     * otherwise 'pages' is used.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        if (isset($this->indexQueueItem)) {
            return $this->indexQueueItem->getIndexingConfigurationName();
        }

        return 'pages';
    }
367
368
    /**
     * Gives third party extensions the chance to contribute further
     * documents that should be indexed along with the current page.
     *
     * The indexers registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments']
     * are asked in turn; array results are merged into the list, any other
     * result is ignored.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents plus all additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException (code 1310491024) if a registered class does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
    {
        $collected = $existingDocuments;

        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'])) {
            return $collected;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] as $classReference) {
            $indexer = GeneralUtility::getUserObj($classReference);

            if (!($indexer instanceof AdditionalPageIndexer)) {
                throw new \UnexpectedValueException(
                    get_class($indexer) . ' must implement interface ' . AdditionalPageIndexer::class,
                    1310491024
                );
            }

            // each indexer sees everything that has been collected so far
            $extraDocuments = $indexer->getAdditionalPageDocuments($pageDocument, $collected);
            if (!is_array($extraDocuments)) {
                continue;
            }

            $collected = array_merge($collected, $extraDocuments);
        }

        return $collected;
    }
400
401
    /**
     * Hands the given documents to the field processing service together
     * with the configured field processing instructions.
     *
     * The service is not invoked at all when no processing instructions
     * are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $instructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($instructions) < 1) {
            return;
        }

        $fieldProcessor = GeneralUtility::makeInstance(Service::class);
        $fieldProcessor->processDocuments($documents, $instructions);
    }
415
416
    /**
     * Sends the collected documents to the Solr index.
     *
     * Documents are added in chunks of 20. Any exception raised on the way,
     * including the one thrown for a non-200 response, is caught and logged,
     * and turns the result into FALSE.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise (also for an empty list)
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        $documentCount = count($documents);
        if ($documentCount === 0) {
            return false;
        }

        try {
            $this->logger->log(
                SolrLogManager::INFO,
                'Adding ' . $documentCount . ' documents.',
                $documents
            );

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $chunk) {
                $response = $this->solrConnection->getWriteService()->addDocuments($chunk);
                if ($response->getHttpStatus() == 200) {
                    continue;
                }

                // wrap the transport problem so the catch block below logs it
                $transportException = new \Apache_Solr_HttpTransportException($response);
                throw new \RuntimeException('Solr Request failed.', 1331834983, $transportException);
            }

            return true;
        } catch (\Exception $exception) {
            $this->logger->log(
                SolrLogManager::ERROR,
                $exception->getMessage() . ' Error code: ' . $exception->getCode()
            );

            // the full exception is only logged when configured to do so
            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(
                    SolrLogManager::ERROR,
                    'Exception while adding documents',
                    [$exception->__toString()]
                );
            }
        }

        return false;
    }
468
469
    /**
     * Returns the URL that is indexed as the page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }
478
479
    /**
     * Overrides the URL that is used for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }
488
489
    /**
     * Returns the access rootline currently assigned to the page.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }
498
499
    /**
     * Assigns the access rootline to use for the page.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }
508
509
    /**
     * Returns the documents that were handed to Solr by the last call to
     * indexPage().
     *
     * @return array An array of \Apache_Solr_Document objects, empty if nothing has been sent yet
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }
518
}
519