Passed
Push — master ( a95893...a4d2df )
by Timo
57s
created

Typo3PageIndexer::indexPage()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 28
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 2.0185

Importance

Changes 0
Metric Value
dl 0
loc 28
ccs 15
cts 18
cp 0.8333
rs 8.8571
c 0
b 0
f 0
cc 2
eloc 15
nc 2
nop 0
crap 2.0185
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use Apache_Solr_Document;
28
use ApacheSolrForTypo3\Solr\Access\Rootline;
29
use ApacheSolrForTypo3\Solr\Domain\Search\ApacheSolrDocument\Builder;
30
use ApacheSolrForTypo3\Solr\Domain\Variants\IdBuilder;
31
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
32
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
33
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
34
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
35
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
36
use TYPO3\CMS\Core\Utility\GeneralUtility;
37
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
38
39
/**
40
 * Page Indexer to index TYPO3 pages used by the Index Queue.
41
 *
42
 * @author Ingo Renner <[email protected]>
43
 * @author Daniel Poetzinger <[email protected]>
44
 * @author Timo Schmidt <[email protected]>
45
 */
46
class Typo3PageIndexer
47
{
48
49
    /**
50
     * ID of the current page's Solr document.
51
     *
52
     * @var string
53
     */
54
    protected static $pageSolrDocumentId = '';
55
    /**
56
     * The Solr document generated for the current page.
57
     *
58
     * @var \Apache_Solr_Document
59
     */
60
    protected static $pageSolrDocument = null;
61
    /**
62
     * The mount point parameter used in the Frontend controller.
63
     *
64
     * @var string
65
     */
66
    protected $mountPointParameter;
67
    /**
68
     * Solr server connection.
69
     *
70
     * @var SolrService
71
     */
72
    protected $solrConnection = null;
73
    /**
74
     * Frontend page object (TSFE).
75
     *
76
     * @var TypoScriptFrontendController
77
     */
78
    protected $page = null;
79
    /**
80
     * Content extractor to extract content from TYPO3 pages
81
     *
82
     * @var Typo3PageContentExtractor
83
     */
84
    protected $contentExtractor = null;
85
    /**
86
     * URL to be indexed as the page's URL
87
     *
88
     * @var string
89
     */
90
    protected $pageUrl = '';
91
    /**
92
     * The page's access rootline
93
     *
94
     * @var Rootline
95
     */
96
    protected $pageAccessRootline = null;
97
    /**
98
     * Documents that have been sent to Solr
99
     *
100
     * @var array
101
     */
102
    protected $documentsSentToSolr = [];
103
104
    /**
105
     * @var TypoScriptConfiguration
106
     */
107
    protected $configuration;
108
109
    /**
110
     * @var Item
111
     */
112
    protected $indexQueueItem;
113
114
    /**
115
     * @var \ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager
116
     */
117
    protected $logger = null;
118
119
    /**
     * Creates an indexer for the given frontend page.
     *
     * Sets up the logger, remembers the page and the current request URL,
     * loads the Solr configuration and tries to open the Solr connection for
     * the page. A connection failure is only logged, not re-thrown, so the
     * connection can still be NULL once the constructor has finished.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->page = $page;
        $this->logger = GeneralUtility::makeInstance(SolrLogManager::class, __CLASS__);
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $connectionFailure) {
            // always leave a short trace of what went wrong
            $summary = $connectionFailure->getMessage() . ' Error code: ' . $connectionFailure->getCode();
            $this->logger->log(SolrLogManager::ERROR, $summary);

            // TODO extract to a class "ExceptionLogger"
            // the full exception dump is only written when exception logging is enabled
            $logFullException = $this->configuration->getLoggingExceptions();
            if ($logFullException) {
                $this->logger->log(
                    SolrLogManager::ERROR,
                    'Exception while trying to index a page',
                    [(string)$connectionFailure]
                );
            }
        }

        // start with an access rootline built from an empty string;
        // callers may replace it through setPageAccessRootline()
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
    }
154
155
    /**
     * Stores the Index Queue item this indexer is working on.
     *
     * When set, the item provides the indexing configuration name used while
     * substituting the page document.
     *
     * @param Item $indexQueueItem The Index Queue item being processed
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }
162
163
    /**
     * Looks up the Solr connection for the current page and language and
     * keeps it if the server responds to a ping.
     *
     * @throws \Exception (code 1234790825) when the Solr server does not answer the ping
     */
    protected function initializeSolrConnection()
    {
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $candidate = $connectionManager->getConnectionByPageId(
            $this->page->id,
            $this->page->sys_language_uid
        );

        // only keep a connection whose server is actually reachable
        if ($candidate->ping()) {
            $this->solrConnection = $candidate;

            return;
        }

        // do not continue if no server is available
        throw new \Exception(
            'No Solr instance available while trying to index a page.',
            1234790825
        );
    }
182
183
    /**
     * Returns the Solr document ID of the page document built most recently.
     *
     * The value is held in a static property, so it is shared by all indexer
     * instances. It starts out as an empty string and is overwritten each
     * time getPageDocument() builds a document.
     *
     * @return string|NULL The page's Solr document ID; an empty string as long as no document was generated.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }
192
193
    /**
     * Returns the page document stored by the last indexPage() run that got
     * past the connection check.
     *
     * The document is held in a static property, so it is shared by all
     * indexer instances.
     *
     * @return \Apache_Solr_Document|NULL The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }
202
203
    /**
     * Replaces the Solr connection set up by the constructor with the given
     * one, provided its server answers a ping.
     *
     * @param SolrService $solrConnection Solr connection
     * @throws \Exception (code 1323946472) if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrService $solrConnection)
    {
        // accept the connection only when its server is reachable
        if ($solrConnection->ping()) {
            $this->solrConnection = $solrConnection;

            return;
        }

        throw new \Exception(
            'Could not connect to Solr server.',
            1323946472
        );
    }
221
222
    /**
     * Indexes a page.
     *
     * Builds the page document, lets registered hooks substitute and post
     * process it, collects additional documents, runs the field processing
     * and finally sends everything to Solr. The collected documents are
     * remembered in $documentsSentToSolr whether or not the Solr request
     * succeeded.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error or when no Solr connection is available
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        if ($this->solrConnection === null) {
            // intended early return: without a Solr server there is no point
            // in spending processing time on building documents
            // FIXME use an exception
            return false;
        }

        $mainDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($mainDocument);
        self::$pageSolrDocument = $mainDocument;

        // the page document is always the first entry; hooks may add more
        $allDocuments = $this->getAdditionalDocuments($mainDocument, [$mainDocument]);
        $this->processDocuments($allDocuments);

        $indexed = $this->addDocumentsToSolrIndex($allDocuments);
        $this->documentsSentToSolr = $allDocuments;

        return $indexed;
    }
256
257
    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * Every class registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument']
     * is instantiated and handed the page document together with the
     * frontend page object.
     *
     * @param \Apache_Solr_Document $pageDocument The page document passed to each post processor
     * @throws \UnexpectedValueException (code 1397739154) if a registered class does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        // isset() first: the hook may not be registered at all, and reading the
        // missing key directly raises an "undefined index" notice/warning
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])
            || !is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])
        ) {
            return;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
            $postProcessor = GeneralUtility::getUserObj($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // name the offending post processor, not the document; fall back
                // to the class reference when no object could be created
                $offender = is_object($postProcessor) ? get_class($postProcessor) : (string)$classReference;
                throw new \UnexpectedValueException($offender . ' must implement interface ' . PageDocumentPostProcessor::class, 1397739154);
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }
277
278
    /**
     * Builds the Solr document for the current page.
     *
     * The document is created from the page, its URL, the access rootline
     * and the mount point parameter. The value of its "id" field is stored
     * statically so that getPageSolrDocumentId() can return it.
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        $builder = GeneralUtility::makeInstance(Builder::class);
        $pageDocument = $builder->fromPage(
            $this->page,
            $this->pageUrl,
            $this->pageAccessRootline,
            $this->mountPointParameter
        );

        // remember the document ID for getPageSolrDocumentId()
        self::$pageSolrDocumentId = $pageDocument->getField('id')['value'];

        return $pageDocument;
    }
293
294
295
    // Logging
296
    // TODO replace by a central logger
297
298
    /**
     * Returns the mount point parameter that is passed on when the page
     * document is built.
     *
     * @return string The mount point parameter; NULL as long as setMountPointParameter() has not been called
     */
    public function getMountPointParameter()
    {
        return $this->mountPointParameter;
    }
307
308
    // Misc
309
310
    /**
     * Stores the mount point parameter that is used in the Frontend controller.
     *
     * The given value is cast to a string before it is stored.
     *
     * @param string $mountPointParameter The mount point parameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }
319
320
    /**
     * Allows third party extensions to replace or modify the page document
     * created by this indexer.
     *
     * Every class registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument']
     * is called in turn; each one receives the document returned by the
     * previous one. Indexers of type PageFieldMappingIndexer are additionally
     * told the indexing configuration name of the current page.
     *
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException (code 1310491001) if a registered class does not implement SubstitutePageIndexer
     * @throws \UnexpectedValueException (code 1310490952) if a substitute indexer does not return an Apache_Solr_Document
     */
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
    {
        // isset() first: the hook may not be registered at all, and reading the
        // missing key directly raises an "undefined index" notice/warning
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'])
            || !is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'])
        ) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'] as $classReference) {
            $substituteIndexer = GeneralUtility::getUserObj($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                // get_class() must only be called on objects; fall back to the
                // class reference when no object could be created
                $offender = is_object($substituteIndexer) ? get_class($substituteIndexer) : (string)$classReference;
                $message = $offender . ' must implement interface ' . SubstitutePageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491001);
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            if (!$substituteDocument instanceof Apache_Solr_Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Apache_Solr_Document document.';
                throw new \UnexpectedValueException($message, 1310490952);
            }

            // the next indexer in the chain works on the substituted document
            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }
356
357
    /**
     * Determines the indexing configuration name for the current page.
     *
     * The name comes from the Index Queue item when one has been set;
     * otherwise 'pages' is used.
     *
     * @return string The indexing configuration name
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        if (isset($this->indexQueueItem)) {
            return $this->indexQueueItem->getIndexingConfigurationName();
        }

        return 'pages';
    }
366
367
    /**
     * Allows third party extensions to provide additional documents which
     * should be indexed for the current page.
     *
     * Every class registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments']
     * is asked for additional documents. Each one sees the documents
     * collected so far; return values that are not arrays are ignored.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by all additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException (code 1310491024) if a registered class does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
    {
        $documents = $existingDocuments;

        // isset() first: the hook may not be registered at all, and reading the
        // missing key directly raises an "undefined index" notice/warning
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'])
            || !is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'])
        ) {
            return $documents;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] as $classReference) {
            $additionalIndexer = GeneralUtility::getUserObj($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                // get_class() must only be called on objects; fall back to the
                // class reference when no object could be created
                $offender = is_object($additionalIndexer) ? get_class($additionalIndexer) : (string)$classReference;
                $message = $offender . ' must implement interface ' . AdditionalPageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491024);
            }

            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }
399
400
    /**
     * Hands the given documents to the field processing service together
     * with the configured field processing instructions.
     *
     * Nothing happens when no processing instructions are configured.
     * NOTE(review): nothing is returned, so the service presumably changes
     * the document objects in place - confirm against the Service class.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $instructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($instructions) <= 0) {
            // no instructions configured, nothing to process
            return;
        }

        $fieldProcessor = GeneralUtility::makeInstance(Service::class);
        $fieldProcessor->processDocuments($documents, $instructions);
    }
414
415
    /**
     * Adds the collected documents to the Solr index.
     *
     * The documents are sent in chunks of 20. Any exception raised while
     * sending, including a response status other than 200, is logged and
     * turned into a FALSE return value instead of being propagated.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise (also for an empty array)
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        $documentCount = count($documents);
        if ($documentCount === 0) {
            return false;
        }

        try {
            $this->logger->log(
                SolrLogManager::INFO,
                'Adding ' . $documentCount . ' documents.',
                $documents
            );

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $batch) {
                $response = $this->solrConnection->addDocuments($batch);
                if ($response->getHttpStatus() == 200) {
                    continue;
                }

                // anything but a 200 response aborts the remaining chunks
                throw new \RuntimeException(
                    'Solr Request failed.',
                    1331834983,
                    new \Apache_Solr_HttpTransportException($response)
                );
            }
        } catch (\Exception $failure) {
            // always leave a short trace of what went wrong
            $summary = $failure->getMessage() . ' Error code: ' . $failure->getCode();
            $this->logger->log(SolrLogManager::ERROR, $summary);

            // the full exception dump is only written when exception logging is enabled
            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(
                    SolrLogManager::ERROR,
                    'Exception while adding documents',
                    [(string)$failure]
                );
            }

            return false;
        }

        return true;
    }
467
468
    /**
     * Returns the URL that is indexed as the page's URL.
     *
     * The constructor initialises it from the TYPO3_REQUEST_URL environment
     * value; setPageUrl() may override it.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }
477
478
    /**
     * Overrides the URL that is used when the page document is built.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }
487
488
    /**
     * Returns the access rootline that is used when the page document is
     * built.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }
497
498
    /**
     * Replaces the access rootline created by the constructor.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }
507
508
    /**
     * Returns the documents that the last indexPage() run handed to Solr.
     *
     * The list is recorded even when the Solr request itself failed, and it
     * stays empty until indexPage() has got past its connection check.
     *
     * @return array An array of \Apache_Solr_Document objects
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }
517
}
518