Typo3PageIndexer::setMountPointParameter()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 1

Importance

Changes 0
Metric Value
eloc 1
c 0
b 0
f 0
dl 0
loc 3
ccs 1
cts 1
cp 1
rs 10
cc 1
nc 1
nop 1
crap 1
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 3 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Access\Rootline;
28
use ApacheSolrForTypo3\Solr\Domain\Search\ApacheSolrDocument\Builder;
29
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
30
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
31
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
32
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
33
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
34
use ApacheSolrForTypo3\Solr\System\Solr\Document\Document;
35
use ApacheSolrForTypo3\Solr\System\Solr\SolrConnection;
36
use TYPO3\CMS\Core\Utility\GeneralUtility;
37
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
38
39
/**
40
 * Page Indexer to index TYPO3 pages used by the Index Queue.
41
 *
42
 * @author Ingo Renner <[email protected]>
43
 * @author Daniel Poetzinger <[email protected]>
44
 * @author Timo Schmidt <[email protected]>
45
 */
46
class Typo3PageIndexer
{
    /**
     * ID of the current page's Solr document.
     *
     * Stays an empty string until a page document has been built.
     *
     * @var string
     */
    protected static $pageSolrDocumentId = '';

    /**
     * The Solr document generated for the current page.
     *
     * Stays NULL until indexPage() has built the page document.
     *
     * @var Document|null
     */
    protected static $pageSolrDocument = null;

    /**
     * The mount point parameter used in the Frontend controller.
     *
     * NULL until set through setMountPointParameter().
     *
     * @var string|null
     */
    protected $mountPointParameter;

    /**
     * Solr server connection.
     *
     * Stays NULL when no reachable connection could be initialized.
     *
     * @var SolrConnection|null
     */
    protected $solrConnection = null;

    /**
     * Frontend page object (TSFE).
     *
     * @var TypoScriptFrontendController
     */
    protected $page = null;

    /**
     * Content extractor to extract content from TYPO3 pages
     *
     * @var Typo3PageContentExtractor
     */
    protected $contentExtractor = null;

    /**
     * URL to be indexed as the page's URL
     *
     * @var string
     */
    protected $pageUrl = '';

    /**
     * The page's access rootline
     *
     * @var Rootline
     */
    protected $pageAccessRootline = null;

    /**
     * Documents that have been sent to Solr
     *
     * @var array
     */
    protected $documentsSentToSolr = [];

    /**
     * @var TypoScriptConfiguration
     */
    protected $configuration;

    /**
     * @var Item
     */
    protected $indexQueueItem;

    /**
     * @var SolrLogManager
     */
    protected $logger = null;

    /**
     * Constructor
     *
     * Tries to initialize the Solr connection for the given page. A failure
     * is logged instead of being propagated; in that case the connection
     * stays NULL and indexPage() returns FALSE.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->logger = GeneralUtility::makeInstance(SolrLogManager::class, /** @scrutinizer ignore-type */ __CLASS__);

        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $e) {
            $this->logException($e, 'Exception while trying to index a page');
        }

        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, /** @scrutinizer ignore-type */ '');
    }

    /**
     * Sets the Index Queue item the page is indexed for.
     *
     * @param Item $indexQueueItem
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }

    /**
     * Initializes the Solr server connection.
     *
     * @throws \Exception when no Solr connection can be established.
     */
    protected function initializeSolrConnection()
    {
        $solr = GeneralUtility::makeInstance(ConnectionManager::class)->getConnectionByPageId($this->page->id, Util::getLanguageUid());

        // do not continue if no server is available
        if (!$solr->getWriteService()->ping()) {
            throw new \Exception(
                'No Solr instance available while trying to index a page.',
                1234790825
            );
        }

        $this->solrConnection = $solr;
    }

    /**
     * Gets the current page's Solr document ID.
     *
     * @return string The page's Solr document ID or an empty string in case no document was generated yet.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }

    /**
     * Gets the Solr document generated for the current page.
     *
     * @return Document|null The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }

    /**
     * Allows to provide a Solr server connection other than the one
     * initialized by the constructor.
     *
     * @param SolrConnection $solrConnection Solr connection
     * @throws \Exception if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrConnection $solrConnection)
    {
        if (!$solrConnection->getWriteService()->ping()) {
            throw new \Exception(
                'Could not connect to Solr server.',
                1323946472
            );
        }

        $this->solrConnection = $solrConnection;
    }

    /**
     * Indexes a page.
     *
     * Builds the page document, lets registered hooks substitute and post
     * process it, collects additional documents, runs the field processing
     * and finally sends everything to Solr.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a registered hook class fails to implement its required interface
     */
    public function indexPage()
    {
        if (is_null($this->solrConnection)) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the solr server isn't available
            // anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->getPageDocument();
        $pageDocument = $this->substitutePageDocument($pageDocument);

        $this->applyIndexPagePostProcessors($pageDocument);

        self::$pageSolrDocument = $pageDocument;

        // this will become useful as soon as when starting to index individual records instead of whole pages
        $documents = [$pageDocument];
        $documents = $this->getAdditionalDocuments($pageDocument, $documents);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }

    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * @param Document $pageDocument
     * @throws \UnexpectedValueException if a post processor fails to implement interface PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        foreach ($this->getIndexerHookClassReferences('indexPagePostProcessPageDocument') as $classReference) {
            $postProcessor = GeneralUtility::makeInstance($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // report the class of the offending post processor, not the class of the document
                throw new \UnexpectedValueException(get_class($postProcessor) . ' must implement interface ' . PageDocumentPostProcessor::class, 1397739154);
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }

    /**
     * Builds the Solr document for the current page and remembers its ID.
     *
     * @return Document A document representing the page
     */
    protected function getPageDocument()
    {
        $documentBuilder = GeneralUtility::makeInstance(Builder::class);
        $document = $documentBuilder->fromPage($this->page, $this->pageUrl, $this->pageAccessRootline, (string)$this->mountPointParameter);

        self::$pageSolrDocumentId = $document['id'];

        return $document;
    }

    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * @return string|null The mount point parameter, NULL if it has never been set
     */
    public function getMountPointParameter()
    {
        return $this->mountPointParameter;
    }

    /**
     * Sets the mount point parameter that is used in the Frontend controller.
     *
     * The value is stored as a string.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }

    /**
     * Allows third party extensions to replace or modify the page document
     * created by this indexer.
     *
     * Each registered substitute indexer receives the document returned by
     * the previous one.
     *
     * @param Document $pageDocument The page document created by this indexer.
     * @return Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException if a substitute indexer fails to implement interface SubstitutePageIndexer or does not return a Document
     */
    protected function substitutePageDocument(Document $pageDocument)
    {
        $classReferences = $this->getIndexerHookClassReferences('indexPageSubstitutePageDocument');
        if ($classReferences === []) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($classReferences as $classReference) {
            $substituteIndexer = GeneralUtility::makeInstance($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                $message = get_class($substituteIndexer) . ' must implement interface ' . SubstitutePageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491001);
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            if (!$substituteDocument instanceof Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Document object.';
                throw new \UnexpectedValueException($message, 1310490952);
            }

            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }

    /**
     * Retrieves the indexConfigurationName from the related queueItem, or falls back to pages when no queue item set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        return isset($this->indexQueueItem) ? $this->indexQueueItem->getIndexingConfigurationName() : 'pages';
    }

    /**
     * Allows third party extensions to provide additional documents which
     * should be indexed for the current page.
     *
     * @param Document $pageDocument The main document representing this page.
     * @param Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by all additional Document objects to index
     * @throws \UnexpectedValueException if an additional indexer fails to implement interface AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(Document $pageDocument, array $existingDocuments)
    {
        $documents = $existingDocuments;

        foreach ($this->getIndexerHookClassReferences('indexPageAddDocuments') as $classReference) {
            $additionalIndexer = GeneralUtility::makeInstance($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                $message = get_class($additionalIndexer) . ' must implement interface ' . AdditionalPageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491024);
            }

            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }

    /**
     * Sends the given documents to the field processing service which takes
     * care of manipulating fields as defined in the field's configuration.
     *
     * Nothing happens when no processing instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($processingInstructions) > 0) {
            $service = GeneralUtility::makeInstance(Service::class);
            $service->processDocuments($documents, $processingInstructions);
        }
    }

    /**
     * Adds the collected documents to the Solr index.
     *
     * Any exception raised while adding is logged and turned into a FALSE
     * return value.
     *
     * @param array $documents An array of Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        if (!count($documents)) {
            return false;
        }

        try {
            $this->logger->log(SolrLogManager::INFO, 'Adding ' . count($documents) . ' documents.', $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $documentChunk) {
                $response = $this->solrConnection->getWriteService()->addDocuments($documentChunk);
                if ((int)$response->getHttpStatus() !== 200) {
                    throw new \RuntimeException('Solr Request failed.', 1331834983);
                }
            }
        } catch (\Exception $e) {
            $this->logException($e, 'Exception while adding documents');

            return false;
        }

        return true;
    }

    /**
     * Gets the current page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }

    /**
     * Sets the URL to use for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }

    /**
     * Gets the page's access rootline.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }

    /**
     * Sets the page's access rootline.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }

    /**
     * Gets the documents that have been sent to Solr
     *
     * @return array An array of Document objects
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }

    /**
     * Returns the class references registered for one of the indexer hooks
     * below $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'].
     *
     * The lookup is guarded with isset() so that an unregistered hook does
     * not raise an undefined index notice.
     *
     * @param string $hookName Name of the hook, e.g. "indexPageAddDocuments"
     * @return array The registered class references, an empty array if the hook is not registered or not an array
     */
    private function getIndexerHookClassReferences($hookName)
    {
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'][$hookName])) {
            return [];
        }

        $classReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'][$hookName];

        return is_array($classReferences) ? $classReferences : [];
    }

    /**
     * Logs an exception as an error; additionally logs its string
     * representation when exception logging is enabled in the configuration.
     *
     * @param \Exception $exception The exception to log
     * @param string $contextMessage Message used for the detailed log entry
     */
    private function logException(\Exception $exception, $contextMessage)
    {
        $this->logger->log(
            SolrLogManager::ERROR,
            $exception->getMessage() . ' Error code: ' . $exception->getCode()
        );

        if ($this->configuration->getLoggingExceptions()) {
            $this->logger->log(
                SolrLogManager::ERROR,
                $contextMessage,
                [
                    $exception->__toString()
                ]
            );
        }
    }
}
502