Completed
Push — master ( 9517e4...034cdd )
by Timo
44s
created

Typo3PageIndexer::__construct()   A

Complexity

Conditions 4
Paths 6

Size

Total Lines 21
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 7
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 21
rs 9.0534
c 0
b 0
f 0
ccs 7
cts 14
cp 0.5
cc 4
eloc 13
nc 6
nop 2
crap 6
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use Apache_Solr_Document;
28
use ApacheSolrForTypo3\Solr\Access\Rootline;
29
use ApacheSolrForTypo3\Solr\Domain\Variants\IdBuilder;
30
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
31
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
32
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
33
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
34
use TYPO3\CMS\Core\Utility\GeneralUtility;
35
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
36
37
/**
38
 * Page Indexer to index TYPO3 pages used by the Index Queue.
39
 *
40
 * @author Ingo Renner <[email protected]>
41
 * @author Daniel Poetzinger <[email protected]>
42
 * @author Timo Schmidt <[email protected]>
43
 */
44
class Typo3PageIndexer
{

    /**
     * ID of the current page's Solr document.
     *
     * Empty string until getPageDocument() has built a document.
     *
     * @var string
     */
    protected static $pageSolrDocumentId = '';

    /**
     * The Solr document generated for the current page.
     *
     * NULL until indexPage() has built the page document.
     *
     * @var \Apache_Solr_Document|null
     */
    protected static $pageSolrDocument = null;

    /**
     * The mount point parameter used in the Frontend controller.
     *
     * Defaults to an empty string so that the "no mount point" checks
     * (strict comparison against '') work even if
     * setMountPointParameter() was never called.
     *
     * @var string
     */
    protected $mountPointParameter = '';

    /**
     * Solr server connection.
     *
     * Stays NULL when the constructor could not establish a connection.
     *
     * @var SolrService|null
     */
    protected $solrConnection = null;

    /**
     * Frontend page object (TSFE).
     *
     * @var TypoScriptFrontendController
     */
    protected $page = null;

    /**
     * Content extractor to extract content from TYPO3 pages
     *
     * @var Typo3PageContentExtractor
     */
    protected $contentExtractor = null;

    /**
     * URL to be indexed as the page's URL
     *
     * @var string
     */
    protected $pageUrl = '';

    /**
     * The page's access rootline
     *
     * @var Rootline
     */
    protected $pageAccessRootline = null;

    /**
     * Documents that have been sent to Solr
     *
     * @var array
     */
    protected $documentsSentToSolr = [];

    /**
     * @var TypoScriptConfiguration
     */
    protected $configuration;

    /**
     * @var Item
     */
    protected $indexQueueItem;

    /**
     * @var IdBuilder
     */
    protected $variantIdBuilder;

    /**
     * Constructor
     *
     * Tries to open the Solr connection for the page. A failure is logged
     * but not rethrown, in which case the connection stays NULL and
     * indexPage() returns FALSE.
     *
     * @param TypoScriptFrontendController $page The page to index
     * @param IdBuilder $variantIdBuilder Optional variant ID builder, a default instance is created when omitted
     */
    public function __construct(TypoScriptFrontendController $page, IdBuilder $variantIdBuilder = null)
    {
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $e) {
            $this->log($e->getMessage() . ' Error code: ' . $e->getCode(), 3);

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog('Exception while trying to index a page', 'solr', 3, [$e->__toString()]);
            }
        }

        $this->contentExtractor = GeneralUtility::makeInstance(Typo3PageContentExtractor::class, $this->page->content);
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
        $this->variantIdBuilder = $variantIdBuilder === null
            ? GeneralUtility::makeInstance(IdBuilder::class)
            : $variantIdBuilder;
    }

    /**
     * Sets the Index Queue item the page is indexed for.
     *
     * @param Item $indexQueueItem
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }

    /**
     * Initializes the Solr server connection.
     *
     * @throws \Exception when no Solr connection can be established.
     */
    protected function initializeSolrConnection()
    {
        $solr = GeneralUtility::makeInstance(ConnectionManager::class)
            ->getConnectionByPageId($this->page->id, $this->page->sys_language_uid);

        // do not continue if no server is available
        if (!$solr->ping()) {
            throw new \Exception(
                'No Solr instance available while trying to index a page.',
                1234790825
            );
        }

        $this->solrConnection = $solr;
    }

    /**
     * Logs messages to devlog and TS log (admin panel)
     *
     * The TS log is only written when a time tracker object is registered
     * in $GLOBALS['TT']; the devlog is only written when indexing logging
     * is enabled in the configuration.
     *
     * @param string $message Message to set
     * @param int $errorNum Error number
     * @param array $data Additional data to log
     * @return void
     */
    protected function log($message, $errorNum = 0, array $data = [])
    {
        // isset() guards against the global not being registered at all
        if (isset($GLOBALS['TT']) && is_object($GLOBALS['TT'])) {
            $GLOBALS['TT']->setTSlogMessage('tx_solr: ' . $message, $errorNum);
        }

        if (!$this->configuration->getLoggingIndexing()) {
            return;
        }

        $logData = [];
        foreach ($data as $value) {
            $logData[] = (array)$value;
        }

        GeneralUtility::devLog($message, 'solr', $errorNum, $logData);
    }

    /**
     * Gets the current page's Solr document ID.
     *
     * @return string The page's Solr document ID or an empty string in case no document was generated yet.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }

    /**
     * Gets the Solr document generated for the current page.
     *
     * @return \Apache_Solr_Document|NULL The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }

    /**
     * Allows to provide a Solr server connection other than the one
     * initialized by the constructor.
     *
     * @param SolrService $solrConnection Solr connection
     * @throws \Exception if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrService $solrConnection)
    {
        if (!$solrConnection->ping()) {
            throw new \Exception(
                'Could not connect to Solr server.',
                1323946472
            );
        }

        $this->solrConnection = $solrConnection;
    }

    /**
     * Indexes a page.
     *
     * Builds the page document, lets hooks substitute / post process it and
     * contribute additional documents, runs the field processing and finally
     * sends everything to Solr.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a registered hook object does not implement the interface required for its hook
     */
    public function indexPage()
    {
        if ($this->solrConnection === null) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the solr server isn't available
            // anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->getPageDocument();
        $pageDocument = $this->substitutePageDocument($pageDocument);

        $this->applyIndexPagePostProcessors($pageDocument);

        self::$pageSolrDocument = $pageDocument;

        // a list, as hooks may add further documents next to the page document
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }

    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * @param \Apache_Solr_Document $pageDocument
     * @throws \UnexpectedValueException if a post processor does not implement interface PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        foreach ($this->getIndexerHookClassReferences('indexPagePostProcessPageDocument') as $classReference) {
            $postProcessor = GeneralUtility::getUserObj($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // name the offending post processor, not the document being processed
                $message = $this->describeHookObject($postProcessor, $classReference)
                    . ' must implement interface ' . PageDocumentPostProcessor::class;
                throw new \UnexpectedValueException($message, 1397739154);
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }

    /**
     * Builds the Solr document for the current page.
     *
     * Also stores the generated document ID in the static
     * $pageSolrDocumentId.
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        /* @var $document \Apache_Solr_Document */
        $document = GeneralUtility::makeInstance(Apache_Solr_Document::class);
        $site = Site::getSiteByPageId($this->page->id);
        $pageRecord = $this->page->page;

        $documentId = Util::getPageDocumentId(
            $this->page->id,
            $this->page->type,
            $this->page->sys_language_uid,
            $this->getDocumentIdGroups(),
            $this->getMountPointParameter()
        );
        self::$pageSolrDocumentId = $documentId;

        $document->setField('id', $documentId);
        $document->setField('site', $site->getDomain());
        $document->setField('siteHash', $site->getSiteHash());
        $document->setField('appKey', 'EXT:solr');
        $document->setField('type', 'pages');

        // system fields
        $document->setField('uid', $this->page->id);
        $document->setField('pid', $pageRecord['pid']);

        // variantId
        $variantId = $this->variantIdBuilder->buildFromTypeAndUid('pages', $this->page->id);
        $document->setField('variantId', $variantId);

        $document->setField('typeNum', $this->page->type);
        $document->setField('created', $pageRecord['crdate']);
        $document->setField('changed', $pageRecord['SYS_LASTCHANGED']);

        $document->setField('rootline', $this->getRootLineFieldValue());

        // access
        $this->addAccessField($document);
        $this->addEndtimeField($document, $pageRecord);

        // content
        $document->setField('title', $this->contentExtractor->getPageTitle());
        $document->setField('subTitle', $pageRecord['subtitle']);
        $document->setField('navTitle', $pageRecord['nav_title']);
        $document->setField('author', $pageRecord['author']);
        $document->setField('description', $pageRecord['description']);
        $document->setField('abstract', $pageRecord['abstract']);
        $document->setField('content', $this->contentExtractor->getIndexableContent());
        $document->setField('url', $this->pageUrl);

        $this->addKeywordsField($document, $pageRecord);
        $this->addTagContentFields($document);

        return $document;
    }

    /**
     * Adds the access field to the document if needed.
     *
     * The field is only set when the string representation of the access
     * rootline is not blank.
     *
     * @param \Apache_Solr_Document $document
     */
    protected function addAccessField(\Apache_Solr_Document $document)
    {
        $access = (string)$this->pageAccessRootline;
        if (trim($access) !== '') {
            $document->setField('access', $access);
        }
    }

    /**
     * Adds the endtime field to the document if the page record has an
     * endtime set.
     *
     * @param \Apache_Solr_Document $document
     * @param array $pageRecord The page record to read the endtime from
     */
    protected function addEndtimeField(\Apache_Solr_Document $document, $pageRecord)
    {
        // check and write the same record instead of mixing it with $this->page->page
        if (!empty($pageRecord['endtime'])) {
            $document->setField('endtime', $pageRecord['endtime']);
        }
    }

    /**
     * Adds keywords, multi valued.
     *
     * The comma separated keywords of the page record are trimmed, empty
     * entries are dropped and duplicates are removed.
     *
     * @param \Apache_Solr_Document $document
     * @param array $pageRecord
     */
    protected function addKeywordsField(\Apache_Solr_Document $document, $pageRecord)
    {
        $keywords = array_unique(GeneralUtility::trimExplode(',', $pageRecord['keywords'], true));
        foreach ($keywords as $keyword) {
            $document->addField('keywords', $keyword);
        }
    }

    /**
     * Add content from several tags like headers, anchors, ...
     *
     * @param \Apache_Solr_Document $document
     */
    protected function addTagContentFields(\Apache_Solr_Document $document)
    {
        $tagContent = $this->contentExtractor->getTagContent();
        foreach ($tagContent as $fieldName => $fieldValue) {
            $document->setField($fieldName, $fieldValue);
        }
    }

    /**
     * Builds the content for the rootline field.
     *
     * The value is the page ID, followed by a comma and the mount point
     * parameter if one is set.
     *
     * @return string
     */
    protected function getRootLineFieldValue()
    {
        $rootline = (string)$this->page->id;

        // cast so that an unset (NULL) mount point never yields a dangling comma
        $mountPointParameter = (string)$this->getMountPointParameter();
        if ($mountPointParameter !== '') {
            $rootline .= ',' . $mountPointParameter;
        }

        return $rootline;
    }

    /**
     * Gets a comma separated list of frontend user groups to use for the
     * document ID.
     *
     * Falls back to "0" when the access rootline yields no groups.
     *
     * @return string A comma separated list of frontend user groups.
     */
    protected function getDocumentIdGroups()
    {
        $groups = Rootline::cleanGroupArray($this->pageAccessRootline->getGroups());

        if (empty($groups)) {
            $groups[] = 0;
        }

        return implode(',', $groups);
    }

    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * @return string The mount point parameter, an empty string if none was set
     */
    public function getMountPointParameter()
    {
        return (string)$this->mountPointParameter;
    }

    /**
     * Sets the mount point parameter that is used in the Frontend controller.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }

    /**
     * Allows third party extensions to replace or modify the page document
     * created by this indexer.
     *
     * Each registered substitute indexer receives the document returned by
     * the previous one.
     *
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException if a substitute indexer does not implement SubstitutePageIndexer or does not return an Apache_Solr_Document
     */
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
    {
        $classReferences = $this->getIndexerHookClassReferences('indexPageSubstitutePageDocument');
        if (empty($classReferences)) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($classReferences as $classReference) {
            $substituteIndexer = GeneralUtility::getUserObj($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                $message = $this->describeHookObject($substituteIndexer, $classReference)
                    . ' must implement interface ' . SubstitutePageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491001);
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            if (!$substituteDocument instanceof \Apache_Solr_Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer)
                    . ' is not a valid Apache_Solr_Document document.';
                throw new \UnexpectedValueException($message, 1310490952);
            }

            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }

    /**
     * Retrieves the indexConfigurationName from the related queueItem, or falls back to pages when no queue item set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        if (!isset($this->indexQueueItem)) {
            return 'pages';
        }

        return $this->indexQueueItem->getIndexingConfigurationName();
    }

    /**
     * Allows third party extensions to provide additional documents which
     * should be indexed for the current page.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents plus all additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException if an additional indexer does not implement interface AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
    {
        $documents = $existingDocuments;

        foreach ($this->getIndexerHookClassReferences('indexPageAddDocuments') as $classReference) {
            $additionalIndexer = GeneralUtility::getUserObj($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                $message = $this->describeHookObject($additionalIndexer, $classReference)
                    . ' must implement interface ' . AdditionalPageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491024);
            }

            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }

    /**
     * Sends the given documents to the field processing service which takes
     * care of manipulating fields as defined in the field's configuration.
     *
     * Nothing happens when no processing instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($processingInstructions) > 0) {
            $service = GeneralUtility::makeInstance(Service::class);
            $service->processDocuments($documents, $processingInstructions);
        }
    }

    /**
     * Adds the collected documents to the Solr index.
     *
     * Documents are sent in chunks of 20. Any exception raised while adding
     * is logged and turned into a FALSE return value.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        if (empty($documents)) {
            return false;
        }

        try {
            $this->log('Adding ' . count($documents) . ' documents.', 0, $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $documentChunk) {
                $response = $this->solrConnection->addDocuments($documentChunk);

                if ((int)$response->getHttpStatus() !== 200) {
                    $transportException = new \Apache_Solr_HttpTransportException($response);
                    throw new \RuntimeException('Solr Request failed.', 1331834983, $transportException);
                }
            }
        } catch (\Exception $e) {
            $this->log($e->getMessage() . ' Error code: ' . $e->getCode(), 2);

            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog('Exception while adding documents', 'solr', 3, [$e->__toString()]);
            }

            return false;
        }

        return true;
    }

    /**
     * Gets the current page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }

    /**
     * Sets the URL to use for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }

    /**
     * Gets the page's access rootline.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }

    /**
     * Sets the page's access rootline.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }

    /**
     * Gets the documents that have been sent to Solr
     *
     * @return array An array of \Apache_Solr_Document objects
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }

    /**
     * Returns the class references registered for one of the indexer hooks
     * in $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'].
     *
     * @param string $hookName Name of the hook, e.g. "indexPageAddDocuments"
     * @return array The registered class references, an empty array if the hook is not registered or not an array
     */
    private function getIndexerHookClassReferences($hookName)
    {
        // isset() avoids undefined index notices when nothing is registered
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'][$hookName])) {
            return [];
        }

        $classReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'][$hookName];

        return is_array($classReferences) ? $classReferences : [];
    }

    /**
     * Returns a name for a hook object to be used in error messages.
     *
     * Uses the object's class name, or the configured class reference when
     * the reference did not resolve to an object at all.
     *
     * @param mixed $hookObject The value created for the class reference
     * @param mixed $classReference The configured class reference
     * @return string
     */
    private function describeHookObject($hookObject, $classReference)
    {
        if (is_object($hookObject)) {
            return get_class($hookObject);
        }

        return is_string($classReference) ? $classReference : gettype($hookObject);
    }
}
651