Completed
Push — master ( 722d75...afea8e )
by Timo
37:55 queued 16:30
created

Typo3PageIndexer::addAccessField()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2.0116

Importance

Changes 0
Metric Value
dl 0
loc 7
ccs 6
cts 7
cp 0.8571
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 4
nc 2
nop 1
crap 2.0116
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use Apache_Solr_Document;
28
use ApacheSolrForTypo3\Solr\Access\Rootline;
29
use ApacheSolrForTypo3\Solr\Domain\Variants\IdBuilder;
30
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
31
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
32
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
33
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
34
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
35
use TYPO3\CMS\Core\Utility\GeneralUtility;
36
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
37
38
/**
39
 * Page Indexer to index TYPO3 pages used by the Index Queue.
40
 *
41
 * @author Ingo Renner <[email protected]>
42
 * @author Daniel Poetzinger <[email protected]>
43
 * @author Timo Schmidt <[email protected]>
44
 */
45
class Typo3PageIndexer
46
{
47
48
    /**
49
     * ID of the current page's Solr document.
50
     *
51
     * @var string
52
     */
53
    protected static $pageSolrDocumentId = '';
54
    /**
55
     * The Solr document generated for the current page.
56
     *
57
     * @var \Apache_Solr_Document
58
     */
59
    protected static $pageSolrDocument = null;
60
    /**
61
     * The mount point parameter used in the Frontend controller.
62
     *
63
     * @var string
64
     */
65
    protected $mountPointParameter;
66
    /**
67
     * Solr server connection.
68
     *
69
     * @var SolrService
70
     */
71
    protected $solrConnection = null;
72
    /**
73
     * Frontend page object (TSFE).
74
     *
75
     * @var TypoScriptFrontendController
76
     */
77
    protected $page = null;
78
    /**
79
     * Content extractor to extract content from TYPO3 pages
80
     *
81
     * @var Typo3PageContentExtractor
82
     */
83
    protected $contentExtractor = null;
84
    /**
85
     * URL to be indexed as the page's URL
86
     *
87
     * @var string
88
     */
89
    protected $pageUrl = '';
90
    /**
91
     * The page's access rootline
92
     *
93
     * @var Rootline
94
     */
95
    protected $pageAccessRootline = null;
96
    /**
97
     * Documents that have been sent to Solr
98
     *
99
     * @var array
100
     */
101
    protected $documentsSentToSolr = [];
102
103
    /**
104
     * @var TypoScriptConfiguration
105
     */
106
    protected $configuration;
107
108
    /**
109
     * @var Item
110
     */
111
    protected $indexQueueItem;
112
113
    /**
114
     * @var IdBuilder
115
     */
116
    protected $variantIdBuilder;
117
118
    /**
119
     * @var \ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager
120
     */
121
    protected $logger = null;
122
123
    /**
     * Sets up the indexer for the given frontend page.
     *
     * Stores the page, the current request URL and the Solr configuration and
     * then tries to initialize the Solr connection. A failure to initialize
     * the connection is logged and not rethrown, so the connection property
     * may still be NULL once the constructor has finished.
     *
     * @param TypoScriptFrontendController $page The page to index
     * @param IdBuilder $variantIdBuilder Optional variant ID builder, a new instance is created when none is given
     */
    public function __construct(TypoScriptFrontendController $page, IdBuilder $variantIdBuilder = null)
    {
        $this->logger = GeneralUtility::makeInstance(SolrLogManager::class, __CLASS__);

        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $exception) {
            $summary = $exception->getMessage() . ' Error code: ' . $exception->getCode();
            $this->logger->log(SolrLogManager::ERROR, $summary);

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(
                    SolrLogManager::ERROR,
                    'Exception while trying to index a page',
                    [$exception->__toString()]
                );
            }
        }

        $this->contentExtractor = GeneralUtility::makeInstance(Typo3PageContentExtractor::class, $this->page->content);
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');

        // fall back to a default builder when the caller did not inject one
        if (is_null($variantIdBuilder)) {
            $variantIdBuilder = GeneralUtility::makeInstance(IdBuilder::class);
        }
        $this->variantIdBuilder = $variantIdBuilder;
    }
161
162
    /**
     * Sets the Index Queue item this indexer works on.
     *
     * The item is consulted when the indexing configuration name for the
     * current page is resolved.
     *
     * @param Item $indexQueueItem The Index Queue item to remember
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }
169
170
171
    /**
     * Looks up the Solr connection for the current page and language and
     * keeps it if the server answers a ping.
     *
     * @throws \Exception (code 1234790825) when the Solr server does not respond to the ping.
     */
    protected function initializeSolrConnection()
    {
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $connection = $connectionManager->getConnectionByPageId(
            $this->page->id,
            $this->page->sys_language_uid
        );

        if ($connection->ping()) {
            $this->solrConnection = $connection;
            return;
        }

        // do not continue if no server is available
        throw new \Exception(
            'No Solr instance available while trying to index a page.',
            1234790825
        );
    }
190
191
    /**
     * Returns the Solr document ID of the current page.
     *
     * The value lives in a static property that starts out as an empty
     * string and is filled when the page document is built.
     *
     * @return string The page's Solr document ID, an empty string if no document was generated yet.
     */
    public static function getPageSolrDocumentId()
    {
        $documentId = self::$pageSolrDocumentId;

        return $documentId;
    }
200
201
    /**
     * Returns the Solr document that was generated for the current page.
     *
     * The value lives in a static property that is assigned while the page
     * is being indexed.
     *
     * @return \Apache_Solr_Document|NULL The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        $pageDocument = self::$pageSolrDocument;

        return $pageDocument;
    }
210
211
    /**
     * Replaces the Solr connection that was set up by the constructor.
     *
     * The given connection is only accepted if it answers a ping.
     *
     * @param SolrService $solrConnection Solr connection
     * @throws \Exception (code 1323946472) if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrService $solrConnection)
    {
        $serverIsReachable = $solrConnection->ping();

        if ($serverIsReachable) {
            $this->solrConnection = $solrConnection;
            return;
        }

        throw new \Exception(
            'Could not connect to Solr server.',
            1323946472
        );
    }
229
230
    /**
     * Indexes the current page.
     *
     * Builds the page document, lets substitute indexers and post processors
     * work on it, collects additional documents, runs field processing and
     * finally sends all documents to Solr. The documents are remembered as
     * "sent to Solr" regardless of whether adding them succeeded.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        if (is_null($this->solrConnection)) {
            // intended early return: without a Solr connection any further
            // processing would only waste time
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($pageDocument);
        self::$pageSolrDocument = $pageDocument;

        // the list starts with the page document itself; this will become
        // useful as soon as individual records are indexed instead of whole pages
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }
264
265
    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     * to the page document.
     *
     * Each registered class reference is instantiated and must implement
     * PageDocumentPostProcessor; it then receives the page document and the
     * frontend page object.
     *
     * @param \Apache_Solr_Document $pageDocument The page document to post process
     * @throws \UnexpectedValueException (code 1397739154) if a registered post processor does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
            return;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
            $postProcessor = GeneralUtility::getUserObj($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // Name the offending post processor (not the page document) so
                // the message points at the class that has to be fixed.
                $offender = is_object($postProcessor) ? get_class($postProcessor) : gettype($postProcessor);
                throw new \UnexpectedValueException($offender . ' must implement interface ' . PageDocumentPostProcessor::class, 1397739154);
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }
285
286
    /**
     * Creates the Solr document for the current page.
     *
     * The document ID is also stored in the static page document ID property.
     * Fields are written in a fixed order: identification and system fields,
     * variant ID, type/date fields, rootline, access and endtime, content
     * fields, keywords and finally the tag content fields.
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        /* @var $document \Apache_Solr_Document */
        $document = GeneralUtility::makeInstance(Apache_Solr_Document::class);
        $page = $this->page;
        $site = Site::getSiteByPageId($page->id);
        $pageRecord = $page->page;

        $documentId = Util::getPageDocumentId(
            $page->id,
            $page->type,
            $page->sys_language_uid,
            $this->getDocumentIdGroups(),
            $this->getMountPointParameter()
        );
        self::$pageSolrDocumentId = $documentId;

        // identification and system fields
        $identityFields = [
            'id' => $documentId,
            'site' => $site->getDomain(),
            'siteHash' => $site->getSiteHash(),
            'appKey' => 'EXT:solr',
            'type' => 'pages',
            'uid' => $page->id,
            'pid' => $pageRecord['pid'],
        ];
        foreach ($identityFields as $fieldName => $fieldValue) {
            $document->setField($fieldName, $fieldValue);
        }

        // variantId
        $document->setField(
            'variantId',
            $this->variantIdBuilder->buildFromTypeAndUid('pages', $page->id)
        );

        $document->setField('typeNum', $page->type);
        $document->setField('created', $pageRecord['crdate']);
        $document->setField('changed', $pageRecord['SYS_LASTCHANGED']);

        $document->setField('rootline', $this->getRootLineFieldValue());

        // access
        $this->addAccessField($document);
        $this->addEndtimeField($document, $pageRecord);

        // content
        $contentFields = [
            'title' => $this->contentExtractor->getPageTitle(),
            'subTitle' => $pageRecord['subtitle'],
            'navTitle' => $pageRecord['nav_title'],
            'author' => $pageRecord['author'],
            'description' => $pageRecord['description'],
            'abstract' => $pageRecord['abstract'],
            'content' => $this->contentExtractor->getIndexableContent(),
            'url' => $this->pageUrl,
        ];
        foreach ($contentFields as $fieldName => $fieldValue) {
            $document->setField($fieldName, $fieldValue);
        }

        $this->addKeywordsField($document, $pageRecord);
        $this->addTagContentFields($document);

        return $document;
    }
345
346
    /**
     * Writes the access field, but only when the page's access rootline
     * renders to a non-blank string.
     *
     * @param \Apache_Solr_Document $document The document to add the field to
     */
    protected function addAccessField(\Apache_Solr_Document $document)
    {
        $accessValue = (string)$this->pageAccessRootline;

        if (trim($accessValue) === '') {
            // nothing to restrict, leave the field out
            return;
        }

        $document->setField('access', $accessValue);
    }
358
359
    /**
     * Adds the endtime field to the document if the page record carries a
     * non-empty end time.
     *
     * @param \Apache_Solr_Document $document The document to add the field to
     * @param array $pageRecord The page record providing the endtime value
     */
    protected function addEndtimeField(\Apache_Solr_Document  $document, $pageRecord)
    {
        // Check the same record the value is taken from, instead of testing
        // $this->page->page while writing from $pageRecord.
        if (empty($pageRecord['endtime'])) {
            return;
        }

        $document->setField('endtime', $pageRecord['endtime']);
    }
369
370
    /**
     * Adds the page's keywords as a multi valued field.
     *
     * The comma separated keywords of the page record are split with
     * GeneralUtility::trimExplode(), duplicates are dropped and every
     * remaining keyword is added as its own value.
     *
     * @param \Apache_Solr_Document $document The document to add the keywords to
     * @param array $pageRecord The page record providing the keywords
     */
    protected function addKeywordsField(\Apache_Solr_Document $document, $pageRecord)
    {
        $keywordList = GeneralUtility::trimExplode(',', $pageRecord['keywords'], true);

        foreach (array_unique($keywordList) as $uniqueKeyword) {
            $document->addField('keywords', $uniqueKeyword);
        }
    }
383
384
    /**
     * Copies the tag content delivered by the content extractor into the
     * document, one field per entry (field name => field value).
     *
     * @param \Apache_Solr_Document $document The document to add the fields to
     */
    protected function addTagContentFields(\Apache_Solr_Document  $document)
    {
        foreach ($this->contentExtractor->getTagContent() as $tagFieldName => $tagFieldValue) {
            $document->setField($tagFieldName, $tagFieldValue);
        }
    }
396
397
    /**
     * Builds the content for the rootline field.
     *
     * The value is the page ID, followed by a comma and the mount point
     * parameter if one is set.
     *
     * @return string
     */
    protected function getRootLineFieldValue()
    {
        $rootline = (string)$this->page->id;

        // The mount point parameter is NULL until it has been set explicitly;
        // normalize it so that an unset parameter does not append a dangling
        // comma to the rootline.
        $mountPointParameter = (string)$this->getMountPointParameter();
        if ($mountPointParameter !== '') {
            $rootline .= ',' . $mountPointParameter;
        }

        return $rootline;
    }
411
412
    /**
     * Builds the comma separated frontend user group list that goes into the
     * document ID.
     *
     * The groups come from the page's access rootline and are passed through
     * Rootline::cleanGroupArray(); if nothing is left, group 0 is used.
     *
     * @return string A comma separated list of frontend user groups.
     */
    protected function getDocumentIdGroups()
    {
        $accessGroups = Rootline::cleanGroupArray($this->pageAccessRootline->getGroups());

        if (empty($accessGroups)) {
            $accessGroups[] = 0;
        }

        return implode(',', $accessGroups);
    }
431
432
    // Logging
433
    // TODO replace by a central logger
434
435
    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * The property has no default value, so it is NULL until the setter has
     * been called; the cast makes sure callers always receive a string as
     * documented.
     *
     * @return string The mount point parameter, an empty string if none was set
     */
    public function getMountPointParameter()
    {
        return (string)$this->mountPointParameter;
    }
444
445
    // Misc
446
447
    /**
     * Stores the mount point parameter that is used in the Frontend
     * controller. The given value is cast to a string before it is stored.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $normalizedParameter = (string)$mountPointParameter;

        $this->mountPointParameter = $normalizedParameter;
    }
456
457
    /**
     * Gives third party extensions the chance to replace or modify the page
     * document created by this indexer.
     *
     * Every class registered under indexPageSubstitutePageDocument is
     * instantiated in order; each one receives the document returned by its
     * predecessor. PageFieldMappingIndexer instances additionally get the
     * indexing configuration name of the current page.
     *
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException (code 1310491001) if a registered class does not implement SubstitutePageIndexer
     * @throws \UnexpectedValueException (code 1310490952) if a substitute indexer does not return an Apache_Solr_Document
     */
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
    {
        $classReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'];
        if (!is_array($classReferences)) {
            return $pageDocument;
        }

        $configurationName = $this->getIndexConfigurationNameForCurrentPage();
        $currentDocument = $pageDocument;

        foreach ($classReferences as $classReference) {
            $indexer = GeneralUtility::getUserObj($classReference);

            if (!$indexer instanceof SubstitutePageIndexer) {
                throw new \UnexpectedValueException(
                    get_class($indexer) . ' must implement interface ' . SubstitutePageIndexer::class,
                    1310491001
                );
            }

            if ($indexer instanceof PageFieldMappingIndexer) {
                $indexer->setPageIndexingConfigurationName($configurationName);
            }

            $replacement = $indexer->getPageDocument($currentDocument);
            if (!$replacement instanceof Apache_Solr_Document) {
                throw new \UnexpectedValueException(
                    'The document returned by ' . get_class($indexer) . ' is not a valid Apache_Solr_Document document.',
                    1310490952
                );
            }

            $currentDocument = $replacement;
        }

        return $currentDocument;
    }
493
494
    /**
     * Resolves the indexing configuration name for the current page.
     *
     * The name is taken from the Index Queue item if one has been set,
     * otherwise "pages" is used.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        if (isset($this->indexQueueItem)) {
            return $this->indexQueueItem->getIndexingConfigurationName();
        }

        return 'pages';
    }
503
504
    /**
     * Lets third party extensions contribute additional documents that
     * should be indexed together with the current page.
     *
     * Every class registered under indexPageAddDocuments is instantiated in
     * order and receives the page document plus all documents collected so
     * far; array results are appended to the collection.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by the additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException (code 1310491024) if a registered class does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
    {
        $collectedDocuments = $existingDocuments;

        $classReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'];
        if (!is_array($classReferences)) {
            return $collectedDocuments;
        }

        foreach ($classReferences as $classReference) {
            $indexer = GeneralUtility::getUserObj($classReference);

            if (!$indexer instanceof AdditionalPageIndexer) {
                throw new \UnexpectedValueException(
                    get_class($indexer) . ' must implement interface ' . AdditionalPageIndexer::class,
                    1310491024
                );
            }

            $contributedDocuments = $indexer->getAdditionalPageDocuments($pageDocument, $collectedDocuments);
            if (!is_array($contributedDocuments)) {
                // indexers may return nothing useful, just move on
                continue;
            }

            $collectedDocuments = array_merge($collectedDocuments, $contributedDocuments);
        }

        return $collectedDocuments;
    }
536
537
    /**
     * Hands the documents over to the field processing service, provided
     * that field processing instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $instructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();

        if (count($instructions) < 1) {
            // nothing configured, nothing to do
            return;
        }

        $fieldProcessor = GeneralUtility::makeInstance(Service::class);
        $fieldProcessor->processDocuments($documents, $instructions);
    }
551
552
    /**
     * Sends the collected documents to the Solr index in chunks of 20.
     *
     * Any exception raised on the way (including a non-200 response from
     * Solr) is logged and turned into a FALSE return value.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        if (count($documents) === 0) {
            return false;
        }

        try {
            $this->logger->log(
                SolrLogManager::INFO,
                'Adding ' . count($documents) . ' documents.',
                $documents
            );

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $chunk) {
                $response = $this->solrConnection->addDocuments($chunk);

                if ($response->getHttpStatus() != 200) {
                    throw new \RuntimeException(
                        'Solr Request failed.',
                        1331834983,
                        new \Apache_Solr_HttpTransportException($response)
                    );
                }
            }

            return true;
        } catch (\Exception $exception) {
            $this->logger->log(
                SolrLogManager::ERROR,
                $exception->getMessage() . ' Error code: ' . $exception->getCode()
            );

            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(
                    SolrLogManager::ERROR,
                    'Exception while adding documents',
                    [$exception->__toString()]
                );
            }
        }

        return false;
    }
604
605
    /**
     * Returns the URL that is indexed as the page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        $url = $this->pageUrl;

        return $url;
    }
614
615
    /**
     * Overrides the URL that is written to the page document, replacing the
     * request URL picked up by the constructor.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }
624
625
    /**
     * Returns the access rootline currently assigned to the page.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        $accessRootline = $this->pageAccessRootline;

        return $accessRootline;
    }
634
635
    /**
     * Assigns the access rootline of the page, replacing the empty rootline
     * created by the constructor.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }
644
645
    /**
     * Returns the documents that were handed to Solr by the last call to
     * indexPage(); an empty array if nothing has been sent yet.
     *
     * @return array An array of \Apache_Solr_Document objects
     */
    public function getDocumentsSentToSolr()
    {
        $sentDocuments = $this->documentsSentToSolr;

        return $sentDocuments;
    }
654
}
655