Completed
Push — master ( 72d15a...e92139 )
by Timo
10s
created

Typo3PageIndexer::setMountPointParameter()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1.0156

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 3
cts 4
cp 0.75
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
crap 1.0156
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Access\Rootline;
28
use ApacheSolrForTypo3\Solr\ConnectionManager;
29
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
30
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
31
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
32
use ApacheSolrForTypo3\Solr\IndexQueue\Queue;
33
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
34
use TYPO3\CMS\Core\Utility\GeneralUtility;
35
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
36
37
/**
 * Page Indexer to index TYPO3 pages used by the Index Queue.
 *
 * Builds a Solr document for the page held by the given frontend controller,
 * lets registered hooks substitute, post-process and extend that document,
 * and finally sends all resulting documents to the Solr server.
 *
 * @author Ingo Renner <[email protected]>
 * @author Daniel Poetzinger <[email protected]>
 * @author Timo Schmidt <[email protected]>
 */
class Typo3PageIndexer
{

    /**
     * ID of the current page's Solr document.
     *
     * Empty string until getPageDocument() has built a document.
     *
     * @var string
     */
    protected static $pageSolrDocumentId = '';

    /**
     * The Solr document generated for the current page.
     *
     * NULL until indexPage() has built the page document.
     *
     * @var \Apache_Solr_Document|null
     */
    protected static $pageSolrDocument = null;

    /**
     * The mount point parameter used in the Frontend controller.
     *
     * Stays NULL until setMountPointParameter() has been called.
     *
     * @var string|null
     */
    protected $mountPointParameter;

    /**
     * Solr server connection.
     *
     * Remains NULL when no connection could be established.
     *
     * @var SolrService|null
     */
    protected $solrConnection = null;

    /**
     * Frontend page object (TSFE).
     *
     * @var TypoScriptFrontendController
     */
    protected $page = null;

    /**
     * Content extractor to extract content from TYPO3 pages
     *
     * @var Typo3PageContentExtractor
     */
    protected $contentExtractor = null;

    /**
     * URL to be indexed as the page's URL
     *
     * @var string
     */
    protected $pageUrl = '';

    /**
     * The page's access rootline
     *
     * @var Rootline
     */
    protected $pageAccessRootline = null;

    /**
     * Documents that have been sent to Solr
     *
     * @var array
     */
    protected $documentsSentToSolr = [];

    /**
     * @var TypoScriptConfiguration
     */
    protected $configuration;

    /**
     * @var Item
     */
    protected $indexQueueItem;

    /**
     * Constructor
     *
     * A failure to connect to Solr is logged but not re-thrown; in that case
     * the connection stays NULL and indexPage() returns FALSE.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $e) {
            $this->log($e->getMessage() . ' Error code: ' . $e->getCode(), 3);

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog('Exception while trying to index a page', 'solr', 3, [$e->__toString()]);
            }
        }

        $this->contentExtractor = GeneralUtility::makeInstance(Typo3PageContentExtractor::class, $this->page->content);
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
    }

    /**
     * Sets the Index Queue item the page is indexed for.
     *
     * @param Item $indexQueueItem
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }

    /**
     * Initializes the Solr server connection.
     *
     * The connection is looked up by the page's ID and language and is only
     * stored when the server answers a ping.
     *
     * @throws \Exception when no Solr connection can be established.
     */
    protected function initializeSolrConnection()
    {
        $solr = GeneralUtility::makeInstance(ConnectionManager::class)->getConnectionByPageId($this->page->id, $this->page->sys_language_uid);

        // do not continue if no server is available
        if (!$solr->ping()) {
            throw new \Exception(
                'No Solr instance available while trying to index a page.',
                1234790825
            );
        }

        $this->solrConnection = $solr;
    }

    /**
     * Logs messages to devlog and TS log (admin panel)
     *
     * TODO replace by a central logger
     *
     * @param string $message Message to set
     * @param int $errorNum Error number
     * @param array $data Additional data to log
     * @return void
     */
    protected function log($message, $errorNum = 0, array $data = [])
    {
        // guard with isset() so a missing $GLOBALS['TT'] does not raise a notice
        if (isset($GLOBALS['TT']) && is_object($GLOBALS['TT'])) {
            $GLOBALS['TT']->setTSlogMessage('tx_solr: ' . $message, $errorNum);
        }

        if ($this->configuration->getLoggingIndexing()) {
            $logData = [];
            if (!empty($data)) {
                foreach ($data as $value) {
                    // every entry is converted to an array before being logged
                    $logData[] = (array)$value;
                }
            }

            GeneralUtility::devLog($message, 'solr', $errorNum, $logData);
        }
    }

    /**
     * Gets the current page's Solr document ID.
     *
     * @return string The page's Solr document ID or an empty string in case no document was generated yet.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }

    /**
     * Gets the Solr document generated for the current page.
     *
     * @return \Apache_Solr_Document|NULL The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }

    /**
     * Allows to provide a Solr server connection other than the one
     * initialized by the constructor.
     *
     * @param SolrService $solrConnection Solr connection
     * @throws \Exception if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrService $solrConnection)
    {
        if (!$solrConnection->ping()) {
            throw new \Exception(
                'Could not connect to Solr server.',
                1323946472
            );
        }

        $this->solrConnection = $solrConnection;
    }

    /**
     * Indexes a page.
     *
     * Builds the page document, runs the substitute, post-process and
     * additional-documents hooks, applies the field processing instructions
     * and sends everything to Solr.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        $pageIndexed = false;
        $documents = []; // this will become useful as soon as when starting to index individual records instead of whole pages

        if (is_null($this->solrConnection)) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the solr server isn't available
            // anyways
            // FIXME use an exception
            return $pageIndexed;
        }

        $pageDocument = $this->getPageDocument();
        $pageDocument = $this->substitutePageDocument($pageDocument);

        $this->applyIndexPagePostProcessors($pageDocument);

        self::$pageSolrDocument = $pageDocument;
        $documents[] = $pageDocument;
        $documents = $this->getAdditionalDocuments($pageDocument, $documents);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        // recorded regardless of whether adding the documents succeeded
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }

    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * @param \Apache_Solr_Document $pageDocument
     * @throws \UnexpectedValueException if a registered post processor does not implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
            return;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
            $postProcessor = GeneralUtility::getUserObj($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // name the offending post processor, not the document it was given
                throw new \UnexpectedValueException(get_class($postProcessor) . ' must implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor', 1397739154);
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }

    /**
     * Builds the Solr document for the current page.
     *
     * Also stores the generated document ID in the static
     * $pageSolrDocumentId property.
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        $document = GeneralUtility::makeInstance('Apache_Solr_Document');
        /* @var $document \Apache_Solr_Document */
        $site = Site::getSiteByPageId($this->page->id);
        $pageRecord = $this->page->page;

        self::$pageSolrDocumentId = $documentId = Util::getPageDocumentId(
            $this->page->id,
            $this->page->type,
            $this->page->sys_language_uid,
            $this->getDocumentIdGroups(),
            $this->getMountPointParameter()
        );
        $document->setField('id', $documentId);
        $document->setField('site', $site->getDomain());
        $document->setField('siteHash', $site->getSiteHash());
        $document->setField('appKey', 'EXT:solr');
        $document->setField('type', 'pages');

        // system fields
        $document->setField('uid', $this->page->id);
        $document->setField('pid', $pageRecord['pid']);

        // variantId
        $document->setField('variantId', 'pages/' . $this->page->id);

        $document->setField('typeNum', $this->page->type);
        $document->setField('created', $pageRecord['crdate']);
        $document->setField('changed', $pageRecord['SYS_LASTCHANGED']);

        $rootline = $this->getRootLineFieldValue();
        $document->setField('rootline', $rootline);

        // access
        $this->addAccessField($document);
        $this->addEndtimeField($document, $pageRecord);

        // content
        $document->setField('title', $this->contentExtractor->getPageTitle());
        $document->setField('subTitle', $pageRecord['subtitle']);
        $document->setField('navTitle', $pageRecord['nav_title']);
        $document->setField('author', $pageRecord['author']);
        $document->setField('description', $pageRecord['description']);
        $document->setField('abstract', $pageRecord['abstract']);
        $document->setField('content', $this->contentExtractor->getIndexableContent());
        $document->setField('url', $this->pageUrl);

        $this->addKeywordsField($document, $pageRecord);
        $this->addTagContentFields($document);

        return $document;
    }

    /**
     * Adds the access field to the document if needed.
     *
     * The field is only set when the string representation of the page's
     * access rootline is not blank.
     *
     * @param \Apache_Solr_Document $document
     */
    protected function addAccessField(\Apache_Solr_Document $document)
    {
        $access = (string)$this->pageAccessRootline;
        if (trim($access) !== '') {
            $document->setField('access', $access);
        }
    }

    /**
     * Adds the endtime field to the document if the page record has an
     * endtime set.
     *
     * @param \Apache_Solr_Document $document
     * @param array $pageRecord The page record the endtime is read from
     */
    protected function addEndtimeField(\Apache_Solr_Document  $document, $pageRecord)
    {
        // check and read the same record; !empty() also covers a missing key
        if (!empty($pageRecord['endtime'])) {
            $document->setField('endtime', $pageRecord['endtime']);
        }
    }

    /**
     * Adds keywords, multi valued.
     *
     * The comma separated keywords of the page record are trimmed, empty
     * entries are dropped and duplicates are removed.
     *
     * @param \Apache_Solr_Document $document
     * @param array $pageRecord
     */
    protected function addKeywordsField(\Apache_Solr_Document $document, $pageRecord)
    {
        $keywords = array_unique(GeneralUtility::trimExplode(',', $pageRecord['keywords'], true));
        foreach ($keywords as $keyword) {
            $document->addField('keywords', $keyword);
        }
    }

    /**
     * Add content from several tags like headers, anchors, ...
     *
     * @param \Apache_Solr_Document $document
     */
    protected function addTagContentFields(\Apache_Solr_Document  $document)
    {
        $tagContent = $this->contentExtractor->getTagContent();
        foreach ($tagContent as $fieldName => $fieldValue) {
            $document->setField($fieldName, $fieldValue);
        }
    }

    /**
     * Builds the content for the rootline field.
     *
     * The value is the page ID, followed by a comma and the mount point
     * parameter when a non-empty mount point parameter is set.
     *
     * @return string
     */
    protected function getRootLineFieldValue()
    {
        $rootline = $this->page->id;

        // The mount point parameter is NULL until setMountPointParameter()
        // has been called. Cast it so that an unset parameter is treated like
        // an empty one instead of appending a dangling comma.
        $mountPointParameter = (string)$this->getMountPointParameter();
        if ($mountPointParameter !== '') {
            $rootline .= ',' . $mountPointParameter;
        }

        return $rootline;
    }

    /**
     * Gets a comma separated list of frontend user groups to use for the
     * document ID.
     *
     * Falls back to group 0 when the access rootline yields no groups.
     *
     * @return string A comma separated list of frontend user groups.
     */
    protected function getDocumentIdGroups()
    {
        $groups = $this->pageAccessRootline->getGroups();
        $groups = Rootline::cleanGroupArray($groups);

        if (empty($groups)) {
            $groups[] = 0;
        }

        $groups = implode(',', $groups);

        return $groups;
    }

    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * @return string|null The mount point parameter, NULL if it has not been set yet.
     */
    public function getMountPointParameter()
    {
        return $this->mountPointParameter;
    }

    /**
     * Sets the mount point parameter that is used in the Frontend controller.
     *
     * The given value is cast to string.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }

    /**
     * Allows third party extensions to replace or modify the page document
     * created by this indexer.
     *
     * Every registered substitute indexer receives the document returned by
     * the previous one.
     *
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException if a substitute indexer does not implement interface ApacheSolrForTypo3\Solr\SubstitutePageIndexer or does not return an \Apache_Solr_Document
     */
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
    {
        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'])) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'] as $classReference) {
            $substituteIndexer = GeneralUtility::getUserObj($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                $message = get_class($substituteIndexer) . ' must implement interface ApacheSolrForTypo3\Solr\SubstitutePageIndexer';
                throw new \UnexpectedValueException($message, 1310491001);
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            // instanceof does not fail for a class that is not loaded, it
            // just evaluates to FALSE - so this check can only pass when the
            // Apache_Solr_Document class is available
            if (!$substituteDocument instanceof \Apache_Solr_Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Apache_Solr_Document document.';
                throw new \UnexpectedValueException($message, 1310490952);
            }
            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }

    /**
     * Retrieves the indexConfigurationName from the related queueItem, or falls back to pages when no queue item set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        return isset($this->indexQueueItem) ? $this->indexQueueItem->getIndexingConfigurationName() : 'pages';
    }

    /**
     * Allows third party extensions to provide additional documents which
     * should be indexed for the current page.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents plus the additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException if an additional indexer does not implement interface ApacheSolrForTypo3\Solr\AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
    {
        $documents = $existingDocuments;

        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'])) {
            return $documents;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] as $classReference) {
            $additionalIndexer = GeneralUtility::getUserObj($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                $message = get_class($additionalIndexer) . ' must implement interface ApacheSolrForTypo3\Solr\AdditionalPageIndexer';
                throw new \UnexpectedValueException($message, 1310491024);
            }

            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            // anything but an array returned by the indexer is ignored
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }

    /**
     * Sends the given documents to the field processing service which takes
     * care of manipulating fields as defined in the field's configuration.
     *
     * Nothing happens when no processing instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($processingInstructions) > 0) {
            $service = GeneralUtility::makeInstance(Service::class);
            $service->processDocuments($documents, $processingInstructions);
        }
    }

    /**
     * Adds the collected documents to the Solr index.
     *
     * Documents are sent in chunks of 20. Any exception raised while adding
     * is logged and results in FALSE being returned instead of being
     * propagated to the caller.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        $documentsAdded = false;

        if (!count($documents)) {
            return $documentsAdded;
        }

        try {
            $this->log('Adding ' . count($documents) . ' documents.', 0, $documents);

            // chunk adds by 20
            $documentChunks = array_chunk($documents, 20);
            foreach ($documentChunks as $documentChunk) {
                $response = $this->solrConnection->addDocuments($documentChunk);

                if ($response->getHttpStatus() != 200) {
                    // keep the transport details available as previous exception
                    $transportException = new \Apache_Solr_HttpTransportException($response);
                    throw new \RuntimeException('Solr Request failed.', 1331834983, $transportException);
                }
            }

            $documentsAdded = true;
        } catch (\Exception $e) {
            $this->log($e->getMessage() . ' Error code: ' . $e->getCode(), 2);

            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog('Exception while adding documents', 'solr', 3, [$e->__toString()]);
            }
        }

        return $documentsAdded;
    }

    /**
     * Gets the current page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }

    /**
     * Sets the URL to use for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }

    /**
     * Gets the page's access rootline.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }

    /**
     * Sets the page's access rootline.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }

    /**
     * Gets the documents that have been sent to Solr
     *
     * @return array An array of \Apache_Solr_Document objects
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }
}
643