Completed
Branch master (b9fc31)
by Timo
05:19
created

Typo3PageIndexer::getPageDocument()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 53
Code Lines 36

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 37
CRAP Score 1.0001

Importance

Changes 0
Metric Value
dl 0
loc 53
ccs 37
cts 39
cp 0.9487
rs 9.5797
c 0
b 0
f 0
cc 1
eloc 36
nc 1
nop 0
crap 1.0001

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method, i.e. moving a coherent part of the method body into a new, well-named method.

1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use Apache_Solr_Document;
28
use ApacheSolrForTypo3\Solr\Access\Rootline;
29
use ApacheSolrForTypo3\Solr\FieldProcessor\Service;
30
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
31
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
32
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
33
use TYPO3\CMS\Core\Utility\GeneralUtility;
34
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
35
36
/**
37
 * Page Indexer to index TYPO3 pages used by the Index Queue.
38
 *
39
 * @author Ingo Renner <[email protected]>
40
 * @author Daniel Poetzinger <[email protected]>
41
 * @author Timo Schmidt <[email protected]>
42
 */
43
class Typo3PageIndexer
44
{
45
46
    /**
47
     * ID of the current page's Solr document.
48
     *
49
     * @var string
50
     */
51
    protected static $pageSolrDocumentId = '';
52
    /**
53
     * The Solr document generated for the current page.
54
     *
55
     * @var \Apache_Solr_Document
56
     */
57
    protected static $pageSolrDocument = null;
58
    /**
59
     * The mount point parameter used in the Frontend controller.
60
     *
61
     * @var string
62
     */
63
    protected $mountPointParameter;
64
    /**
65
     * Solr server connection.
66
     *
67
     * @var SolrService
68
     */
69
    protected $solrConnection = null;
70
    /**
71
     * Frontend page object (TSFE).
72
     *
73
     * @var TypoScriptFrontendController
74
     */
75
    protected $page = null;
76
    /**
77
     * Content extractor to extract content from TYPO3 pages
78
     *
79
     * @var Typo3PageContentExtractor
80
     */
81
    protected $contentExtractor = null;
82
    /**
83
     * URL to be indexed as the page's URL
84
     *
85
     * @var string
86
     */
87
    protected $pageUrl = '';
88
    /**
89
     * The page's access rootline
90
     *
91
     * @var Rootline
92
     */
93
    protected $pageAccessRootline = null;
94
    /**
95
     * Documents that have been sent to Solr
96
     *
97
     * @var array
98
     */
99
    protected $documentsSentToSolr = [];
100
101
    /**
102
     * @var TypoScriptConfiguration
103
     */
104
    protected $configuration;
105
106
    /**
107
     * @var Item
108
     */
109
    protected $indexQueueItem;
110
111
    /**
     * Creates a page indexer for the given frontend page.
     *
     * Stores the page, the current request URL and the Solr configuration,
     * then tries to set up the Solr connection. A failing connection is only
     * logged here; the connection property then keeps its NULL default.
     * Finally the content extractor and an empty access rootline are created.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $exception) {
            $errorMessage = $exception->getMessage() . ' Error code: ' . $exception->getCode();
            $this->log($errorMessage, 3);

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog(
                    'Exception while trying to index a page',
                    'solr',
                    3,
                    [$exception->__toString()]
                );
            }
        }

        $this->contentExtractor = GeneralUtility::makeInstance(
            Typo3PageContentExtractor::class,
            $this->page->content
        );
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
    }
136
137
    /**
     * Sets the Index Queue item this indexer run belongs to.
     *
     * The item is consulted later to determine the indexing configuration
     * name for the current page.
     *
     * @param Item $indexQueueItem The related Index Queue item
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }
144
145
146
    /**
     * Looks up the Solr connection for the current page and language and
     * stores it on this indexer once the server answers a ping.
     *
     * @throws \Exception when no Solr connection can be established.
     */
    protected function initializeSolrConnection()
    {
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $connection = $connectionManager->getConnectionByPageId(
            $this->page->id,
            $this->page->sys_language_uid
        );

        // only keep the connection if the server is actually reachable
        if ($connection->ping()) {
            $this->solrConnection = $connection;

            return;
        }

        throw new \Exception(
            'No Solr instance available while trying to index a page.',
            1234790825
        );
    }
165
166
    /**
     * Writes a message to the TS log (admin panel) and, when indexing
     * logging is enabled in the configuration, to the devlog as well.
     *
     * @param string $message Message to log
     * @param int $errorNum Error number / severity passed on to both logs
     * @param array $data Additional data; each entry is cast to an array before being handed to the devlog
     * @return void
     */
    protected function log($message, $errorNum = 0, array $data = [])
    {
        if (is_object($GLOBALS['TT'])) {
            $GLOBALS['TT']->setTSlogMessage('tx_solr: ' . $message, $errorNum);
        }

        if (!$this->configuration->getLoggingIndexing()) {
            return;
        }

        $logData = [];
        foreach ($data as $entry) {
            $logData[] = (array)$entry;
        }

        GeneralUtility::devLog($message, 'solr', $errorNum, $logData);
    }
191
192
    /**
     * Returns the Solr document ID of the current page.
     *
     * The value is shared statically and is filled while the page document
     * is being built.
     *
     * @return string The page's Solr document ID, or an empty string if no document has been generated yet.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }
201
202
    /**
     * Returns the Solr document that was generated for the current page.
     *
     * The document is shared statically and is set by indexPage().
     *
     * @return \Apache_Solr_Document|NULL The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }
211
212
    /**
     * Replaces the Solr connection set up by the constructor with the given
     * one. The connection is only accepted if the server answers a ping.
     *
     * @param SolrService $solrConnection Solr connection
     * @throws \Exception if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrService $solrConnection)
    {
        $isReachable = $solrConnection->ping();
        if (!$isReachable) {
            throw new \Exception('Could not connect to Solr server.', 1323946472);
        }

        $this->solrConnection = $solrConnection;
    }
230
231
    /**
     * Indexes the current page.
     *
     * Builds the page document, lets hooks substitute and post process it,
     * collects additional documents, runs the field processing and finally
     * sends everything to Solr.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        if ($this->solrConnection === null) {
            // intended early return: without a Solr connection there is
            // nothing useful left to do
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($pageDocument);
        self::$pageSolrDocument = $pageDocument;

        // the page document always comes first, hooks may append more
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }
265
266
    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     * to the page document.
     *
     * Every class reference registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument']
     * is instantiated and asked to post process the document.
     *
     * @param \Apache_Solr_Document $pageDocument The page document to post process
     * @throws \UnexpectedValueException if a registered post processor does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
            return;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
            $postProcessor = GeneralUtility::getUserObj($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // name the offending post processor, not the document being processed
                $processorName = is_object($postProcessor) ? get_class($postProcessor) : gettype($postProcessor);
                throw new \UnexpectedValueException(
                    $processorName . ' must implement interface ' . PageDocumentPostProcessor::class,
                    1397739154
                );
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }
286
287
    /**
     * Builds the Solr document for the current page.
     *
     * Also stores the generated document ID in the static
     * $pageSolrDocumentId property.
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        /* @var $document \Apache_Solr_Document */
        $document = GeneralUtility::makeInstance(Apache_Solr_Document::class);
        $site = Site::getSiteByPageId($this->page->id);
        $pageRecord = $this->page->page;

        $documentId = Util::getPageDocumentId(
            $this->page->id,
            $this->page->type,
            $this->page->sys_language_uid,
            $this->getDocumentIdGroups(),
            $this->getMountPointParameter()
        );
        self::$pageSolrDocumentId = $documentId;

        // identification
        $document->setField('id', $documentId);
        $document->setField('site', $site->getDomain());
        $document->setField('siteHash', $site->getSiteHash());
        $document->setField('appKey', 'EXT:solr');
        $document->setField('type', 'pages');

        // system fields
        $document->setField('uid', $this->page->id);
        $document->setField('pid', $pageRecord['pid']);

        // variantId
        $document->setField('variantId', 'pages/' . $this->page->id);

        $document->setField('typeNum', $this->page->type);
        $document->setField('created', $pageRecord['crdate']);
        $document->setField('changed', $pageRecord['SYS_LASTCHANGED']);
        $document->setField('rootline', $this->getRootLineFieldValue());

        // access
        $this->addAccessField($document);
        $this->addEndtimeField($document, $pageRecord);

        // content
        $document->setField('title', $this->contentExtractor->getPageTitle());

        // document field name => column of the page record
        $recordFieldMap = [
            'subTitle' => 'subtitle',
            'navTitle' => 'nav_title',
            'author' => 'author',
            'description' => 'description',
            'abstract' => 'abstract',
        ];
        foreach ($recordFieldMap as $fieldName => $column) {
            $document->setField($fieldName, $pageRecord[$column]);
        }

        $document->setField('content', $this->contentExtractor->getIndexableContent());
        $document->setField('url', $this->pageUrl);

        $this->addKeywordsField($document, $pageRecord);
        $this->addTagContentFields($document);

        return $document;
    }
345
346
    /**
     * Sets the document's access field from the page's access rootline,
     * unless the rootline's string representation is blank.
     *
     * @param \Apache_Solr_Document $document The document to add the field to
     */
    protected function addAccessField(\Apache_Solr_Document $document)
    {
        $access = (string)$this->pageAccessRootline;
        if (trim($access) === '') {
            return;
        }

        $document->setField('access', $access);
    }
358
359
    /**
     * Adds the endtime field to the document if the page record has an
     * endtime set.
     *
     * Both the check and the value are taken from the passed page record, so
     * the decision and the written value can never come from different
     * records. A missing or empty endtime leaves the document untouched.
     *
     * @param \Apache_Solr_Document $document The document to add the field to
     * @param array $pageRecord The page record to read the endtime from
     */
    protected function addEndtimeField(\Apache_Solr_Document  $document, $pageRecord)
    {
        if (!empty($pageRecord['endtime'])) {
            $document->setField('endtime', $pageRecord['endtime']);
        }
    }
369
370
    /**
     * Adds the page's keywords to the document, one field value per keyword.
     *
     * The comma separated keywords of the page record are trimmed, empty
     * entries are dropped and duplicates removed before they are added.
     *
     * @param \Apache_Solr_Document $document The document to add the keywords to
     * @param array $pageRecord The page record holding the keywords
     */
    protected function addKeywordsField(\Apache_Solr_Document $document, $pageRecord)
    {
        $keywordList = GeneralUtility::trimExplode(',', $pageRecord['keywords'], true);

        foreach (array_unique($keywordList) as $keyword) {
            $document->addField('keywords', $keyword);
        }
    }
383
384
    /**
     * Copies the tag content provided by the content extractor into the
     * document, using each key as field name and each value as field value.
     *
     * @param \Apache_Solr_Document $document The document to add the fields to
     */
    protected function addTagContentFields(\Apache_Solr_Document  $document)
    {
        foreach ($this->contentExtractor->getTagContent() as $fieldName => $fieldValue) {
            $document->setField($fieldName, $fieldValue);
        }
    }
396
397
    /**
     * Builds the content for the rootline field.
     *
     * The value is the page ID, followed by a comma and the mount point
     * parameter if one is set.
     *
     * @return string
     */
    protected function getRootLineFieldValue()
    {
        $rootline = $this->page->id;

        // The mount point parameter is NULL until setMountPointParameter()
        // has been called. Cast it so that an unset parameter is treated like
        // an empty one instead of appending a dangling comma.
        $mountPointParameter = (string)$this->getMountPointParameter();
        if ($mountPointParameter !== '') {
            $rootline .= ',' . $mountPointParameter;
        }

        return $rootline;
    }
411
412
    /**
     * Gets a comma separated list of frontend user groups to use for the
     * document ID.
     *
     * The groups are taken from the page's access rootline and cleaned;
     * if nothing is left, group 0 is used.
     *
     * @return string A comma separated list of frontend user groups.
     */
    protected function getDocumentIdGroups()
    {
        $groups = Rootline::cleanGroupArray($this->pageAccessRootline->getGroups());

        if (empty($groups)) {
            $groups[] = 0;
        }

        return implode(',', $groups);
    }
431
432
    // Logging
433
    // TODO replace by a central logger
434
435
    /**
     * Returns the mount point parameter that is used in the Frontend
     * controller.
     *
     * @return string|NULL The mount point parameter; NULL as long as setMountPointParameter() has not been called.
     */
    public function getMountPointParameter()
    {
        return $this->mountPointParameter;
    }
444
445
    // Misc
446
447
    /**
     * Stores the mount point parameter that is used in the Frontend
     * controller. The given value is cast to a string.
     *
     * @param string $mountPointParameter The mount point parameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }
456
457
    /**
     * Gives third party extensions the chance to replace or modify the page
     * document created by this indexer.
     *
     * Every class reference registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument']
     * is instantiated and called in turn; each one receives the document
     * returned by its predecessor.
     *
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException if a substitute indexer does not implement SubstitutePageIndexer or does not return an \Apache_Solr_Document
     */
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
    {
        $registeredIndexers = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'];
        if (!is_array($registeredIndexers)) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        $currentDocument = $pageDocument;

        foreach ($registeredIndexers as $classReference) {
            $indexer = GeneralUtility::getUserObj($classReference);

            if (!$indexer instanceof SubstitutePageIndexer) {
                throw new \UnexpectedValueException(
                    get_class($indexer) . ' must implement interface ' . SubstitutePageIndexer::class,
                    1310491001
                );
            }

            // the field mapping indexer needs to know which configuration applies
            if ($indexer instanceof PageFieldMappingIndexer) {
                $indexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $candidate = $indexer->getPageDocument($currentDocument);
            if (!$candidate instanceof \Apache_Solr_Document) {
                throw new \UnexpectedValueException(
                    'The document returned by ' . get_class($indexer) . ' is not a valid Apache_Solr_Document document.',
                    1310490952
                );
            }

            $currentDocument = $candidate;
        }

        return $currentDocument;
    }
493
494
    /**
     * Returns the indexing configuration name of the related Index Queue
     * item, or 'pages' when no queue item has been set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        if (!isset($this->indexQueueItem)) {
            return 'pages';
        }

        return $this->indexQueueItem->getIndexingConfigurationName();
    }
503
504
    /**
     * Gives third party extensions the chance to provide additional
     * documents which should be indexed for the current page.
     *
     * Every class reference registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments']
     * is instantiated and asked for documents; array results are appended to
     * the list, anything else is ignored.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents plus all additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException if an additional indexer does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
    {
        $registeredIndexers = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'];
        if (!is_array($registeredIndexers)) {
            return $existingDocuments;
        }

        $collected = $existingDocuments;
        foreach ($registeredIndexers as $classReference) {
            $indexer = GeneralUtility::getUserObj($classReference);

            if (!$indexer instanceof AdditionalPageIndexer) {
                throw new \UnexpectedValueException(
                    get_class($indexer) . ' must implement interface ' . AdditionalPageIndexer::class,
                    1310491024
                );
            }

            $extraDocuments = $indexer->getAdditionalPageDocuments($pageDocument, $collected);
            if (is_array($extraDocuments)) {
                $collected = array_merge($collected, $extraDocuments);
            }
        }

        return $collected;
    }
536
537
    /**
     * Hands the given documents to the field processing service together
     * with the configured field processing instructions. Nothing happens if
     * no instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $instructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($instructions) === 0) {
            return;
        }

        GeneralUtility::makeInstance(Service::class)->processDocuments($documents, $instructions);
    }
551
552
    /**
     * Sends the collected documents to the Solr index in chunks of 20.
     *
     * Any exception raised while adding, including a non-200 response from
     * Solr, is logged and turned into a FALSE return value.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        if (!count($documents)) {
            return false;
        }

        try {
            $this->log('Adding ' . count($documents) . ' documents.', 0, $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $chunk) {
                $response = $this->solrConnection->addDocuments($chunk);

                if ($response->getHttpStatus() != 200) {
                    throw new \RuntimeException(
                        'Solr Request failed.',
                        1331834983,
                        new \Apache_Solr_HttpTransportException($response)
                    );
                }
            }
        } catch (\Exception $exception) {
            $this->log($exception->getMessage() . ' Error code: ' . $exception->getCode(), 2);

            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog('Exception while adding documents', 'solr', 3, [$exception->__toString()]);
            }

            return false;
        }

        return true;
    }
591
592
    /**
     * Returns the URL that is indexed as the page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }
601
602
    /**
     * Overrides the URL to use for the page document. The constructor
     * initializes it with the current request URL.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }
611
612
    /**
     * Returns the access rootline currently used for the page.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }
621
622
    /**
     * Replaces the page's access rootline. The constructor initializes it
     * with an empty rootline.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }
631
632
    /**
     * Returns the documents handed to Solr by the last indexPage() run.
     *
     * The list is stored regardless of whether adding them to the index
     * succeeded, and is empty before indexPage() has been called.
     *
     * @return array An array of \Apache_Solr_Document objects
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }
641
}
642