Completed
Push — master ( b6b629...ced009 )
by Timo
47s
created

getIndexConfigurationNameForCurrentPage()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 1
CRAP Score 3.1852

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 4
ccs 1
cts 3
cp 0.3333
rs 10
cc 2
eloc 2
nc 2
nop 0
crap 3.1852
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Access\Rootline;
28
use ApacheSolrForTypo3\Solr\ConnectionManager;
29
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
30
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
31
use ApacheSolrForTypo3\Solr\IndexQueue\Queue;
32
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
33
use TYPO3\CMS\Core\Utility\GeneralUtility;
34
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
35
36
/**
37
 * Page Indexer to index TYPO3 pages used by the Index Queue.
38
 *
39
 * @author Ingo Renner <[email protected]>
40
 * @author Daniel Poetzinger <[email protected]>
41
 * @author Timo Schmidt <[email protected]>
42
 */
43
class Typo3PageIndexer
44
{
45
46
    /**
47
     * ID of the current page's Solr document.
48
     *
49
     * @var string
50
     */
51
    protected static $pageSolrDocumentId = '';
52
    /**
53
     * The Solr document generated for the current page.
54
     *
55
     * @var \Apache_Solr_Document
56
     */
57
    protected static $pageSolrDocument = null;
58
    /**
59
     * The mount point parameter used in the Frontend controller.
60
     *
61
     * @var string
62
     */
63
    protected $mountPointParameter;
64
    /**
65
     * Solr server connection.
66
     *
67
     * @var SolrService
68
     */
69
    protected $solrConnection = null;
70
    /**
71
     * Frontend page object (TSFE).
72
     *
73
     * @var TypoScriptFrontendController
74
     */
75
    protected $page = null;
76
    /**
77
     * Content extractor to extract content from TYPO3 pages
78
     *
79
     * @var Typo3PageContentExtractor
80
     */
81
    protected $contentExtractor = null;
82
    /**
83
     * URL to be indexed as the page's URL
84
     *
85
     * @var string
86
     */
87
    protected $pageUrl = '';
88
    /**
89
     * The page's access rootline
90
     *
91
     * @var Rootline
92
     */
93
    protected $pageAccessRootline = null;
94
    /**
95
     * Documents that have been sent to Solr
96
     *
97
     * @var array
98
     */
99
    protected $documentsSentToSolr = array();
100
101
    /**
102
     * @var TypoScriptConfiguration
103
     */
104
    protected $configuration;
105
106 32
    /**
107
     * @var Item
108 32
     */
109 32
    protected $indexQueueItem;
110 32
111
    /**
     * Sets up the indexer for the given frontend page.
     *
     * Stores the frontend controller, takes the current request URL as the
     * URL to index, loads the Solr configuration and tries to establish the
     * Solr connection. A failing connection attempt is logged instead of
     * being rethrown; the connection property then keeps its null default,
     * which indexPage() treats as "nothing to do".
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $exception) {
            $logMessage = $exception->getMessage() . ' Error code: ' . $exception->getCode();
            $this->log($logMessage, 3);

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog(
                    'Exception while trying to index a page',
                    'solr',
                    3,
                    [$exception->__toString()]
                );
            }
        }

        $this->contentExtractor = GeneralUtility::makeInstance(
            Typo3PageContentExtractor::class,
            $this->page->content
        );
        // starts out with an empty access rootline; see setPageAccessRootline()
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
    }
139
140
    /**
     * Sets the Index Queue item this indexer is working on.
     *
     * The item is used by getIndexConfigurationNameForCurrentPage() to
     * determine the indexing configuration name.
     *
     * @param Item $indexQueueItem The Index Queue item of the current page
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }
147
148
149
    /**
     * Looks up the Solr connection for the current page and language and
     * stores it, provided the server answers a ping.
     *
     * @throws \Exception (code 1234790825) when the Solr server does not respond to the ping.
     */
    protected function initializeSolrConnection()
    {
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $connection = $connectionManager->getConnectionByPageId(
            $this->page->id,
            $this->page->sys_language_uid
        );

        if ($connection->ping()) {
            $this->solrConnection = $connection;
            return;
        }

        // do not continue if no server is available
        throw new \Exception(
            'No Solr instance available while trying to index a page.',
            1234790825
        );
    }
168 32
169
    /**
     * Logs messages to devlog and TS log (admin panel)
     *
     * The TS log entry is only written when a time tracking object is
     * registered in $GLOBALS['TT']. The devlog entry is only written when
     * indexing logging is enabled in the Solr configuration.
     *
     * @param string $message Message to set
     * @param int $errorNum Error number
     * @param array $data Additional data to log; each entry is cast to an array before it is handed to the devlog
     * @return void
     */
    protected function log($message, $errorNum = 0, array $data = array())
    {
        // isset() first: reading a missing 'TT' key directly would raise an
        // undefined index notice in contexts without a time tracker
        if (isset($GLOBALS['TT']) && is_object($GLOBALS['TT'])) {
            $GLOBALS['TT']->setTSlogMessage('tx_solr: ' . $message, $errorNum);
        }

        if (!$this->configuration->getLoggingIndexing()) {
            return;
        }

        $logData = array();
        foreach ($data as $value) {
            $logData[] = (array)$value;
        }

        GeneralUtility::devLog($message, 'solr', $errorNum, $logData);
    }
194
195
    /**
     * Gets the current page's Solr document ID.
     *
     * The ID is stored by getPageDocument() while the page document is
     * built; until then the initial empty string is returned.
     *
     * @return string The page's Solr document ID, or an empty string if no document was generated yet.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }
204
205
    /**
     * Gets the Solr document generated for the current page.
     *
     * The document is stored by indexPage() after substitution and post
     * processing have run.
     *
     * @return \Apache_Solr_Document|null The page's Solr document, or null if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }
214
215 2
    /**
     * Replaces the Solr connection initialized by the constructor with the
     * given one. The connection is only accepted if the server answers a
     * ping.
     *
     * @param SolrService $solrConnection Solr connection
     * @throws \Exception (code 1323946472) if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrService $solrConnection)
    {
        if ($solrConnection->ping()) {
            $this->solrConnection = $solrConnection;
            return;
        }

        throw new \Exception(
            'Could not connect to Solr server.',
            1323946472
        );
    }
233 32
234 32
    /**
     * Indexes a page.
     *
     * Builds the page document, lets registered hooks substitute and post
     * process it, collects additional documents, runs the field processing
     * and finally sends all documents to Solr.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error or when no Solr connection is available
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        if (is_null($this->solrConnection)) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the solr server isn't available
            // anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->getPageDocument();
        $pageDocument = $this->substitutePageDocument($pageDocument);

        if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
                $postProcessor = GeneralUtility::getUserObj($classReference);

                if (!$postProcessor instanceof PageDocumentPostProcessor) {
                    // name the offending post processor class in the message,
                    // not the class of the document being processed
                    throw new \UnexpectedValueException(
                        get_class($postProcessor) . ' must implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor',
                        1397739154
                    );
                }

                $postProcessor->postProcessPageDocument($pageDocument, $this->page);
            }
        }

        self::$pageSolrDocument = $pageDocument;

        // the page document is kept in a list so that additional documents
        // (e.g. individual records) can be indexed along with the page
        $documents = $this->getAdditionalDocuments($pageDocument, array($pageDocument));
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }
281
282 32
    /**
     * Creates the Solr document for the current page and fills it with the
     * identifying, system, access and content fields.
     *
     * As a side effect the generated document ID is stored statically, see
     * getPageSolrDocumentId().
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        /* @var $document \Apache_Solr_Document */
        $document = GeneralUtility::makeInstance('Apache_Solr_Document');
        $site = Site::getSiteByPageId($this->page->id);
        $pageRecord = $this->page->page;

        $documentId = Util::getPageDocumentId(
            $this->page->id,
            $this->page->type,
            $this->page->sys_language_uid,
            $this->getDocumentIdGroups(),
            $this->getMountPointParameter()
        );
        self::$pageSolrDocumentId = $documentId;

        // identification
        $document->setField('id', $documentId);
        $document->setField('site', $site->getDomain());
        $document->setField('siteHash', $site->getSiteHash());
        $document->setField('appKey', 'EXT:solr');
        $document->setField('type', 'pages');

        // system fields
        $document->setField('uid', $this->page->id);
        $document->setField('pid', $pageRecord['pid']);

        // variantId
        $document->setField('variantId', 'pages/' . $this->page->id);

        $document->setField('typeNum', $this->page->type);
        $document->setField('created', $pageRecord['crdate']);
        $document->setField('changed', $pageRecord['SYS_LASTCHANGED']);

        // rootline: the page id, followed by the mount point parameter if one is set
        $rootline = $this->page->id;
        $mountPoint = $this->getMountPointParameter();
        if ($mountPoint !== '') {
            $rootline .= ',' . $mountPoint;
        }
        $document->setField('rootline', $rootline);

        // access
        $access = (string)$this->pageAccessRootline;
        if (trim($access) !== '') {
            $document->setField('access', $access);
        }
        if ($pageRecord['endtime']) {
            $document->setField('endtime', $pageRecord['endtime']);
        }

        // content
        $document->setField('title', $this->contentExtractor->getPageTitle());

        // document field name => column of the page record
        $recordFields = [
            'subTitle' => 'subtitle',
            'navTitle' => 'nav_title',
            'author' => 'author',
            'description' => 'description',
            'abstract' => 'abstract',
        ];
        foreach ($recordFields as $documentField => $recordColumn) {
            $document->setField($documentField, $pageRecord[$recordColumn]);
        }

        $document->setField('content', $this->contentExtractor->getIndexableContent());
        $document->setField('url', $this->pageUrl);

        // keywords, multi valued
        $keywordList = GeneralUtility::trimExplode(',', $pageRecord['keywords'], true);
        foreach (array_unique($keywordList) as $keyword) {
            $document->addField('keywords', $keyword);
        }

        // content from several tags like headers, anchors, ...
        foreach ($this->contentExtractor->getTagContent() as $fieldName => $fieldValue) {
            $document->setField($fieldName, $fieldValue);
        }

        return $document;
    }
363 32
364 32
    /**
     * Builds the frontend user group list that goes into the document ID.
     *
     * The groups are taken from the page's access rootline and cleaned;
     * if nothing remains, the single group 0 is used.
     *
     * @return string A comma separated list of frontend user groups.
     */
    protected function getDocumentIdGroups()
    {
        $groups = Rootline::cleanGroupArray(
            $this->pageAccessRootline->getGroups()
        );

        if (empty($groups)) {
            $groups[] = 0;
        }

        return implode(',', $groups);
    }
383 32
384
    // Logging
385 32
    // TODO replace by a central logger
386
387
    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * The property has no default value, so it is still null when
     * setMountPointParameter() has not been called. The cast makes sure
     * callers always get a string; getPageDocument() compares the value
     * against '' and would otherwise append a dangling comma to the
     * rootline field.
     *
     * @return string The mount point parameter, or an empty string if none was set
     */
    public function getMountPointParameter()
    {
        return (string)$this->mountPointParameter;
    }
396
397 2
    // Misc
398 2
399
    /**
     * Sets the mount point parameter that is used in the Frontend controller.
     *
     * The given value is stored as a string.
     *
     * @param string $mountPointParameter The mount point parameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }
408
409
    /**
410 32
     * Allows third party extensions to replace or modify the page document
411 2
     * created by this indexer.
412 2
     *
413
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
414 2
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
415 2
     */
416
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
417 2
    {
418 2
        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'])) {
419
            return $pageDocument;
420
        }
421
422 2
        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
423
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'] as $classReference) {
424
            $substituteIndexer = GeneralUtility::getUserObj($classReference);
425
426
            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
427
                $message = get_class($substituteIndexer) . ' must implement interface ApacheSolrForTypo3\Solr\SubstitutePageIndexer';
428 2
                throw new \UnexpectedValueException($message, 1310491001);
429
            }
430
431
            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
432
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
433
            }
434 32
435
            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
436
            if (!$substituteDocument instanceof \Apache_Solr_Document) {
0 ignored issues
show
Bug introduced by
The class Apache_Solr_Document does not exist. Did you forget a USE statement, or did you not list all dependencies?

This error could be the result of:

1. Missing dependencies

PHP Analyzer uses your composer.json file (if available) to determine the dependencies of your project and to determine all the available classes and functions. It expects the composer.json to be in the root folder of your repository.

Are you sure this class is defined by one of your dependencies, or did you maybe not list a dependency in either the require or require-dev section?

2. Missing use statement

PHP does not complain about undefined classes in instanceof checks. For example, the following PHP code will work perfectly fine:

if ($x instanceof DoesNotExist) {
    // Do something.
}

If you have not tested against this specific condition, such errors might go unnoticed.

Loading history...
437
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Apache_Solr_Document document.';
438
                throw new \UnexpectedValueException($message, 1310490952);
439
            }
440
            $pageDocument = $substituteDocument;
441
        }
442
443
        return $pageDocument;
444
    }
445 32
446
    /**
     * Determines the indexing configuration name for the current page.
     *
     * The name is read from the Index Queue item if one has been set,
     * otherwise 'pages' is used.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        if (!isset($this->indexQueueItem)) {
            return 'pages';
        }

        return $this->indexQueueItem->getIndexingConfigurationName();
    }
455
456
    /**
     * Lets the indexers registered in the indexPageAddDocuments hook
     * contribute further documents for the current page.
     *
     * Each indexer receives the page document and the list collected so
     * far; array results are appended to that list.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param array $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by all additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException if a registered indexer does not implement ApacheSolrForTypo3\Solr\AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(
        \Apache_Solr_Document $pageDocument,
        array $existingDocuments
    ) {
        if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'])) {
            // no hook registered, nothing to add
            return $existingDocuments;
        }

        $collectedDocuments = $existingDocuments;

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'] as $classReference) {
            $additionalIndexer = GeneralUtility::getUserObj($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                throw new \UnexpectedValueException(
                    get_class($additionalIndexer) . ' must implement interface ApacheSolrForTypo3\Solr\AdditionalPageIndexer',
                    1310491024
                );
            }

            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments(
                $pageDocument,
                $collectedDocuments
            );

            if (is_array($additionalDocuments)) {
                $collectedDocuments = array_merge($collectedDocuments, $additionalDocuments);
            }
        }

        return $collectedDocuments;
    }
493
494
    /**
     * Hands the documents over to the field processing service, which
     * manipulates fields according to the configured processing
     * instructions. Does nothing when no instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $instructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();

        if (count($instructions) === 0) {
            return;
        }

        $fieldProcessor = GeneralUtility::makeInstance('ApacheSolrForTypo3\\Solr\\FieldProcessor\\Service');
        $fieldProcessor->processDocuments($documents, $instructions);
    }
508
509 32
    /**
     * Sends the collected documents to the Solr index in chunks of 20.
     *
     * Any exception raised while sending, including a non-200 response
     * from Solr, is logged and reported through the return value instead
     * of being passed on to the caller.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise (also for an empty list)
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        $documentCount = count($documents);
        if ($documentCount === 0) {
            return false;
        }

        try {
            $this->log('Adding ' . $documentCount . ' documents.', 0, $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $chunk) {
                $response = $this->solrConnection->addDocuments($chunk);

                if ($response->getHttpStatus() != 200) {
                    throw new \RuntimeException(
                        'Solr Request failed.',
                        1331834983,
                        new \Apache_Solr_HttpTransportException($response)
                    );
                }
            }

            return true;
        } catch (\Exception $exception) {
            $this->log($exception->getMessage() . ' Error code: ' . $exception->getCode(), 2);

            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog(
                    'Exception while adding documents',
                    'solr',
                    3,
                    [$exception->__toString()]
                );
            }
        }

        return false;
    }
553 2
554
    /**
     * Gets the URL that is indexed as the page's URL.
     *
     * Unless changed through setPageUrl(), this is the request URL read in
     * the constructor.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }
563
564
    /**
     * Overrides the URL that is written to the page document's url field.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }
573 31
574
    /**
     * Gets the access rootline currently used for the page.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }
583
584
    /**
     * Replaces the page's access rootline, which is used for the document
     * ID groups and the document's access field.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }
593
594
    /**
     * Gets the documents handed to Solr by the last indexPage() call.
     *
     * The list is empty as long as indexPage() has not reached the point
     * of sending documents.
     *
     * @return array An array of \Apache_Solr_Document objects
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }
603
}
604