Completed
Push — master ( b6b629...72d15a )
by Timo
10s
created

Typo3PageIndexer::getPageUrl()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 0
cts 4
cp 0
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 0
crap 2
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Access\Rootline;
28
use ApacheSolrForTypo3\Solr\ConnectionManager;
29
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
30
use ApacheSolrForTypo3\Solr\IndexQueue\Item;
31
use ApacheSolrForTypo3\Solr\IndexQueue\Queue;
32
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration;
33
use TYPO3\CMS\Core\Utility\GeneralUtility;
34
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
35
36
/**
37
 * Page Indexer to index TYPO3 pages used by the Index Queue.
38
 *
39
 * @author Ingo Renner <[email protected]>
40
 * @author Daniel Poetzinger <[email protected]>
41
 * @author Timo Schmidt <[email protected]>
42
 */
43
class Typo3PageIndexer
44
{
45
46
    /**
47
     * ID of the current page's Solr document.
48
     *
49
     * @var string
50
     */
51
    protected static $pageSolrDocumentId = '';
52
    /**
53
     * The Solr document generated for the current page.
54
     *
55
     * @var \Apache_Solr_Document
56
     */
57
    protected static $pageSolrDocument = null;
58
    /**
59
     * The mount point parameter used in the Frontend controller.
60
     *
61
     * @var string
62
     */
63
    protected $mountPointParameter;
64
    /**
65
     * Solr server connection.
66
     *
67
     * @var SolrService
68
     */
69
    protected $solrConnection = null;
70
    /**
71
     * Frontend page object (TSFE).
72
     *
73
     * @var TypoScriptFrontendController
74
     */
75
    protected $page = null;
76
    /**
77
     * Content extractor to extract content from TYPO3 pages
78
     *
79
     * @var Typo3PageContentExtractor
80
     */
81
    protected $contentExtractor = null;
82
    /**
83
     * URL to be indexed as the page's URL
84
     *
85
     * @var string
86
     */
87
    protected $pageUrl = '';
88
    /**
89
     * The page's access rootline
90
     *
91
     * @var Rootline
92
     */
93
    protected $pageAccessRootline = null;
94
    /**
95
     * Documents that have been sent to Solr
96
     *
97
     * @var array
98
     */
99
    protected $documentsSentToSolr = array();
100
101
    /**
102
     * @var TypoScriptConfiguration
103
     */
104
    protected $configuration;
105
106
    /**
107
     * @var Item
108
     */
109
    protected $indexQueueItem;
110
111
    /**
     * Prepares the indexer for the given frontend page.
     *
     * Stores the page, takes the current request URL as the page's URL, loads
     * the Solr configuration and tries to set up the Solr connection. A
     * failure while connecting is logged but not re-thrown, so the connection
     * property keeps its initial NULL value in that case.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $exception) {
            $this->log(
                $exception->getMessage() . ' Error code: ' . $exception->getCode(),
                3
            );

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                $exceptionDetails = [$exception->__toString()];
                GeneralUtility::devLog(
                    'Exception while trying to index a page',
                    'solr',
                    3,
                    $exceptionDetails
                );
            }
        }

        $this->contentExtractor = GeneralUtility::makeInstance(
            Typo3PageContentExtractor::class,
            $this->page->content
        );
        // start out with an empty access rootline; callers may replace it
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
    }
139
140
    /**
     * Registers the Index Queue item this indexer run belongs to.
     *
     * The item is later consulted to determine the indexing configuration
     * name for the current page.
     *
     * @param Item $indexQueueItem The Index Queue item being processed
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }
147
148
149
    /**
     * Looks up the Solr connection for the current page and language and
     * stores it, provided the server answers a ping.
     *
     * @throws \Exception (code 1234790825) when the Solr server does not respond to the ping.
     */
    protected function initializeSolrConnection()
    {
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $connection = $connectionManager->getConnectionByPageId(
            $this->page->id,
            $this->page->sys_language_uid
        );

        // do not continue if no server is available
        if (!$connection->ping()) {
            throw new \Exception(
                'No Solr instance available while trying to index a page.',
                1234790825
            );
        }

        $this->solrConnection = $connection;
    }
168
169
    /**
     * Writes a message to the TS log (admin panel) and, if indexing logging
     * is enabled in the configuration, to the devlog as well.
     *
     * @param string $message Message to log
     * @param int $errorNum Error number / severity passed on to both logs
     * @param array $data Additional data to log; every entry is cast to an array for the devlog
     * @return void
     */
    protected function log($message, $errorNum = 0, array $data = array())
    {
        if (is_object($GLOBALS['TT'])) {
            $GLOBALS['TT']->setTSlogMessage('tx_solr: ' . $message, $errorNum);
        }

        if (!$this->configuration->getLoggingIndexing()) {
            return;
        }

        // iterating an empty array is a no-op, so no separate emptiness check is needed
        $logData = [];
        foreach ($data as $entry) {
            $logData[] = (array)$entry;
        }

        GeneralUtility::devLog($message, 'solr', $errorNum, $logData);
    }
194
195
    /**
     * Returns the Solr document ID of the current page.
     *
     * The value is shared statically across instances and is written when the
     * page document is built; until then it holds its initial empty string.
     *
     * @return string The page's Solr document ID, or an empty string if no document was generated yet.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }
204
205
    /**
     * Returns the Solr document that was generated for the current page.
     *
     * The document is shared statically across instances and is stored by
     * indexPage() before the documents are sent to Solr.
     *
     * @return \Apache_Solr_Document|NULL The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }
214
215
    /**
     * Replaces the Solr connection set up by the constructor with the given
     * one. The connection is only accepted if the server answers a ping.
     *
     * @param SolrService $solrConnection Solr connection to use from now on
     * @throws \Exception (code 1323946472) if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrService $solrConnection)
    {
        $isReachable = $solrConnection->ping();
        if (!$isReachable) {
            throw new \Exception(
                'Could not connect to Solr server.',
                1323946472
            );
        }

        $this->solrConnection = $solrConnection;
    }
233
234
    /**
     * Indexes a page.
     *
     * Builds the page document, lets registered hooks substitute and
     * post-process it, collects additional documents, runs field processing
     * and finally sends all documents to Solr. The documents that were handed
     * to Solr are remembered and can be read via getDocumentsSentToSolr().
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error or when no Solr connection is available
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        if (is_null($this->solrConnection)) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the solr server isn't available
            // anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->getPageDocument();
        $pageDocument = $this->substitutePageDocument($pageDocument);

        if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
            foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
                $postProcessor = GeneralUtility::getUserObj($classReference);

                if (!$postProcessor instanceof PageDocumentPostProcessor) {
                    // report the offending post processor, not the document
                    // being processed; fall back to the value's type when the
                    // class reference could not be resolved to an object
                    $offender = is_object($postProcessor)
                        ? get_class($postProcessor)
                        : gettype($postProcessor);

                    throw new \UnexpectedValueException(
                        $offender . ' must implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor',
                        1397739154
                    );
                }

                $postProcessor->postProcessPageDocument($pageDocument, $this->page);
            }
        }

        self::$pageSolrDocument = $pageDocument;

        // the page document always comes first, followed by whatever
        // documents third party indexers contribute
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }
281
282
    /**
     * Creates the Solr document describing the current page.
     *
     * Besides filling the document, this stores the generated document ID in
     * the static page document ID property.
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        /* @var $document \Apache_Solr_Document */
        $document = GeneralUtility::makeInstance('Apache_Solr_Document');
        $site = Site::getSiteByPageId($this->page->id);
        $pageRecord = $this->page->page;

        $documentId = Util::getPageDocumentId(
            $this->page->id,
            $this->page->type,
            $this->page->sys_language_uid,
            $this->getDocumentIdGroups(),
            $this->getMountPointParameter()
        );
        self::$pageSolrDocumentId = $documentId;

        // identification
        $document->setField('id', $documentId);
        $document->setField('site', $site->getDomain());
        $document->setField('siteHash', $site->getSiteHash());
        $document->setField('appKey', 'EXT:solr');
        $document->setField('type', 'pages');

        // system fields
        $document->setField('uid', $this->page->id);
        $document->setField('pid', $pageRecord['pid']);

        // variantId
        $document->setField('variantId', 'pages/' . $this->page->id);

        $document->setField('typeNum', $this->page->type);
        $document->setField('created', $pageRecord['crdate']);
        $document->setField('changed', $pageRecord['SYS_LASTCHANGED']);

        // rootline: the page id, followed by the mount point parameter if there is one
        $mountPoint = $this->getMountPointParameter();
        $rootline = $mountPoint !== ''
            ? $this->page->id . ',' . $mountPoint
            : $this->page->id;
        $document->setField('rootline', $rootline);

        // access
        $access = (string)$this->pageAccessRootline;
        if (trim($access) !== '') {
            $document->setField('access', $access);
        }
        if ($this->page->page['endtime']) {
            $document->setField('endtime', $pageRecord['endtime']);
        }

        // content
        $document->setField('title', $this->contentExtractor->getPageTitle());

        // document field => page record column, copied over unchanged
        $recordFieldMap = [
            'subTitle' => 'subtitle',
            'navTitle' => 'nav_title',
            'author' => 'author',
            'description' => 'description',
            'abstract' => 'abstract',
        ];
        foreach ($recordFieldMap as $documentField => $recordColumn) {
            $document->setField($documentField, $pageRecord[$recordColumn]);
        }

        $document->setField('content', $this->contentExtractor->getIndexableContent());
        $document->setField('url', $this->pageUrl);

        // keywords, multi valued
        $keywordList = GeneralUtility::trimExplode(',', $pageRecord['keywords'], true);
        foreach (array_unique($keywordList) as $keyword) {
            $document->addField('keywords', $keyword);
        }

        // content from several tags like headers, anchors, ...
        foreach ($this->contentExtractor->getTagContent() as $fieldName => $fieldValue) {
            $document->setField($fieldName, $fieldValue);
        }

        return $document;
    }
363
364
    /**
     * Builds the frontend user group list that becomes part of the document
     * ID.
     *
     * The groups are taken from the page's access rootline and cleaned; if
     * nothing is left, the single group 0 is used.
     *
     * @return string A comma separated list of frontend user groups.
     */
    protected function getDocumentIdGroups()
    {
        $accessGroups = Rootline::cleanGroupArray(
            $this->pageAccessRootline->getGroups()
        );

        if (empty($accessGroups)) {
            $accessGroups[] = 0;
        }

        return implode(',', $accessGroups);
    }
383
384
    // Logging
385
    // TODO replace by a central logger
386
387
    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * The property has no default value, so it is NULL until
     * setMountPointParameter() has been called. Callers compare the result
     * strictly against an empty string, therefore the value is always
     * returned as a string to honour the documented return type.
     *
     * @return string The mount point parameter, or an empty string if none was set
     */
    public function getMountPointParameter()
    {
        return (string)$this->mountPointParameter;
    }
396
397
    // Misc
398
399
    /**
     * Stores the mount point parameter that is used in the Frontend
     * controller. The given value is cast to a string before it is stored.
     *
     * @param string $mountPointParameter The mount point parameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }
408
409
    /**
     * Gives third party extensions the chance to replace or modify the page
     * document created by this indexer.
     *
     * Every class registered under the indexPageSubstitutePageDocument hook
     * is instantiated in turn and receives the document returned by its
     * predecessor. Without registered classes the document is returned
     * untouched.
     *
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException if a registered class does not implement SubstitutePageIndexer (code 1310491001) or does not return an \Apache_Solr_Document (code 1310490952)
     */
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
    {
        $substituteClassReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'];
        if (!is_array($substituteClassReferences)) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        $currentDocument = $pageDocument;

        foreach ($substituteClassReferences as $classReference) {
            $substituteIndexer = GeneralUtility::getUserObj($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                throw new \UnexpectedValueException(
                    get_class($substituteIndexer) . ' must implement interface ApacheSolrForTypo3\Solr\SubstitutePageIndexer',
                    1310491001
                );
            }

            // field mapping indexers need to know which indexing
            // configuration applies to the current page
            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($currentDocument);
            if (!$substituteDocument instanceof \Apache_Solr_Document) {
                throw new \UnexpectedValueException(
                    'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Apache_Solr_Document document.',
                    1310490952
                );
            }

            $currentDocument = $substituteDocument;
        }

        return $currentDocument;
    }
445
446
    /**
     * Determines the indexing configuration name for the current page.
     *
     * The name is taken from the Index Queue item if one has been set;
     * otherwise 'pages' is used.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        if (isset($this->indexQueueItem)) {
            return $this->indexQueueItem->getIndexingConfigurationName();
        }

        return 'pages';
    }
455
456
    /**
     * Collects additional documents from third party extensions that should
     * be indexed together with the current page.
     *
     * Every class registered under the indexPageAddDocuments hook receives
     * the page document and all documents gathered so far; whatever array it
     * returns is appended to the list.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param array $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by all additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException (code 1310491024) if a registered class does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
    {
        $indexerClassReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'];
        if (!is_array($indexerClassReferences)) {
            return $existingDocuments;
        }

        $collectedDocuments = $existingDocuments;
        foreach ($indexerClassReferences as $classReference) {
            $additionalIndexer = GeneralUtility::getUserObj($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                throw new \UnexpectedValueException(
                    get_class($additionalIndexer) . ' must implement interface ApacheSolrForTypo3\Solr\AdditionalPageIndexer',
                    1310491024
                );
            }

            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments(
                $pageDocument,
                $collectedDocuments
            );

            // anything but an array is silently ignored
            if (is_array($additionalDocuments)) {
                $collectedDocuments = array_merge($collectedDocuments, $additionalDocuments);
            }
        }

        return $collectedDocuments;
    }
493
494
    /**
     * Hands the given documents over to the field processing service, which
     * manipulates fields according to the configured processing instructions.
     * Nothing happens if no processing instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $instructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($instructions) <= 0) {
            return;
        }

        $fieldProcessor = GeneralUtility::makeInstance('ApacheSolrForTypo3\\Solr\\FieldProcessor\\Service');
        $fieldProcessor->processDocuments($documents, $instructions);
    }
508
509
    /**
     * Sends the collected documents to the Solr index in chunks of 20.
     *
     * Any exception raised on the way, including the one thrown for a
     * non-200 response, is caught and logged; the method then reports
     * failure instead of propagating the exception.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise (also for an empty list)
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        if (!count($documents)) {
            return false;
        }

        try {
            $this->log('Adding ' . count($documents) . ' documents.', 0, $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $documentChunk) {
                $response = $this->solrConnection->addDocuments($documentChunk);

                if ($response->getHttpStatus() != 200) {
                    throw new \RuntimeException(
                        'Solr Request failed.',
                        1331834983,
                        new \Apache_Solr_HttpTransportException($response)
                    );
                }
            }

            return true;
        } catch (\Exception $exception) {
            $this->log(
                $exception->getMessage() . ' Error code: ' . $exception->getCode(),
                2
            );

            if ($this->configuration->getLoggingExceptions()) {
                $exceptionDetails = [$exception->__toString()];
                GeneralUtility::devLog(
                    'Exception while adding documents',
                    'solr',
                    3,
                    $exceptionDetails
                );
            }
        }

        return false;
    }
553
554
    /**
     * Returns the URL that is indexed as the current page's URL.
     *
     * Unless overridden through setPageUrl(), this is the request URL
     * captured by the constructor.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }
563
564
    /**
     * Overrides the URL that is written to the page document's url field.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }
573
574
    /**
     * Returns the access rootline currently assigned to the page.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }
583
584
    /**
     * Replaces the page's access rootline, which feeds both the document's
     * access field and the group part of the document ID.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }
593
594
    /**
     * Returns the documents that indexPage() handed to Solr during its last
     * run; the list is empty before the first run.
     *
     * @return array An array of \Apache_Solr_Document objects
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }
603
}
604