Completed
Push — master ( 7a82fc...a55b2b )
by
unknown
14:42
created

TypoScriptFrontendHook::indexPageContent()   B

Complexity

Conditions 6
Paths 5

Size

Total Lines 34
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 21
nc 5
nop 2
dl 0
loc 34
rs 8.9617
c 0
b 0
f 0
1
<?php
2
namespace TYPO3\CMS\IndexedSearch\Hook;
3
4
/*
5
 * This file is part of the TYPO3 CMS project.
6
 *
7
 * It is free software; you can redistribute it and/or modify it under
8
 * the terms of the GNU General Public License, either version 2
9
 * of the License, or any later version.
10
 *
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 *
14
 * The TYPO3 project - inspiring people to share!
15
 */
16
17
use TYPO3\CMS\Core\Charset\CharsetConverter;
18
use TYPO3\CMS\Core\Charset\UnknownCharsetException;
19
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
20
use TYPO3\CMS\Core\Context\Context;
21
use TYPO3\CMS\Core\Context\LanguageAspect;
22
use TYPO3\CMS\Core\TimeTracker\TimeTracker;
23
use TYPO3\CMS\Core\Utility\GeneralUtility;
24
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
25
use TYPO3\CMS\IndexedSearch\Indexer;
26
27
/**
28
 * Hooks for \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController (TSFE).
29
 * @internal this is a TYPO3-internal hook implementation and not part of TYPO3's Core API.
30
 */
31
class TypoScriptFrontendHook
{
    /**
     * Frontend hook: if the page is not being re-generated, this is our chance to force it to be
     * (re-generation of the page is required in order to have the indexer called).
     *
     * Only acts when the crawler's processing instructions found in
     * $tsfe->applicationData['tx_crawler']['parameters']['procInstructions'] contain
     * "tx_indexedsearch_reindex".
     *
     * @param array $params Parameters from frontend; 'disableAcquireCacheData' is set to true on a re-index request
     * @param TypoScriptFrontendController $tsfe TSFE object; applicationData['forceIndexing'] is set to true on a re-index request
     */
    public function headerNoCache(array &$params, TypoScriptFrontendController $tsfe)
    {
        $procInstructions = $tsfe->applicationData['tx_crawler']['parameters']['procInstructions'] ?? [];
        if (!in_array('tx_indexedsearch_reindex', $procInstructions, true)) {
            // No re-indexing requested as processing instruction: leave cache handling untouched.
            return;
        }

        // Disables a look-up for cached page data - thus resulting in re-generation of the page even if cached.
        $params['disableAcquireCacheData'] = true;
        // Enable indexing
        $tsfe->applicationData['forceIndexing'] = true;
    }

    /**
     * Trigger indexing of content, after evaluating if this page could / should be indexed.
     *
     * The page is skipped when config.index_enable is not set, when frontend indexing is disabled
     * in the extension configuration (unless indexing is forced via applicationData['forceIndexing']),
     * when the page has the "No Search" flag, or when the language id differs from the content id.
     *
     * @param array $parameters Not used by this method; presumably kept because the hook caller passes it
     * @param TypoScriptFrontendController $tsfe
     */
    public function indexPageContent(array $parameters, TypoScriptFrontendController $tsfe)
    {
        // Determine if page should be indexed, and if so, configure and initialize indexer.
        // A missing "index_enable" setting is treated like a disabled one.
        if (!($tsfe->config['config']['index_enable'] ?? false)) {
            return;
        }

        // Indexer configuration from Extension Manager interface:
        $disableFrontendIndexing = (bool)GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('indexed_search', 'disableFrontendIndexing');
        $forceIndexing = $tsfe->applicationData['forceIndexing'] ?? false;

        $timeTracker = GeneralUtility::makeInstance(TimeTracker::class);
        $timeTracker->push('Index page');
        try {
            if ($disableFrontendIndexing && !$forceIndexing) {
                $timeTracker->setTSlogMessage('Index page? No, Ordinary Frontend indexing during rendering is disabled.');
                return;
            }

            if ($tsfe->page['no_search'] ?? false) {
                $timeTracker->setTSlogMessage('Index page? No, The "No Search" flag has been set in the page properties!');
                return;
            }

            /** @var LanguageAspect $languageAspect */
            $languageAspect = GeneralUtility::makeInstance(Context::class)->getAspect('language');
            if ($languageAspect->getId() !== $languageAspect->getContentId()) {
                $timeTracker->setTSlogMessage('Index page? No, languageId was different from contentId which indicates that the page contains fall-back content and that would be falsely indexed as localized content.');
                return;
            }

            // Init and start indexing
            $indexer = GeneralUtility::makeInstance(Indexer::class);
            $indexer->forceIndexing = $forceIndexing;
            $indexer->init($this->initializeIndexerConfiguration($tsfe, $languageAspect));
            $indexer->indexTypo3PageContent();
        } finally {
            // Every push() needs its pull(), also on the early returns above and when indexing throws.
            $timeTracker->pull();
        }
    }

    /**
     * Setting up internal configuration from config array based on TypoScriptFrontendController
     * Information about page for which the indexing takes place
     *
     * @param TypoScriptFrontendController $tsfe
     * @param LanguageAspect $languageAspect
     * @return array Configuration array handed to Indexer::init()
     */
    protected function initializeIndexerConfiguration(TypoScriptFrontendController $tsfe, LanguageAspect $languageAspect): array
    {
        $pageArguments = $tsfe->getPageArguments();
        $configuration = [
            // Page id
            'id' => $tsfe->id,
            // Page type
            'type' => $tsfe->type,
            // sys_language UID of the language of the indexing.
            'sys_language_uid' => $languageAspect->getId(),
            // MP variable, if any (Mount Points)
            'MP' => $tsfe->MP,
            // Group list
            'gr_list' => implode(',', GeneralUtility::makeInstance(Context::class)->getPropertyFromAspect('frontend.user', 'groupIds', [0, -1])),
            // page arguments array
            'staticPageArguments' => $pageArguments->getStaticArguments(),
            // The creation date of the TYPO3 page
            'crdate' => $tsfe->page['crdate'],
            'rootline_uids' => [],
        ];

        // Root line uids, keeping the keys of the root line
        foreach ($tsfe->config['rootLine'] ?? [] as $rootLineKey => $rootLinePage) {
            $configuration['rootline_uids'][$rootLineKey] = $rootLinePage['uid'];
        }

        // Content of page
        // Content string (HTML of TYPO3 page)
        $configuration['content'] = $this->convOutputCharset($tsfe->content, $tsfe->metaCharset);
        // Alternative title for indexing
        $configuration['indexedDocTitle'] = $this->convOutputCharset($tsfe->indexedDocTitle, $tsfe->metaCharset);
        // Character set of content (will be converted to utf-8 during indexing)
        $configuration['metaCharset'] = $tsfe->metaCharset;
        // Most recent modification time (seconds) of the content on the page. Used to evaluate whether it should be re-indexed.
        $configuration['mtime'] = $tsfe->register['SYS_LASTCHANGED'] ?? $tsfe->page['SYS_LASTCHANGED'];

        // Configuration of behavior
        // Whether to index external documents like PDF, DOC etc. (if possible)
        $configuration['index_externals'] = $tsfe->config['config']['index_externals'] ?? null;
        // Length of description text (max 250, default 200)
        $configuration['index_descrLgd'] = $tsfe->config['config']['index_descrLgd'] ?? null;
        $configuration['index_metatags'] = $tsfe->config['config']['index_metatags'] ?? true;

        // Set to zero
        $configuration['recordUid'] = 0;
        $configuration['freeIndexUid'] = 0;
        $configuration['freeIndexSetId'] = 0;

        return $configuration;
    }

    /**
     * Converts input string from utf-8 to metaCharset IF the two charsets are different.
     *
     * @param string $content Content to be converted.
     * @param string $metaCharset Target charset; the content is returned untouched when this is exactly "utf-8"
     * @return string Converted content string.
     * @throws \RuntimeException if the charset converter does not know $metaCharset
     */
    protected function convOutputCharset(string $content, string $metaCharset): string
    {
        if ($metaCharset === 'utf-8') {
            return $content;
        }

        $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
        try {
            return $charsetConverter->conv($content, 'utf-8', $metaCharset);
        } catch (UnknownCharsetException $e) {
            // Chain the original exception so its stack trace is not lost.
            throw new \RuntimeException('Invalid config.metaCharset: ' . $e->getMessage(), 1508916285, $e);
        }
    }
}
168