Completed
Push — master ( ee3b45...4fa3ae )
by
unknown
16:22
created

initializeIndexerConfiguration()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 45
Code Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 23
nc 4
nop 2
dl 0
loc 45
rs 9.552
c 0
b 0
f 0
1
<?php
2
namespace TYPO3\CMS\IndexedSearch\Hook;
3
4
/*
5
 * This file is part of the TYPO3 CMS project.
6
 *
7
 * It is free software; you can redistribute it and/or modify it under
8
 * the terms of the GNU General Public License, either version 2
9
 * of the License, or any later version.
10
 *
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 *
14
 * The TYPO3 project - inspiring people to share!
15
 */
16
17
use TYPO3\CMS\Core\Charset\CharsetConverter;
18
use TYPO3\CMS\Core\Charset\UnknownCharsetException;
19
use TYPO3\CMS\Core\Configuration\ExtensionConfiguration;
20
use TYPO3\CMS\Core\Context\Context;
21
use TYPO3\CMS\Core\Context\LanguageAspect;
22
use TYPO3\CMS\Core\TimeTracker\TimeTracker;
23
use TYPO3\CMS\Core\Utility\GeneralUtility;
24
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
25
use TYPO3\CMS\IndexedSearch\Indexer;
26
27
/**
28
 * Hooks for \TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController (TSFE).
29
 * @internal this is a TYPO3-internal hook implementation and not part of TYPO3's Core API.
30
 */
31
class TypoScriptFrontendHook
{
    /**
     * Frontend hook: forces re-generation of the page when the crawler asked for re-indexing.
     *
     * Re-generation of the page is required in order to have the indexer called. If the crawler
     * processing instruction "tx_indexedsearch_reindex" is present, the look-up of cached page
     * data is disabled and indexing is flagged as forced for hook_indexContent().
     *
     * @param array $params Parameters from frontend; "disableAcquireCacheData" is written by reference
     * @param TypoScriptFrontendController $tsfe TSFE object
     */
    public function headerNoCache(array &$params, TypoScriptFrontendController $tsfe)
    {
        // Requirements are that the crawler is loaded, a crawler session is running
        // and re-indexing is requested as processing instruction.
        $procInstructions = $tsfe->applicationData['tx_crawler']['parameters']['procInstructions'] ?? [];
        if (!in_array('tx_indexedsearch_reindex', $procInstructions, true)) {
            return;
        }

        // Disables a look-up for cached page data - thus resulting in re-generation of the page even if cached.
        $params['disableAcquireCacheData'] = true;
        // Enable indexing
        $tsfe->applicationData['forceIndexing'] = true;
    }

    /**
     * Trigger indexing of content, after evaluating if this page could / should be indexed.
     *
     * The page is skipped (with a reason written to the time tracker log) when frontend
     * indexing is disabled and not forced, when the page has the "No Search" flag, when the
     * page is rendered with no_cache, or when the rendered content language differs from the
     * requested language (fall-back content).
     *
     * The snake-case method name is part of the public interface and is therefore kept as is.
     *
     * @param TypoScriptFrontendController $tsfe
     */
    public function hook_indexContent(TypoScriptFrontendController $tsfe)
    {
        // Determine if page should be indexed, and if so, configure and initialize indexer.
        // empty() keeps the truthiness check but tolerates a missing "index_enable" setting.
        if (empty($tsfe->config['config']['index_enable'])) {
            return;
        }

        // Indexer configuration from Extension Manager interface:
        $disableFrontendIndexing = (bool)GeneralUtility::makeInstance(ExtensionConfiguration::class)->get('indexed_search', 'disableFrontendIndexing');
        $forceIndexing = $tsfe->applicationData['forceIndexing'] ?? false;

        $timeTracker = GeneralUtility::makeInstance(TimeTracker::class);
        $timeTracker->push('Index page');
        try {
            if ($disableFrontendIndexing && !$forceIndexing) {
                $timeTracker->setTSlogMessage('Index page? No, Ordinary Frontend indexing during rendering is disabled.');
                return;
            }

            if (!empty($tsfe->page['no_search'])) {
                $timeTracker->setTSlogMessage('Index page? No, The "No Search" flag has been set in the page properties!');
                return;
            }
            if ($tsfe->no_cache) {
                $timeTracker->setTSlogMessage('Index page? No, page was set to "no_cache" and so cannot be indexed.');
                return;
            }
            /** @var LanguageAspect $languageAspect */
            $languageAspect = GeneralUtility::makeInstance(Context::class)->getAspect('language');
            if ($languageAspect->getId() !== $languageAspect->getContentId()) {
                $timeTracker->setTSlogMessage('Index page? No, languageId was different from contentId which indicates that the page contains fall-back content and that would be falsely indexed as localized content.');
                return;
            }

            // Init and start indexing
            $indexer = GeneralUtility::makeInstance(Indexer::class);
            $indexer->forceIndexing = $forceIndexing;
            $indexer->init($this->initializeIndexerConfiguration($tsfe, $languageAspect));
            $indexer->indexTypo3PageContent();
        } finally {
            // Every push() needs its pull(), also on the early returns above and on exceptions.
            $timeTracker->pull();
        }
    }

    /**
     * Setting up internal configuration from config array based on TypoScriptFrontendController
     * Information about page for which the indexing takes place
     *
     * @param TypoScriptFrontendController $tsfe
     * @param LanguageAspect $languageAspect
     * @return array Configuration array handed to the indexer's init() method
     */
    protected function initializeIndexerConfiguration(TypoScriptFrontendController $tsfe, LanguageAspect $languageAspect): array
    {
        $frontendConfiguration = $tsfe->config['config'] ?? [];

        $configuration = [
            // Page id
            'id' => $tsfe->id,
            // Page type
            'type' => $tsfe->type,
            // sys_language UID of the language of the indexing.
            'sys_language_uid' => $languageAspect->getId(),
            // MP variable, if any (Mount Points)
            'MP' => $tsfe->MP,
            // Group list
            'gr_list' => implode(',', GeneralUtility::makeInstance(Context::class)->getPropertyFromAspect('frontend.user', 'groupIds', [0, -1])),
            // page arguments array
            'staticPageArguments' => $tsfe->getPageArguments()->getStaticArguments(),
            // The creation date of the TYPO3 page
            'crdate' => $tsfe->page['crdate'],
            // Root line uids, filled below
            'rootline_uids' => [],
        ];

        // Root line uids
        foreach ($tsfe->config['rootLine'] ?? [] as $rootLineKey => $rootLinePage) {
            $configuration['rootline_uids'][$rootLineKey] = $rootLinePage['uid'];
        }

        // Content string (HTML of TYPO3 page)
        $configuration['content'] = $tsfe->content;
        // Alternative title for indexing
        $configuration['indexedDocTitle'] = $this->convOutputCharset($tsfe->indexedDocTitle, $tsfe->metaCharset);
        // Character set of content (will be converted to utf-8 during indexing)
        $configuration['metaCharset'] = $tsfe->metaCharset;
        // Most recent modification time (seconds) of the content on the page. Used to evaluate whether it should be re-indexed.
        $configuration['mtime'] = $tsfe->register['SYS_LASTCHANGED'] ?? $tsfe->page['SYS_LASTCHANGED'];

        // Configuration of behavior
        // Whether to index external documents like PDF, DOC etc. (if possible)
        $configuration['index_externals'] = $frontendConfiguration['index_externals'] ?? null;
        // Length of description text (max 250, default 200)
        $configuration['index_descrLgd'] = $frontendConfiguration['index_descrLgd'] ?? null;
        // Taken from config.index_metatags, true when that setting is absent
        $configuration['index_metatags'] = $frontendConfiguration['index_metatags'] ?? true;

        // Set to zero
        $configuration['recordUid'] = 0;
        $configuration['freeIndexUid'] = 0;
        $configuration['freeIndexSetId'] = 0;

        return $configuration;
    }

    /**
     * Converts input string from utf-8 to metaCharset IF the two charsets are different.
     *
     * The comparison is an exact match against the lowercase string "utf-8"; any other
     * value is handed to the charset converter.
     *
     * @param string $content Content to be converted.
     * @param string $metaCharset
     * @return string Converted content string.
     * @throws \RuntimeException if the charset converter does not know $metaCharset
     */
    protected function convOutputCharset(string $content, string $metaCharset): string
    {
        if ($metaCharset === 'utf-8') {
            return $content;
        }

        $charsetConverter = GeneralUtility::makeInstance(CharsetConverter::class);
        try {
            return $charsetConverter->conv($content, 'utf-8', $metaCharset);
        } catch (UnknownCharsetException $e) {
            // Keep the original exception attached so the root cause stays visible in the trace.
            throw new \RuntimeException('Invalid config.metaCharset: ' . $e->getMessage(), 1508916285, $e);
        }
    }
}
171