Failed Conditions
Push — master ( 5f60a5...9b80eb )
by Rafael
21:42
created

Classes/IndexQueue/PageIndexer.php (1 issue)

Labels
Severity
1
<?php
2
namespace ApacheSolrForTypo3\Solr\IndexQueue;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 3 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Access\Rootline;
28
use ApacheSolrForTypo3\Solr\Access\RootlineElement;
29
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
30
use TYPO3\CMS\Backend\Utility\BackendUtility;
0 ignored issues
show
The type TYPO3\CMS\Backend\Utility\BackendUtility was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
31
use TYPO3\CMS\Core\Utility\GeneralUtility;
32
33
/**
34
 * A special purpose indexer to index pages.
35
 *
36
 * In the case of pages we can't directly index the page records, we need to
37
 * retrieve the content that belongs to a page from tt_content, too. Also
38
 * plugins may be included on a page and thus may need to be executed.
39
 *
40
 * @author Ingo Renner <[email protected]>
41
 */
42
class PageIndexer extends Indexer
43
{
44
    /**
     * Indexes a page item from the Index Queue.
     *
     * For every Solr connection applicable to the page (one per language) the
     * frontend user groups used by the page's content are looked up, and the
     * page is indexed once per language and user group.
     *
     * @param Item $item An index queue item
     * @return bool FALSE if the page is not indexable, TRUE otherwise
     */
    public function index(Item $item)
    {
        $this->setLogging($item);

        // bail out early for pages that must not be indexed at all
        if (!$this->isPageIndexable($item)) {
            return false;
        }

        foreach ($this->getSolrConnectionsByItem($item) as $languageUid => $connection) {
            $userGroups = $this->getAccessGroupsFromContent($item, $languageUid);

            // An empty result might be an empty page w/no content elements or
            // some TYPO3 error / bug; such languages are skipped silently.
            // FIXME logging needed
            if (!empty($userGroups)) {
                foreach ($userGroups as $group) {
                    $this->indexPage($item, $languageUid, $group);
                }
            }
        }

        return true;
    }
77
78
    /**
     * Decides whether the page behind an index queue item may be indexed.
     *
     * A page is rejected only if TCA declares a "disabled" enable column for
     * the pages table and that column holds a truthy value in the page record.
     *
     * @param Item $item The page we want to index encapsulated in an index queue item
     * @return bool TRUE if the page can be indexed, FALSE otherwise
     */
    protected function isPageIndexable(Item $item)
    {
        // TODO do we still need this?
        // shouldn't those be sorted out by the record monitor / garbage collector already?

        $pageRecord = $item->getRecord();

        // without a configured "disabled" column there is nothing to check
        if (!isset($GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'])) {
            return true;
        }

        $disabledColumn = $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'];

        return !$pageRecord[$disabledColumn];
    }
101
102
    /**
     * Collects the Solr connections a page has to be indexed into.
     *
     * Starts from the connections resolved by the parent indexer and then
     * narrows them down according to the page's l18n_cfg settings: the default
     * language connection is dropped when bit 1 is set, and, if the page is
     * hidden when not translated, only languages with an existing translation
     * overlay are kept.
     *
     * @param Item $item An index queue item
     * @return array An array of ApacheSolrForTypo3\Solr\System\Solr\SolrConnection connections, the array's keys are the sys_language_uid of the language of the connection
     */
    protected function getSolrConnectionsByItem(Item $item)
    {
        $connections = parent::getSolrConnectionsByItem($item);
        $pageRecord = $item->getRecord();

        // may use \TYPO3\CMS\Core\Utility\GeneralUtility::hideIfDefaultLanguage($page['l18n_cfg']) with TYPO3 4.6
        if ($pageRecord['l18n_cfg'] & 1) {
            // the default translation is hidden for this page, so the
            // default language connection must not be used
            unset($connections[0]);
        }

        if (!GeneralUtility::hideIfNotTranslated($pageRecord['l18n_cfg'])) {
            return $connections;
        }

        // keep the default language first (if still present), followed by the
        // languages in the order their overlays are returned
        $translatedConnections = isset($connections[0]) ? [0 => $connections[0]] : [];

        $overlays = $this->pagesRepository->findTranslationOverlaysByPageId((int)$pageRecord['uid']);
        foreach ($overlays as $overlayRecord) {
            $overlayLanguage = $overlayRecord['sys_language_uid'];
            if (!array_key_exists($overlayLanguage, $connections)) {
                continue;
            }

            $translatedConnections[$overlayLanguage] = $connections[$overlayLanguage];
        }

        return $translatedConnections;
    }
142
143
    /**
     * Finds the FE user groups used on a page including all groups of content
     * elements and groups of records of extensions that have correctly been
     * pushed through ContentObjectRenderer during rendering.
     *
     * The groups are determined by sending a page indexer request with the
     * "findUserGroups" action to the page's data URL. Successful results are
     * cached per page uid and language for the duration of the PHP process.
     * If the response does not contain an array of groups, an empty array is
     * returned and nothing is cached, so the lookup is retried on the next
     * call.
     *
     * @param Item $item Index queue item representing the current page to get the user groups from
     * @param int $language The sys_language_uid language ID
     * @return array Array of user group IDs, empty if the groups could not be determined
     * @throws \RuntimeException if no valid data URL can be built, see getDataUrl()
     */
    protected function getAccessGroupsFromContent(Item $item, $language = 0)
    {
        static $accessGroupsCache = [];

        $accessGroupsCacheEntryId = $item->getRecordUid() . '|' . $language;
        if (isset($accessGroupsCache[$accessGroupsCacheEntryId])) {
            return $accessGroupsCache[$accessGroupsCacheEntryId];
        }

        $request = $this->buildBasePageIndexerRequest();
        $request->setIndexQueueItem($item);
        $request->addAction('findUserGroups');

        $indexRequestUrl = $this->getDataUrl($item, $language);
        $response = $request->send($indexRequestUrl);

        $groups = $response->getActionResult('findUserGroups');
        $groupsAreValid = is_array($groups);
        if ($groupsAreValid) {
            $accessGroupsCache[$accessGroupsCacheEntryId] = $groups;
        }

        if ($this->loggingEnabled) {
            $this->logger->log(
                SolrLogManager::INFO,
                'Page Access Groups',
                [
                    'item' => (array)$item,
                    'language' => $language,
                    'index request url' => $indexRequestUrl,
                    'request' => (array)$request,
                    'response' => (array)$response,
                    'groups' => $groups
                ]
            );
        }

        // Previously a failed lookup read a cache entry that was never
        // written, which raised an undefined index notice and returned NULL.
        return $groupsAreValid ? $groups : [];
    }
188
189
    // Utility methods
190
191
    /**
     * Creates a page indexer request pre-configured from the indexer options.
     *
     * The request carries the current logging flag and, if configured, the
     * authorization credentials, additional headers and the request timeout.
     *
     * @return PageIndexerRequest Base page indexer request
     */
    protected function buildBasePageIndexerRequest()
    {
        $indexerRequest = GeneralUtility::makeInstance(PageIndexerRequest::class);
        $indexerRequest->setParameter('loggingEnabled', $this->loggingEnabled);

        // optional HTTP authorization
        if (!empty($this->options['authorization.'])) {
            $credentials = $this->options['authorization.'];
            $indexerRequest->setAuthorizationCredentials(
                $credentials['username'],
                $credentials['password']
            );
        }

        // optional additional request headers
        $additionalHeaders = [];
        if (!empty($this->options['frontendDataHelper.']['headers.'])) {
            $additionalHeaders = $this->options['frontendDataHelper.']['headers.'];
        }
        foreach ($additionalHeaders as $header) {
            $indexerRequest->addHeader($header);
        }

        // optional request timeout
        if (!empty($this->options['frontendDataHelper.']['requestTimeout'])) {
            $timeout = (float)$this->options['frontendDataHelper.']['requestTimeout'];
            $indexerRequest->setTimeout($timeout);
        }

        return $indexerRequest;
    }
221
222
    /**
     * Determines a page ID's URL.
     *
     * Builds the URL the page indexer request is sent to from a scheme, a
     * host, a path, the page ID, an optional mount point parameter and the
     * language. Scheme, host and path default to "http", the domain of the
     * item's site and "/", and can be overridden through the indexer's
     * frontendDataHelper options. If a data URL modifier class is registered
     * in $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['IndexQueuePageIndexer']['dataUrlModifier'],
     * it gets the chance to change the final URL.
     *
     * @param Item $item Item to index
     * @param int $language The language id
     * @return string URL to send the index request to
     * @throws \RuntimeException if the constructed URL is invalid or the registered data URL modifier does not implement PageIndexerDataUrlModifier
     */
    protected function getDataUrl(Item $item, $language = 0)
    {
        $scheme = 'http';
        $host = $item->getSite()->getDomain();
        $path = '/';
        $pageId = $item->getRecordUid();

        // deprecated
        if (!empty($this->options['scheme'])) {
            $this->logger->log(
                SolrLogManager::INFO,
                'Using deprecated option "scheme" to set the scheme (http / https) for the page indexer frontend helper. Use plugin.tx_solr.index.queue.pages.indexer.frontendDataHelper.scheme instead'
            );
            $scheme = $this->options['scheme'];
        }

        // check whether we should use ssl / https
        if (!empty($this->options['frontendDataHelper.']['scheme'])) {
            $scheme = $this->options['frontendDataHelper.']['scheme'];
        }

        // overwriting the host
        if (!empty($this->options['frontendDataHelper.']['host'])) {
            $host = $this->options['frontendDataHelper.']['host'];
        }

        // setting a path if TYPO3 is installed in a sub directory
        if (!empty($this->options['frontendDataHelper.']['path'])) {
            $path = $this->options['frontendDataHelper.']['path'];
        }

        $mountPointParameter = $this->getMountPageDataUrlParameter($item);
        $dataUrl = $scheme . '://' . $host . $path . 'index.php?id=' . $pageId;
        $dataUrl .= ($mountPointParameter !== '') ? '&MP=' . $mountPointParameter : '';
        $dataUrl .= '&L=' . $language;

        if (!GeneralUtility::isValidUrl($dataUrl)) {
            $this->logger->log(
                SolrLogManager::ERROR,
                'Could not create a valid URL to get frontend data while trying to index a page.',
                [
                    'item' => (array)$item,
                    'constructed URL' => $dataUrl,
                    'scheme' => $scheme,
                    'host' => $host,
                    'path' => $path,
                    'page ID' => $pageId,
                    'indexer options' => $this->options
                ]
            );

            throw new \RuntimeException(
                'Could not create a valid URL to get frontend data while trying to index a page. Created URL: ' . $dataUrl,
                1311080805
            );
        }

        // The modifier registration is optional; !empty() avoids an undefined
        // index notice when no modifier has been registered at all.
        if (!empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['IndexQueuePageIndexer']['dataUrlModifier'])) {
            $dataUrlModifierClassName = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['IndexQueuePageIndexer']['dataUrlModifier'];
            $dataUrlModifier = GeneralUtility::makeInstance($dataUrlModifierClassName);

            if (!$dataUrlModifier instanceof PageIndexerDataUrlModifier) {
                throw new \RuntimeException(
                    $dataUrlModifierClassName
                    . ' is not an implementation of ApacheSolrForTypo3\Solr\IndexQueue\PageIndexerDataUrlModifier',
                    1290523345
                );
            }

            $dataUrl = $dataUrlModifier->modifyDataUrl($dataUrl, [
                'item' => $item,
                'scheme' => $scheme,
                'host' => $host,
                'path' => $path,
                'pageId' => $pageId,
                'language' => $language
            ]);
        }

        return $dataUrl;
    }
313
314
    /**
     * Builds the value of the MP URL parameter used to access mount pages.
     *
     * The value is only generated for items flagged with the "isMountedPage"
     * indexing property and consists of the item's "mountPageSource" and
     * "mountPageDestination" indexing properties joined by a dash.
     *
     * @param Item $item Item to get an &MP URL parameter for
     * @return string MP parameter value if $item is a mounted page, an empty string otherwise
     */
    protected function getMountPageDataUrlParameter(Item $item)
    {
        if (!$item->hasIndexingProperty('isMountedPage')) {
            return '';
        }

        $source = $item->getIndexingProperty('mountPageSource');
        $destination = $item->getIndexingProperty('mountPageDestination');

        return $source . '-' . $destination;
    }
334
335
    #
336
    # Frontend User Groups Access
337
    #
338
339
    /**
     * Creates a single Solr Document for a page in a specific language and for
     * a specific frontend user group.
     *
     * Sends a page indexer request with the "indexPage" action and the access
     * rootline for the given user group to the page's data URL. Indexing is
     * considered successful only if the action result carries a truthy
     * "pageIndexed" entry; a missing or malformed result counts as a failure.
     *
     * @param Item $item The index queue item representing the page.
     * @param int $language The language to use.
     * @param int $userGroup The frontend user group to use.
     * @return PageIndexerResponse Page indexer response
     * @throws \RuntimeException if indexing an item failed
     */
    protected function indexPage(Item $item, $language = 0, $userGroup = 0)
    {
        $accessRootline = $this->getAccessRootline($item, $language, $userGroup);

        $request = $this->buildBasePageIndexerRequest();
        $request->setIndexQueueItem($item);
        $request->addAction('indexPage');
        $request->setParameter('accessRootline', (string)$accessRootline);

        $indexRequestUrl = $this->getDataUrl($item, $language);
        $response = $request->send($indexRequestUrl);
        $indexActionResult = $response->getActionResult('indexPage');

        // The action result is not guaranteed to be an array with a
        // "pageIndexed" key; !empty() treats every other shape as a failed
        // indexing attempt without raising notices or warnings.
        $pageIndexed = !empty($indexActionResult['pageIndexed']);

        if ($this->loggingEnabled) {
            $logSeverity = SolrLogManager::INFO;
            $logStatus = 'Info';
            if ($pageIndexed) {
                $logSeverity = SolrLogManager::NOTICE;
                $logStatus = 'Success';
            }

            $this->logger->log(
                $logSeverity,
                'Page Indexer: ' . $logStatus,
                [
                    'item' => (array)$item,
                    'language' => $language,
                    'user group' => $userGroup,
                    'index request url' => $indexRequestUrl,
                    'request' => (array)$request,
                    'request headers' => $request->getHeaders(),
                    'response' => (array)$response
                ]
            );
        }

        if (!$pageIndexed) {
            $message = 'Failed indexing page Index Queue item: ' . $item->getIndexQueueUid() . ' url: ' . $indexRequestUrl;

            throw new \RuntimeException($message, 1331837081);
        }

        return $response;
    }
394
395
    /**
     * Builds the "Access Rootline" of a page document.
     *
     * The Access Rootline gathers the frontend user group access restrictions
     * that are set on pages up in a page's rootline and extended to sub-pages.
     *
     * Its string format looks like this:
     * pageId1:group1,group2|pageId2:group3|c:group1,group4,groupN
     *
     * Elements are separated by a pipe character. Every element except the
     * last one stands for a page and consists of the ID of the page setting
     * the restriction, a colon, and a comma separated list of the frontend
     * user group IDs restricting access. The last element describes the
     * restrictions of the page's content elements and records; it uses a
     * lower case "c" in place of a page ID.
     *
     * Results are cached per page uid, language, mount point parameter and
     * content access group.
     *
     * @param Item $item Index queue item representing the current page
     * @param int $language The sys_language_uid language ID
     * @param int $contentAccessGroup The user group to use for the content access rootline element. Optional, will be determined automatically if not set.
     * @return Rootline The Access Rootline, callers cast it to a string where needed
     */
    protected function getAccessRootline(
        Item $item,
        $language = 0,
        $contentAccessGroup = null
    ) {
        static $accessRootlineCache;

        $mountPointParameter = $this->getMountPageDataUrlParameter($item);

        // cache key: uid|language[|mount point][|content access group]
        $cacheKeyParts = [$item->getRecordUid(), $language];
        if ($mountPointParameter !== '') {
            $cacheKeyParts[] = $mountPointParameter;
        }
        if ($contentAccessGroup !== null) {
            $cacheKeyParts[] = $contentAccessGroup;
        }
        $cacheKey = implode('|', $cacheKeyParts);

        if (isset($accessRootlineCache[$cacheKey])) {
            return $accessRootlineCache[$cacheKey];
        }

        $rootline = Rootline::getAccessRootlineByPageId(
            $item->getRecordUid(),
            $mountPointParameter
        );

        // content access groups of the current page: either the one given
        // or all groups found in the page's content
        $groups = ($contentAccessGroup === null)
            ? $this->getAccessGroupsFromContent($item, $language)
            : [$contentAccessGroup];

        $contentElement = GeneralUtility::makeInstance(
            RootlineElement::class,
            'c:' . implode(',', $groups)
        );
        $rootline->push($contentElement);

        $accessRootlineCache[$cacheKey] = $rootline;

        return $rootline;
    }
455
}
456