Issues (216)

Classes/IndexQueue/PageIndexer.php (1 issue)

Severity
1
<?php
2
namespace ApacheSolrForTypo3\Solr\IndexQueue;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 3 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Access\Rootline;
28
use ApacheSolrForTypo3\Solr\Access\RootlineElement;
29
use ApacheSolrForTypo3\Solr\Domain\Index\PageIndexer\Helper\UriBuilder\AbstractUriStrategy;
30
use ApacheSolrForTypo3\Solr\Domain\Index\PageIndexer\Helper\UriStrategyFactory;
31
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
32
use TYPO3\CMS\Core\Utility\GeneralUtility;
33
34
/**
35
 * A special purpose indexer to index pages.
36
 *
37
 * In the case of pages we can't directly index the page records, we need to
38
 * retrieve the content that belongs to a page from tt_content, too. Also
39
 * plugins may be included on a page and thus may need to be executed.
40
 *
41
 * @author Ingo Renner <[email protected]>
42
 */
43
class PageIndexer extends Indexer
44
{
45
    /**
     * Indexes a page item from the indexing queue.
     *
     * For every Solr connection applicable to the page (keyed by
     * sys_language_uid) the frontend user groups used on the page are
     * determined, and the page is indexed once per language and user group.
     * Languages for which no access groups could be determined are skipped;
     * the skip is written to the log when logging is enabled.
     *
     * @param Item $item An index queue item
     * @return bool FALSE if the page is not indexable, TRUE otherwise (also when every language was skipped)
     * @throws \RuntimeException if indexing the page failed for a language / user group combination
     */
    public function index(Item $item)
    {
        $this->setLogging($item);

        // check whether we should move on at all
        if (!$this->isPageIndexable($item)) {
            return false;
        }

        $solrConnections = $this->getSolrConnectionsByItem($item);
        foreach ($solrConnections as $systemLanguageUid => $solrConnection) {
            $contentAccessGroups = $this->getAccessGroupsFromContent($item, $systemLanguageUid);

            if (empty($contentAccessGroups)) {
                // might be an empty page w/no content elements or some TYPO3 error / bug;
                // leave a trace so skipped pages can be diagnosed
                if ($this->loggingEnabled) {
                    $this->logger->log(
                        SolrLogManager::INFO,
                        'Page Indexer: Skipped, no content access groups found',
                        [
                            'index queue item' => $item->getIndexQueueUid(),
                            'page' => $item->getRecordUid(),
                            'language' => $systemLanguageUid
                        ]
                    );
                }
                continue;
            }

            foreach ($contentAccessGroups as $userGroup) {
                $this->indexPage($item, $systemLanguageUid, $userGroup);
            }
        }

        return true;
    }
77
78
    /**
     * Decides whether the page wrapped by the given queue item may be indexed.
     *
     * A page is rejected only if TCA declares a "disabled" enable column for
     * the pages table and that column holds a truthy value in the page record.
     *
     * @param Item $item The page we want to index encapsulated in an index queue item
     * @return bool TRUE if the page can be indexed, FALSE if it is disabled
     */
    protected function isPageIndexable(Item $item)
    {
        // TODO do we still need this?
        // shouldn't those be sorted out by the record monitor / garbage collector already?

        $pageRecord = $item->getRecord();

        // without a configured "disabled" column there is nothing to check
        if (!isset($GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'])) {
            return true;
        }

        $disabledField = $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'];

        return !$pageRecord[$disabledField];
    }
101
102
    /**
     * Collects the Solr connections a page has to be indexed into.
     *
     * Starts with the connections resolved by the parent indexer and narrows
     * them down according to the page's l18n_cfg localization settings:
     * - if bit 1 is set, the default language connection (key 0) is dropped
     * - if GeneralUtility::hideIfNotTranslated() reports TRUE, only the
     *   (remaining) default connection and the connections of languages that
     *   have a translation overlay for the page are kept
     *
     * @param Item $item An index queue item
     * @return array An array of ApacheSolrForTypo3\Solr\System\Solr\SolrConnection connections, the array's keys are the sys_language_uid of the language of the connection
     */
    protected function getSolrConnectionsByItem(Item $item)
    {
        $connections = parent::getSolrConnectionsByItem($item);
        $pageRecord = $item->getRecord();

        // page is configured to hide the default translation -> no default language connection
        if (($pageRecord['l18n_cfg'] & 1) !== 0) {
            unset($connections[0]);
        }

        if (!GeneralUtility::hideIfNotTranslated($pageRecord['l18n_cfg'])) {
            return $connections;
        }

        // the default language connection (if still present) always stays first
        $translatedConnections = [];
        if (isset($connections[0])) {
            $translatedConnections[0] = $connections[0];
        }

        $overlayRecords = $this->pagesRepository->findTranslationOverlaysByPageId((int)$pageRecord['uid']);
        foreach ($overlayRecords as $overlayRecord) {
            $overlayLanguageUid = $overlayRecord['sys_language_uid'];
            if (!array_key_exists($overlayLanguageUid, $connections)) {
                continue;
            }

            $translatedConnections[$overlayLanguageUid] = $connections[$overlayLanguageUid];
        }

        return $translatedConnections;
    }
142
143
    /**
     * Finds the FE user groups used on a page including all groups of content
     * elements and groups of records of extensions that have correctly been
     * pushed through ContentObjectRenderer during rendering.
     *
     * The groups are determined by sending a "findUserGroups" page indexer
     * request to the page's data URL. Array results are memoised in a static
     * variable per page uid and language, so the request is sent only once
     * per combination. If the request does not deliver an array, nothing is
     * cached and an empty array is returned.
     *
     * @param Item $item Index queue item representing the current page to get the user groups from
     * @param int $language The sys_language_uid language ID
     * @return array Array of user group IDs, empty if none could be determined
     */
    protected function getAccessGroupsFromContent(Item $item, $language = 0)
    {
        static $accessGroupsCache = [];

        $accessGroupsCacheEntryId = $item->getRecordUid() . '|' . $language;
        if (!isset($accessGroupsCache[$accessGroupsCacheEntryId])) {
            $request = $this->buildBasePageIndexerRequest();
            $request->setIndexQueueItem($item);
            $request->addAction('findUserGroups');

            $indexRequestUrl = $this->getDataUrl($item, $language);
            $response = $request->send($indexRequestUrl);

            $groups = $response->getActionResult('findUserGroups');
            if (is_array($groups)) {
                $accessGroupsCache[$accessGroupsCacheEntryId] = $groups;
            }

            if ($this->loggingEnabled) {
                $this->logger->log(
                    SolrLogManager::INFO,
                    'Page Access Groups',
                    [
                        'item' => (array)$item,
                        'language' => $language,
                        'index request url' => $indexRequestUrl,
                        'request' => (array)$request,
                        'response' => (array)$response,
                        'groups' => $groups
                    ]
                );
            }
        }

        // no cache entry means the frontend did not deliver a usable result;
        // fall back to an empty list instead of reading an undefined array key
        return isset($accessGroupsCache[$accessGroupsCacheEntryId])
            ? $accessGroupsCache[$accessGroupsCacheEntryId]
            : [];
    }
188
189
    // Utility methods
190
191
    /**
     * Creates a page indexer request that is pre-configured from the indexer
     * options: logging flag, optional authorization credentials, optional
     * additional headers and an optional request timeout.
     *
     * @return PageIndexerRequest Base page indexer request
     */
    protected function buildBasePageIndexerRequest()
    {
        $indexerRequest = $this->getPageIndexerRequest();
        $indexerRequest->setParameter('loggingEnabled', $this->loggingEnabled);

        if (!empty($this->options['authorization.'])) {
            $credentials = $this->options['authorization.'];
            $indexerRequest->setAuthorizationCredentials($credentials['username'], $credentials['password']);
        }

        // no configured headers simply results in an empty loop
        $additionalHeaders = empty($this->options['frontendDataHelper.']['headers.'])
            ? []
            : $this->options['frontendDataHelper.']['headers.'];
        foreach ($additionalHeaders as $additionalHeader) {
            $indexerRequest->addHeader($additionalHeader);
        }

        if (!empty($this->options['frontendDataHelper.']['requestTimeout'])) {
            $timeout = (float)$this->options['frontendDataHelper.']['requestTimeout'];
            $indexerRequest->setTimeout($timeout);
        }

        return $indexerRequest;
    }
221
222
    /**
     * Creates a page indexer request instance through
     * GeneralUtility::makeInstance().
     *
     * @return PageIndexerRequest
     */
    protected function getPageIndexerRequest()
    {
        $indexerRequest = GeneralUtility::makeInstance(PageIndexerRequest::class);

        return $indexerRequest;
    }
229
230
    /**
     * Determines the URL the index request for a page is sent to.
     *
     * The URL is built by the URI strategy resolved for the item's page ID,
     * taking the language, the mount point parameter of the item and the
     * indexer options into account.
     *
     * @param Item $item Item to index
     * @param int $language The language id
     * @return string URL to send the index request to
     * @throws \RuntimeException presumably raised by the URI strategy when no URL can be built (not visible in this class)
     */
    protected function getDataUrl(Item $item, $language = 0)
    {
        $uriStrategy = $this->getUriStrategy($item->getRecordUid());

        return $uriStrategy->getPageIndexingUriFromPageItemAndLanguageId(
            $item,
            $language,
            $this->getMountPageDataUrlParameter($item),
            $this->options
        );
    }
250
251
    /**
     * Asks the UriStrategyFactory for the URI strategy responsible for the
     * given page.
     *
     * @param int $pageId
     * @return AbstractUriStrategy
     */
    protected function getUriStrategy($pageId)
    {
        /** @var UriStrategyFactory $strategyFactory */
        $strategyFactory = GeneralUtility::makeInstance(UriStrategyFactory::class);

        return $strategyFactory->getForPageId($pageId);
    }
259
260
    /**
     * Builds the value of the MP URL parameter needed to access mount pages.
     *
     * An item counts as a mounted page as soon as it carries the
     * "isMountedPage" indexing property (only its presence is checked). The
     * value is composed of the "mountPageSource" and "mountPageDestination"
     * indexing properties, joined by a dash.
     *
     * @param Item $item Item to get an &MP URL parameter for
     * @return string &MP URL parameter if $item is a mounted page, an empty string otherwise
     */
    protected function getMountPageDataUrlParameter(Item $item)
    {
        if ($item->hasIndexingProperty('isMountedPage')) {
            $mountSource = $item->getIndexingProperty('mountPageSource');
            $mountDestination = $item->getIndexingProperty('mountPageDestination');

            return $mountSource . '-' . $mountDestination;
        }

        return '';
    }
275 1
276
    #
277
    # Frontend User Groups Access
278
    #
279
280
    /**
     * Creates a single Solr Document for a page in a specific language and for
     * a specific frontend user group.
     *
     * Sends an "indexPage" page indexer request, carrying the page's access
     * rootline, to the page's data URL and evaluates the returned action
     * result. A missing or incomplete action result is treated like a failed
     * indexing attempt.
     *
     * @param Item $item The index queue item representing the page.
     * @param int $language The language to use.
     * @param int $userGroup The frontend user group to use.
     * @return PageIndexerResponse Page indexer response
     * @throws \RuntimeException if indexing an item failed
     */
    protected function indexPage(Item $item, $language = 0, $userGroup = 0)
    {
        $accessRootline = $this->getAccessRootline($item, $language, $userGroup);
        $request = $this->buildBasePageIndexerRequest();
        $request->setIndexQueueItem($item);
        $request->addAction('indexPage');
        $request->setParameter('accessRootline', (string)$accessRootline);

        $indexRequestUrl = $this->getDataUrl($item, $language);
        $response = $request->send($indexRequestUrl);
        $indexActionResult = $response->getActionResult('indexPage');

        // the action result may be missing or lack the key when the frontend
        // request failed; empty() avoids notices and treats that as "not indexed"
        $pageIndexed = !empty($indexActionResult['pageIndexed']);

        if ($this->loggingEnabled) {
            $logSeverity = SolrLogManager::INFO;
            $logStatus = 'Info';
            if ($pageIndexed) {
                $logSeverity = SolrLogManager::NOTICE;
                $logStatus = 'Success';
            }

            $this->logger->log(
                $logSeverity,
                'Page Indexer: ' . $logStatus,
                [
                    'item' => (array)$item,
                    'language' => $language,
                    'user group' => $userGroup,
                    'index request url' => $indexRequestUrl,
                    'request' => (array)$request,
                    'request headers' => $request->getHeaders(),
                    'response' => (array)$response
                ]
            );
        }

        if (!$pageIndexed) {
            $message = 'Failed indexing page Index Queue item: ' . $item->getIndexQueueUid() . ' url: ' . $indexRequestUrl;

            throw new \RuntimeException($message, 1331837081);
        }

        return $response;
    }
333
334
    /**
     * Generates a page document's "Access Rootline".
     *
     * The Access Rootline collects frontend user group access restrictions set
     * for pages up in a page's rootline extended to sub-pages.
     *
     * The format of its string representation is like this:
     * pageId1:group1,group2|pageId2:group3|c:group1,group4,groupN
     *
     * The single elements of the access rootline are separated by a pipe
     * character. All but the last elements represent pages, the last element
     * defines the access restrictions applied to the page's content elements
     * and records shown on the page.
     * Each page element is composed by the page ID of the page setting frontend
     * user access restrictions, a colon, and a comma separated list of frontend
     * user group IDs restricting access to the page.
     * The content access element does not have a page ID, instead it replaces
     * the ID by a lower case C.
     *
     * Rootlines are memoised in a static variable per page uid, language,
     * mount point parameter and content access group; repeated calls with the
     * same arguments return the same Rootline instance.
     *
     * @param Item $item Index queue item representing the current page
     * @param int $language The sys_language_uid language ID
     * @param int $contentAccessGroup The user group to use for the content access rootline element. Optional, will be determined automatically if not set.
     * @return Rootline An Access Rootline, callers cast it to string to get the format described above.
     */
    protected function getAccessRootline(Item $item, $language = 0, $contentAccessGroup = null)
    {
        static $accessRootlineCache;

        $mountPointParameter = $this->getMountPageDataUrlParameter($item);

        $accessRootlineCacheEntryId = $item->getRecordUid() . '|' . $language;
        if ($mountPointParameter !== '') {
            $accessRootlineCacheEntryId .= '|' . $mountPointParameter;
        }
        if (!is_null($contentAccessGroup)) {
            $accessRootlineCacheEntryId .= '|' . $contentAccessGroup;
        }

        if (!isset($accessRootlineCache[$accessRootlineCacheEntryId])) {
            $accessRootline = $this->getAccessRootlineByPageId($item->getRecordUid(), $mountPointParameter);

            // current page's content access groups
            $contentAccessGroups = [$contentAccessGroup];
            if (is_null($contentAccessGroup)) {
                $contentAccessGroups = $this->getAccessGroupsFromContent($item, $language);
                if (!is_array($contentAccessGroups)) {
                    // group detection delivered nothing usable; implode() needs an array
                    $contentAccessGroups = [];
                }
            }
            $element = GeneralUtility::makeInstance(RootlineElement::class, /** @scrutinizer ignore-type */ 'c:' . implode(',', $contentAccessGroups));
            $accessRootline->push($element);

            $accessRootlineCache[$accessRootlineCacheEntryId] = $accessRootline;
        }

        return $accessRootlineCache[$accessRootlineCacheEntryId];
    }
388
389
    /**
     * Fetches the access rootline of a page by delegating to the static
     * Rootline::getAccessRootlineByPageId() factory.
     *
     * @param int $pageId
     * @param string $mountPointParameter
     * @return Rootline
     */
    protected function getAccessRootlineByPageId($pageId, $mountPointParameter)
    {
        $accessRootline = Rootline::getAccessRootlineByPageId($pageId, $mountPointParameter);

        return $accessRootline;
    }
400
401
}
402