Issues (202)

Classes/IndexQueue/PageIndexer.php (1 issue)

Severity
1
<?php
2
namespace ApacheSolrForTypo3\Solr\IndexQueue;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2009-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 3 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Access\Rootline;
28
use ApacheSolrForTypo3\Solr\Access\RootlineElement;
29
use ApacheSolrForTypo3\Solr\Domain\Index\PageIndexer\Helper\UriBuilder\AbstractUriStrategy;
30
use ApacheSolrForTypo3\Solr\Domain\Index\PageIndexer\Helper\UriStrategyFactory;
31
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager;
32
use TYPO3\CMS\Core\Utility\GeneralUtility;
33
34
/**
35
 * A special purpose indexer to index pages.
36
 *
37
 * In the case of pages we can't directly index the page records, we need to
38
 * retrieve the content that belongs to a page from tt_content, too. Also
39
 * plugins may be included on a page and thus may need to be executed.
40
 *
41
 * @author Ingo Renner <[email protected]>
42
 */
43
class PageIndexer extends Indexer
44
{
45
    /**
     * Indexes an item from the indexing queue.
     *
     * The page is processed once per applicable Solr connection (i.e. per
     * language) and, within each language, once per frontend user group found
     * in the page's content. A language for which no access groups could be
     * determined is skipped; the skip is logged when logging is enabled.
     *
     * @param Item $item An index queue item
     * @return bool FALSE if the page is not indexable, TRUE otherwise (also
     *              when individual languages were skipped)
     * @throws \RuntimeException propagated from indexPage() if indexing failed
     */
    public function index(Item $item)
    {
        $this->setLogging($item);

        // check whether we should move on at all
        if (!$this->isPageIndexable($item)) {
            return false;
        }

        $solrConnections = $this->getSolrConnectionsByItem($item);
        foreach ($solrConnections as $systemLanguageUid => $solrConnection) {
            $contentAccessGroups = $this->getAccessGroupsFromContent($item, $systemLanguageUid);

            if (empty($contentAccessGroups)) {
                // might be an empty page w/no content elements or some TYPO3 error / bug;
                // leave a trace instead of skipping the language silently
                if ($this->loggingEnabled) {
                    $this->logger->log(
                        SolrLogManager::NOTICE,
                        'Page Indexer: no access groups found, skipping language',
                        [
                            'item' => (array)$item,
                            'language' => $systemLanguageUid
                        ]
                    );
                }

                continue;
            }

            foreach ($contentAccessGroups as $userGroup) {
                $this->indexPage($item, $systemLanguageUid, $userGroup);
            }
        }

        return true;
    }
77
78
    /**
     * Checks whether we can index this page.
     *
     * A page is rejected only when the TCA of the pages table declares a
     * "disabled" enable column and that column holds a truthy value in the
     * item's record.
     *
     * @param Item $item The page we want to index encapsulated in an index queue item
     * @return bool TRUE if we can index this page, FALSE otherwise
     */
    protected function isPageIndexable(Item $item)
    {
        // TODO do we still need this?
        // shouldn't those be sorted out by the record monitor / garbage collector already?

        $pageRecord = $item->getRecord();

        // without a configured "disabled" column there is nothing to check
        if (!isset($GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'])) {
            return true;
        }

        $disabledColumn = $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'];

        return !$pageRecord[$disabledColumn];
    }
101
102
    /**
     * Gets the Solr connections applicable for a page.
     *
     * Starts from the connections the parent indexer resolves and then applies
     * the page's l18n_cfg settings: the default language connection (key 0) is
     * dropped if the page hides its default translation, and if the page is
     * hidden when not translated only connections for languages that have a
     * translation overlay are kept.
     *
     * @param Item $item An index queue item
     * @return array An array of ApacheSolrForTypo3\Solr\System\Solr\SolrConnection connections, the array's keys are the sys_language_uid of the language of the connection
     */
    protected function getSolrConnectionsByItem(Item $item)
    {
        $connections = parent::getSolrConnectionsByItem($item);
        $pageRecord = $item->getRecord();

        // may use \TYPO3\CMS\Core\Utility\GeneralUtility::hideIfDefaultLanguage($page['l18n_cfg']) with TYPO3 4.6
        if (($pageRecord['l18n_cfg'] & 1) !== 0) {
            // page is configured to hide the default translation -> remove Solr connection for default language
            unset($connections[0]);
        }

        if (!GeneralUtility::hideIfNotTranslated($pageRecord['l18n_cfg'])) {
            return $connections;
        }

        // keep the default language connection (if still present) first ...
        $translatedConnections = [];
        if (isset($connections[0])) {
            $translatedConnections[0] = $connections[0];
        }

        // ... followed by the connections of languages the page is translated to
        $overlays = $this->pagesRepository->findTranslationOverlaysByPageId((int)$pageRecord['uid']);
        foreach ($overlays as $overlayRecord) {
            $overlayLanguage = $overlayRecord['sys_language_uid'];
            if (!array_key_exists($overlayLanguage, $connections)) {
                continue;
            }

            $translatedConnections[$overlayLanguage] = $connections[$overlayLanguage];
        }

        return $translatedConnections;
    }
142
143
    /**
     * Finds the FE user groups used on a page including all groups of content
     * elements and groups of records of extensions that have correctly been
     * pushed through ContentObjectRenderer during rendering.
     *
     * The groups are determined by sending a "findUserGroups" request to the
     * page's indexing URL. Array results are cached per page uid and language
     * for the remainder of the PHP process; non-array results are not cached
     * so a later call sends the request again.
     *
     * @param Item $item Index queue item representing the current page to get the user groups from
     * @param int $language The sys_language_uid language ID
     * @return array Array of user group IDs, empty if the response did not carry an array result
     */
    protected function getAccessGroupsFromContent(Item $item, $language = 0)
    {
        static $accessGroupsCache = [];

        $accessGroupsCacheEntryId = $item->getRecordUid() . '|' . $language;
        if (isset($accessGroupsCache[$accessGroupsCacheEntryId])) {
            return $accessGroupsCache[$accessGroupsCacheEntryId];
        }

        $request = $this->buildBasePageIndexerRequest();
        $request->setIndexQueueItem($item);
        $request->addAction('findUserGroups');

        $indexRequestUrl = $this->getDataUrl($item, $language);
        $response = $request->send($indexRequestUrl);

        $groups = $response->getActionResult('findUserGroups');
        $hasValidGroups = is_array($groups);
        if ($hasValidGroups) {
            $accessGroupsCache[$accessGroupsCacheEntryId] = $groups;
        }

        if ($this->loggingEnabled) {
            $this->logger->log(
                SolrLogManager::INFO,
                'Page Access Groups',
                [
                    'item' => (array)$item,
                    'language' => $language,
                    'index request url' => $indexRequestUrl,
                    'request' => (array)$request,
                    'response' => (array)$response,
                    'groups' => $groups
                ]
            );
        }

        // never read the cache entry blindly: it does not exist when the
        // response carried no usable result
        return $hasValidGroups ? $groups : [];
    }
188
189
    // Utility methods
190
191
    /**
     * Builds a base page indexer request with configured headers and other
     * parameters.
     *
     * Applies, where configured in the indexer options: authorization
     * credentials, additional request headers and a request timeout. The
     * current logging flag is always passed on as a request parameter.
     *
     * @return PageIndexerRequest Base page indexer request
     */
    protected function buildBasePageIndexerRequest()
    {
        $baseRequest = $this->getPageIndexerRequest();
        $baseRequest->setParameter('loggingEnabled', $this->loggingEnabled);

        $options = $this->options;

        if (!empty($options['authorization.'])) {
            $credentials = $options['authorization.'];
            $baseRequest->setAuthorizationCredentials($credentials['username'], $credentials['password']);
        }

        if (!empty($options['frontendDataHelper.']['headers.'])) {
            $additionalHeaders = $options['frontendDataHelper.']['headers.'];
            foreach ($additionalHeaders as $header) {
                $baseRequest->addHeader($header);
            }
        }

        if (!empty($options['frontendDataHelper.']['requestTimeout'])) {
            $timeout = (float)$options['frontendDataHelper.']['requestTimeout'];
            $baseRequest->setTimeout($timeout);
        }

        return $baseRequest;
    }
221
222
    /**
     * Creates the page indexer request object via GeneralUtility::makeInstance().
     *
     * @return PageIndexerRequest
     */
    protected function getPageIndexerRequest()
    {
        $pageIndexerRequest = GeneralUtility::makeInstance(PageIndexerRequest::class);

        return $pageIndexerRequest;
    }
229
230
    /**
     * Determines the URL the index request for a page is sent to.
     *
     * Delegates to the URI strategy resolved for the page ID, handing over the
     * item, the language, the mount point parameter and the indexer options.
     *
     * @param Item $item Item to index
     * @param int $language The language id
     * @return string URL to send the index request to
     * @throws \RuntimeException presumably raised by the URI strategy; not raised in this method itself
     */
    protected function getDataUrl(Item $item, $language = 0)
    {
        $uriStrategy = $this->getUriStrategy($item->getRecordUid());

        return $uriStrategy->getPageIndexingUriFromPageItemAndLanguageId(
            $item,
            $language,
            $this->getMountPageDataUrlParameter($item),
            $this->options
        );
    }
250 1
251
    /**
     * Resolves the URI strategy for a page through the UriStrategyFactory.
     *
     * @param int $pageId
     * @return AbstractUriStrategy
     */
    protected function getUriStrategy($pageId)
    {
        $strategyFactory = GeneralUtility::makeInstance(UriStrategyFactory::class);

        return $strategyFactory->getForPageId($pageId);
    }
259
260 1
    /**
     * Generates the MP URL parameter needed to access mount pages. If the item
     * is identified as being a mounted page, the &MP parameter value is built
     * as "<mountPageSource>-<mountPageDestination>" from the item's indexing
     * properties.
     *
     * @param Item $item Item to get an &MP URL parameter for
     * @return string &MP URL parameter if $item is a mounted page, an empty string otherwise
     */
    protected function getMountPageDataUrlParameter(Item $item)
    {
        if ($item->hasIndexingProperty('isMountedPage')) {
            $mountSource = $item->getIndexingProperty('mountPageSource');
            $mountDestination = $item->getIndexingProperty('mountPageDestination');

            return $mountSource . '-' . $mountDestination;
        }

        return '';
    }
275
276
    #
277
    # Frontend User Groups Access
278
    #
279
280
    /**
     * Creates a single Solr Document for a page in a specific language and for
     * a specific frontend user group.
     *
     * Sends an "indexPage" request, carrying the page's access rootline, to
     * the page's indexing URL and evaluates the "pageIndexed" flag of the
     * action result. A missing or non-array action result is treated as a
     * failed indexing attempt.
     *
     * @param Item $item The index queue item representing the page.
     * @param int $language The language to use.
     * @param int $userGroup The frontend user group to use.
     * @return PageIndexerResponse Page indexer response
     * @throws \RuntimeException if indexing an item failed
     */
    protected function indexPage(Item $item, $language = 0, $userGroup = 0)
    {
        $accessRootline = $this->getAccessRootline($item, $language, $userGroup);
        $request = $this->buildBasePageIndexerRequest();
        $request->setIndexQueueItem($item);
        $request->addAction('indexPage');
        $request->setParameter('accessRootline', (string)$accessRootline);

        $indexRequestUrl = $this->getDataUrl($item, $language);
        $response = $request->send($indexRequestUrl);
        $indexActionResult = $response->getActionResult('indexPage');

        // evaluate the flag once; empty() copes with a null result or a
        // missing key without raising notices
        $pageIndexed = !empty($indexActionResult['pageIndexed']);

        if ($this->loggingEnabled) {
            $logSeverity = SolrLogManager::INFO;
            $logStatus = 'Info';
            if ($pageIndexed) {
                $logSeverity = SolrLogManager::NOTICE;
                $logStatus = 'Success';
            }

            $this->logger->log(
                $logSeverity,
                'Page Indexer: ' . $logStatus,
                [
                    'item' => (array)$item,
                    'language' => $language,
                    'user group' => $userGroup,
                    'index request url' => $indexRequestUrl,
                    'request' => (array)$request,
                    'request headers' => $request->getHeaders(),
                    'response' => (array)$response
                ]
            );
        }

        if (!$pageIndexed) {
            $message = 'Failed indexing page Index Queue item: ' . $item->getIndexQueueUid() . ' url: ' . $indexRequestUrl;

            throw new \RuntimeException($message, 1331837081);
        }

        return $response;
    }
333
334
    /**
     * Generates a page document's "Access Rootline".
     *
     * The Access Rootline collects frontend user group access restrictions set
     * for pages up in a page's rootline extended to sub-pages.
     *
     * The format is like this:
     * pageId1:group1,group2|groupId2:group3|c:group1,group4,groupN
     *
     * The single elements of the access rootline are separated by a pipe
     * character. All but the last elements represent pages, the last element
     * defines the access restrictions applied to the page's content elements
     * and records shown on the page.
     * Each page element is composed by the page ID of the page setting frontend
     * user access restrictions, a colon, and a comma separated list of frontend
     * user group IDs restricting access to the page.
     * The content access element does not have a page ID, instead it replaces
     * the ID by a lower case C.
     *
     * Results are cached per page uid, language, mount point parameter and
     * content access group for the remainder of the PHP process.
     *
     * @param Item $item Index queue item representing the current page
     * @param int $language The sys_language_uid language ID
     * @param int|null $contentAccessGroup The user group to use for the content access rootline element. Optional, will be determined automatically if not set.
     * @return Rootline The Access Rootline; callers cast it to string to obtain the format described above
     */
    protected function getAccessRootline(Item $item, $language = 0, $contentAccessGroup = null)
    {
        static $rootlineCache;

        $mountPoint = $this->getMountPageDataUrlParameter($item);

        // cache key: uid|language[|mountPoint][|contentAccessGroup]
        $cacheKeyParts = [$item->getRecordUid(), $language];
        if ($mountPoint !== '') {
            $cacheKeyParts[] = $mountPoint;
        }
        if ($contentAccessGroup !== null) {
            $cacheKeyParts[] = $contentAccessGroup;
        }
        $cacheKey = implode('|', $cacheKeyParts);

        if (isset($rootlineCache[$cacheKey])) {
            return $rootlineCache[$cacheKey];
        }

        $rootline = $this->getAccessRootlineByPageId($item->getRecordUid(), $mountPoint);

        // current page's content access groups
        $groups = $contentAccessGroup === null
            ? $this->getAccessGroupsFromContent($item, $language)
            : [$contentAccessGroup];

        $contentElement = GeneralUtility::makeInstance(RootlineElement::class, /** @scrutinizer ignore-type */ 'c:' . implode(',', $groups));
        $rootline->push($contentElement);

        $rootlineCache[$cacheKey] = $rootline;

        return $rootline;
    }
388
389
    /**
     * Returns the access rootline for a certain page ID by delegating to
     * Rootline::getAccessRootlineByPageId().
     *
     * @param int $pageId
     * @param string $mountPointParameter
     * @return Rootline
     */
    protected function getAccessRootlineByPageId($pageId, $mountPointParameter)
    {
        $accessRootline = Rootline::getAccessRootlineByPageId($pageId, $mountPointParameter);

        return $accessRootline;
    }
400
401
}
402