1
|
|
|
<?php |
2
|
|
|
namespace ApacheSolrForTypo3\Solr\IndexQueue; |
3
|
|
|
|
4
|
|
|
/*************************************************************** |
5
|
|
|
* Copyright notice |
6
|
|
|
* |
7
|
|
|
* (c) 2009-2015 Ingo Renner <[email protected]> |
8
|
|
|
* All rights reserved |
9
|
|
|
* |
10
|
|
|
* This script is part of the TYPO3 project. The TYPO3 project is |
11
|
|
|
* free software; you can redistribute it and/or modify |
12
|
|
|
* it under the terms of the GNU General Public License as published by |
13
|
|
|
* the Free Software Foundation; either version 3 of the License, or |
14
|
|
|
* (at your option) any later version. |
15
|
|
|
* |
16
|
|
|
* The GNU General Public License can be found at |
17
|
|
|
* http://www.gnu.org/copyleft/gpl.html. |
18
|
|
|
* |
19
|
|
|
* This script is distributed in the hope that it will be useful, |
20
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
21
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22
|
|
|
* GNU General Public License for more details. |
23
|
|
|
* |
24
|
|
|
* This copyright notice MUST APPEAR in all copies of the script! |
25
|
|
|
***************************************************************/ |
26
|
|
|
|
27
|
|
|
use ApacheSolrForTypo3\Solr\Access\Rootline; |
28
|
|
|
use ApacheSolrForTypo3\Solr\Access\RootlineElement; |
29
|
|
|
use ApacheSolrForTypo3\Solr\Domain\Index\PageIndexer\Helper\UriBuilder\AbstractUriStrategy; |
30
|
|
|
use ApacheSolrForTypo3\Solr\Domain\Index\PageIndexer\Helper\UriStrategyFactory; |
31
|
|
|
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager; |
32
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
33
|
|
|
|
34
|
|
|
/**
 * A special purpose indexer to index pages.
 *
 * In the case of pages we can't directly index the page records, we need to
 * retrieve the content that belongs to a page from tt_content, too. Also
 * plugins may be included on a page and thus may need to be executed.
 *
 * Indexing is therefore done by sending requests (PageIndexerRequest) to a
 * URL of the page itself: one request to find the frontend user groups used
 * on the page, then one request per language and user group to index it.
 *
 * @author Ingo Renner <[email protected]>
 */
class PageIndexer extends Indexer
{
    /**
     * Indexes an item from the indexing queue.
     *
     * For every applicable Solr connection (keyed by sys_language_uid) the
     * frontend user groups used on the page are determined and the page is
     * indexed once per group. Languages for which no groups could be
     * determined are skipped.
     *
     * @param Item $item An index queue item
     * @return bool FALSE if the page is not indexable, TRUE otherwise
     * @throws \RuntimeException if indexing the page failed for a language / user group
     */
    public function index(Item $item)
    {
        $this->setLogging($item);

        // check whether we should move on at all
        if (!$this->isPageIndexable($item)) {
            return false;
        }

        // only the language IDs are needed here, the connections themselves are not used
        $systemLanguageUids = array_keys($this->getSolrConnectionsByItem($item));
        foreach ($systemLanguageUids as $systemLanguageUid) {
            $contentAccessGroups = $this->getAccessGroupsFromContent($item, $systemLanguageUid);

            if (empty($contentAccessGroups)) {
                // might be an empty page w/no content elements or some TYPO3 error / bug
                if ($this->loggingEnabled) {
                    $this->logger->log(
                        SolrLogManager::INFO,
                        'Page Indexer: no access groups found, skipping language',
                        [
                            'item' => (array)$item,
                            'language' => $systemLanguageUid
                        ]
                    );
                }
                continue;
            }

            foreach ($contentAccessGroups as $userGroup) {
                $this->indexPage($item, $systemLanguageUid, $userGroup);
            }
        }

        return true;
    }

    /**
     * Checks whether we can index this page.
     *
     * A page is not indexable when TCA defines a "disabled" enable column for
     * the pages table and that column is set in the page record.
     *
     * @param Item $item The page we want to index encapsulated in an index queue item
     * @return bool True if we can index this page, FALSE otherwise
     */
    protected function isPageIndexable(Item $item)
    {
        // TODO do we still need this?
        // shouldn't those be sorted out by the record monitor / garbage collector already?

        $record = $item->getRecord();

        if (!isset($GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'])) {
            return true;
        }

        $disabledField = $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'];

        // empty() keeps the original truthiness check but tolerates a record
        // that does not contain the field at all
        return empty($record[$disabledField]);
    }

    /**
     * Gets the Solr connections applicable for a page.
     *
     * The connections include the default connection and connections to be used
     * for translations of a page. The page's l18n_cfg setting is honoured:
     * the default language connection is removed when the default translation
     * is hidden, and connections without a translation overlay are removed
     * when the page is to be hidden if not translated.
     *
     * @param Item $item An index queue item
     * @return array An array of ApacheSolrForTypo3\Solr\System\Solr\SolrConnection connections, the array's keys are the sys_language_uid of the language of the connection
     */
    protected function getSolrConnectionsByItem(Item $item)
    {
        $solrConnections = parent::getSolrConnectionsByItem($item);

        $page = $item->getRecord();
        // a record without the field is treated like l18n_cfg = 0
        $l18nConfiguration = isset($page['l18n_cfg']) ? (int)$page['l18n_cfg'] : 0;

        // may use \TYPO3\CMS\Core\Utility\GeneralUtility::hideIfDefaultLanguage($page['l18n_cfg']) with TYPO3 4.6
        if ($l18nConfiguration & 1) {
            // page is configured to hide the default translation -> remove Solr connection for default language
            unset($solrConnections[0]);
        }

        if (!GeneralUtility::hideIfNotTranslated($l18nConfiguration)) {
            return $solrConnections;
        }

        // keep the default language (if still present) plus every language
        // the page actually has a translation overlay for
        $accessibleSolrConnections = [];
        if (isset($solrConnections[0])) {
            $accessibleSolrConnections[0] = $solrConnections[0];
        }

        $translationOverlays = $this->pagesRepository->findTranslationOverlaysByPageId((int)$page['uid']);
        foreach ($translationOverlays as $overlay) {
            $languageId = $overlay['sys_language_uid'];
            if (array_key_exists($languageId, $solrConnections)) {
                $accessibleSolrConnections[$languageId] = $solrConnections[$languageId];
            }
        }

        return $accessibleSolrConnections;
    }

    /**
     * Finds the FE user groups used on a page including all groups of content
     * elements and groups of records of extensions that have correctly been
     * pushed through ContentObjectRenderer during rendering.
     *
     * The groups are requested from the frontend ("findUserGroups" action).
     * Successful results are cached per page uid and language for the
     * lifetime of the process; failed lookups are not cached.
     *
     * @param Item $item Index queue item representing the current page to get the user groups from
     * @param int $language The sys_language_uid language ID
     * @return array Array of user group IDs, empty if the groups could not be determined
     */
    protected function getAccessGroupsFromContent(Item $item, $language = 0)
    {
        static $accessGroupsCache = [];

        $accessGroupsCacheEntryId = $item->getRecordUid() . '|' . $language;
        if (isset($accessGroupsCache[$accessGroupsCacheEntryId])) {
            return $accessGroupsCache[$accessGroupsCacheEntryId];
        }

        $request = $this->buildBasePageIndexerRequest();
        $request->setIndexQueueItem($item);
        $request->addAction('findUserGroups');

        $indexRequestUrl = $this->getDataUrl($item, $language);
        $response = $request->send($indexRequestUrl);

        $groups = $response->getActionResult('findUserGroups');
        if (is_array($groups)) {
            $accessGroupsCache[$accessGroupsCacheEntryId] = $groups;
        }

        if ($this->loggingEnabled) {
            $this->logger->log(
                SolrLogManager::INFO,
                'Page Access Groups',
                [
                    'item' => (array)$item,
                    'language' => $language,
                    'index request url' => $indexRequestUrl,
                    'request' => (array)$request,
                    'response' => (array)$response,
                    'groups' => $groups
                ]
            );
        }

        // the response may not carry a usable result; never read a cache
        // entry that was not written and always hand back an array
        return isset($accessGroupsCache[$accessGroupsCacheEntryId])
            ? $accessGroupsCache[$accessGroupsCacheEntryId]
            : [];
    }

    // Utility methods

    /**
     * Builds a base page indexer request with configured headers and other
     * parameters.
     *
     * Applies the "authorization." credentials as well as the
     * "frontendDataHelper." headers and request timeout from the indexer
     * options, if configured.
     *
     * @return PageIndexerRequest Base page indexer request
     */
    protected function buildBasePageIndexerRequest()
    {
        $request = $this->getPageIndexerRequest();
        $request->setParameter('loggingEnabled', $this->loggingEnabled);

        if (!empty($this->options['authorization.'])) {
            $request->setAuthorizationCredentials(
                $this->options['authorization.']['username'],
                $this->options['authorization.']['password']
            );
        }

        if (!empty($this->options['frontendDataHelper.']['headers.'])) {
            foreach ($this->options['frontendDataHelper.']['headers.'] as $headerValue) {
                $request->addHeader($headerValue);
            }
        }

        if (!empty($this->options['frontendDataHelper.']['requestTimeout'])) {
            $request->setTimeout((float)$this->options['frontendDataHelper.']['requestTimeout']);
        }

        return $request;
    }

    /**
     * Creates a new page indexer request instance.
     *
     * @return PageIndexerRequest
     */
    protected function getPageIndexerRequest()
    {
        return GeneralUtility::makeInstance(PageIndexerRequest::class);
    }

    /**
     * Determines a page ID's URL.
     *
     * Delegates to the URI strategy responsible for the page, passing the
     * language, the mount point parameter and the indexer options.
     *
     * @param Item $item Item to index
     * @param int $language The language id
     * @return string URL to send the index request to
     * @throws \RuntimeException
     */
    protected function getDataUrl(Item $item, $language = 0)
    {
        $uriStrategy = $this->getUriStrategy($item->getRecordUid());
        $mountPointParameter = $this->getMountPageDataUrlParameter($item);

        return $uriStrategy->getPageIndexingUriFromPageItemAndLanguageId(
            $item,
            $language,
            $mountPointParameter,
            $this->options
        );
    }

    /**
     * Returns the URI strategy to use for the given page.
     *
     * @param int $pageId
     * @return AbstractUriStrategy
     */
    protected function getUriStrategy($pageId)
    {
        return GeneralUtility::makeInstance(UriStrategyFactory::class)->getForPageId($pageId);
    }

    /**
     * Generates the MP URL parameter needed to access mount pages. If the item
     * is identified as being a mounted page, the &MP parameter is generated.
     *
     * @param Item $item Item to get an &MP URL parameter for
     * @return string "<mountPageSource>-<mountPageDestination>" if $item is a mounted page, an empty string otherwise
     */
    protected function getMountPageDataUrlParameter(Item $item)
    {
        if (!$item->hasIndexingProperty('isMountedPage')) {
            return '';
        }

        return $item->getIndexingProperty('mountPageSource') . '-' . $item->getIndexingProperty('mountPageDestination');
    }

    #
    # Frontend User Groups Access
    #

    /**
     * Creates a single Solr Document for a page in a specific language and for
     * a specific frontend user group.
     *
     * Sends an "indexPage" request, including the access rootline for the
     * given user group, to the page's data URL.
     *
     * @param Item $item The index queue item representing the page.
     * @param int $language The language to use.
     * @param int $userGroup The frontend user group to use.
     * @return PageIndexerResponse Page indexer response
     * @throws \RuntimeException if indexing an item failed
     */
    protected function indexPage(Item $item, $language = 0, $userGroup = 0)
    {
        $accessRootline = $this->getAccessRootline($item, $language, $userGroup);
        $request = $this->buildBasePageIndexerRequest();
        $request->setIndexQueueItem($item);
        $request->addAction('indexPage');
        $request->setParameter('accessRootline', (string)$accessRootline);

        $indexRequestUrl = $this->getDataUrl($item, $language);
        $response = $request->send($indexRequestUrl);
        $indexActionResult = $response->getActionResult('indexPage');

        // the action result may be missing or incomplete when the request
        // failed; treat that as "not indexed" without touching unset offsets
        $pageIndexed = !empty($indexActionResult['pageIndexed']);

        if ($this->loggingEnabled) {
            $logSeverity = $pageIndexed ? SolrLogManager::NOTICE : SolrLogManager::INFO;
            $logStatus = $pageIndexed ? 'Success' : 'Info';

            $this->logger->log(
                $logSeverity,
                'Page Indexer: ' . $logStatus,
                [
                    'item' => (array)$item,
                    'language' => $language,
                    'user group' => $userGroup,
                    'index request url' => $indexRequestUrl,
                    'request' => (array)$request,
                    'request headers' => $request->getHeaders(),
                    'response' => (array)$response
                ]
            );
        }

        if (!$pageIndexed) {
            $message = 'Failed indexing page Index Queue item: ' . $item->getIndexQueueUid() . ' url: ' . $indexRequestUrl;

            throw new \RuntimeException($message, 1331837081);
        }

        return $response;
    }

    /**
     * Generates a page document's "Access Rootline".
     *
     * The Access Rootline collects frontend user group access restrictions set
     * for pages up in a page's rootline extended to sub-pages.
     *
     * The format is like this:
     * pageId1:group1,group2|pageId2:group3|c:group1,group4,groupN
     *
     * The single elements of the access rootline are separated by a pipe
     * character. All but the last elements represent pages, the last element
     * defines the access restrictions applied to the page's content elements
     * and records shown on the page.
     * Each page element is composed by the page ID of the page setting frontend
     * user access restrictions, a colon, and a comma separated list of frontend
     * user group IDs restricting access to the page.
     * The content access element does not have a page ID, instead it replaces
     * the ID by a lower case C.
     *
     * Results are cached per page uid, language, mount point parameter and
     * content access group for the lifetime of the process.
     *
     * @param Item $item Index queue item representing the current page
     * @param int $language The sys_language_uid language ID
     * @param int $contentAccessGroup The user group to use for the content access rootline element. Optional, will be determined automatically if not set.
     * @return Rootline An Access Rootline; callers cast it to string to get the format described above.
     */
    protected function getAccessRootline(Item $item, $language = 0, $contentAccessGroup = null)
    {
        static $accessRootlineCache = [];

        $mountPointParameter = $this->getMountPageDataUrlParameter($item);

        $accessRootlineCacheEntryId = $item->getRecordUid() . '|' . $language;
        if ($mountPointParameter !== '') {
            $accessRootlineCacheEntryId .= '|' . $mountPointParameter;
        }
        if (!is_null($contentAccessGroup)) {
            $accessRootlineCacheEntryId .= '|' . $contentAccessGroup;
        }

        if (isset($accessRootlineCache[$accessRootlineCacheEntryId])) {
            return $accessRootlineCache[$accessRootlineCacheEntryId];
        }

        $accessRootline = $this->getAccessRootlineByPageId($item->getRecordUid(), $mountPointParameter);

        // current page's content access groups
        $contentAccessGroups = is_null($contentAccessGroup)
            ? $this->getAccessGroupsFromContent($item, $language)
            : [$contentAccessGroup];

        $element = GeneralUtility::makeInstance(RootlineElement::class, /** @scrutinizer ignore-type */ 'c:' . implode(',', $contentAccessGroups));
        $accessRootline->push($element);

        $accessRootlineCache[$accessRootlineCacheEntryId] = $accessRootline;

        return $accessRootline;
    }

    /**
     * Returns the access rootLine for a certain pageId.
     *
     * @param int $pageId
     * @param string $mountPointParameter
     * @return Rootline
     */
    protected function getAccessRootlineByPageId($pageId, $mountPointParameter)
    {
        return Rootline::getAccessRootlineByPageId($pageId, $mountPointParameter);
    }
}
402
|
|
|
|