1
|
|
|
<?php |
2
|
|
|
namespace ApacheSolrForTypo3\Solr\IndexQueue; |
3
|
|
|
|
4
|
|
|
/*************************************************************** |
5
|
|
|
* Copyright notice |
6
|
|
|
* |
7
|
|
|
* (c) 2009-2015 Ingo Renner <[email protected]> |
8
|
|
|
* All rights reserved |
9
|
|
|
* |
10
|
|
|
* This script is part of the TYPO3 project. The TYPO3 project is |
11
|
|
|
* free software; you can redistribute it and/or modify |
12
|
|
|
* it under the terms of the GNU General Public License as published by |
13
|
|
|
* the Free Software Foundation; either version 3 of the License, or |
14
|
|
|
* (at your option) any later version. |
15
|
|
|
* |
16
|
|
|
* The GNU General Public License can be found at |
17
|
|
|
* http://www.gnu.org/copyleft/gpl.html. |
18
|
|
|
* |
19
|
|
|
* This script is distributed in the hope that it will be useful, |
20
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
21
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22
|
|
|
* GNU General Public License for more details. |
23
|
|
|
* |
24
|
|
|
* This copyright notice MUST APPEAR in all copies of the script! |
25
|
|
|
***************************************************************/ |
26
|
|
|
|
27
|
|
|
use ApacheSolrForTypo3\Solr\Access\Rootline; |
28
|
|
|
use ApacheSolrForTypo3\Solr\Access\RootlineElement; |
29
|
|
|
use ApacheSolrForTypo3\Solr\System\Logging\SolrLogManager; |
30
|
|
|
use TYPO3\CMS\Backend\Utility\BackendUtility; |
31
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
32
|
|
|
|
33
|
|
|
/**
 * A special purpose indexer to index pages.
 *
 * In the case of pages we can't directly index the page records, we need to
 * retrieve the content that belongs to a page from tt_content, too. Also
 * plugins may be included on a page and thus may need to be executed.
 *
 * To achieve that, this indexer does not build documents itself: it sends
 * page indexer requests to the frontend URL of the page (see getDataUrl())
 * and evaluates the action results contained in the responses.
 *
 * @author Ingo Renner <[email protected]>
 */
class PageIndexer extends Indexer
{
    /**
     * Indexes an item from the indexing queue.
     *
     * For each Solr connection applicable to the page (keyed by language) the
     * frontend user groups used on the page are looked up, then one index
     * request is sent per language / user group combination.
     *
     * @param Item $item An index queue item
     * @return bool FALSE if the page must not be indexed, TRUE otherwise
     * @throws \RuntimeException if an index request failed or no valid data URL could be built
     */
    public function index(Item $item)
    {
        $this->setLogging($item);

        // check whether we should move on at all
        if (!$this->isPageIndexable($item)) {
            return false;
        }

        $solrConnections = $this->getSolrConnectionsByItem($item);
        foreach ($solrConnections as $systemLanguageUid => $solrConnection) {
            $contentAccessGroups = $this->getAccessGroupsFromContent(
                $item,
                $systemLanguageUid
            );

            if (empty($contentAccessGroups)) {
                // might be an empty page w/no content elements or some TYPO3 error / bug
                // FIXME logging needed
                continue;
            }

            foreach ($contentAccessGroups as $userGroup) {
                $this->indexPage($item, $systemLanguageUid, $userGroup);
            }
        }

        return true;
    }

    /**
     * Checks whether we can index this page.
     *
     * A page is rejected only if TCA declares a "disabled" enable column for
     * the pages table and that column holds a non-empty value in the record.
     *
     * @param Item $item The page we want to index encapsulated in an index queue item
     * @return bool True if we can index this page, FALSE otherwise
     */
    protected function isPageIndexable(Item $item)
    {
        // TODO do we still need this?
        // shouldn't those be sorted out by the record monitor / garbage collector already?

        $record = $item->getRecord();

        if (!isset($GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'])) {
            return true;
        }
        $disabledColumn = $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled'];

        // empty() instead of a plain read: a record without that column must
        // not raise an "undefined index" notice, it simply counts as enabled.
        return empty($record[$disabledColumn]);
    }

    /**
     * Gets the Solr connections applicable for a page.
     *
     * The connections include the default connection and connections to be used
     * for translations of a page.
     *
     * @param Item $item An index queue item
     * @return array An array of ApacheSolrForTypo3\Solr\System\Solr\SolrConnection connections, the array's keys are the sys_language_uid of the language of the connection
     */
    protected function getSolrConnectionsByItem(Item $item)
    {
        $solrConnections = parent::getSolrConnectionsByItem($item);
        $page = $item->getRecord();

        // The lowest bit of the page's l18n_cfg field means "hide the default
        // translation of this page": drop the default language connection.
        if ($page['l18n_cfg'] & 1) {
            unset($solrConnections[0]);
        }

        if (!GeneralUtility::hideIfNotTranslated($page['l18n_cfg'])) {
            return $solrConnections;
        }

        // The page is hidden in languages it is not translated to: keep the
        // default language connection (unless removed above) and only those
        // connections for which a translation overlay of the page exists.
        $accessibleSolrConnections = [];
        if (isset($solrConnections[0])) {
            $accessibleSolrConnections[0] = $solrConnections[0];
        }

        $translationOverlays = $this->pagesRepository->findTranslationOverlaysByPageId((int)$page['uid']);
        foreach ($translationOverlays as $overlay) {
            $languageId = $overlay['sys_language_uid'];
            if (array_key_exists($languageId, $solrConnections)) {
                $accessibleSolrConnections[$languageId] = $solrConnections[$languageId];
            }
        }

        return $accessibleSolrConnections;
    }

    /**
     * Finds the FE user groups used on a page including all groups of content
     * elements and groups of records of extensions that have correctly been
     * pushed through ContentObjectRenderer during rendering.
     *
     * The groups are retrieved by sending a "findUserGroups" request to the
     * page's data URL. Array results are cached statically per page, language
     * and mount point; anything else is not cached, so a later call retries.
     *
     * @param Item $item Index queue item representing the current page to get the user groups from
     * @param int $language The sys_language_uid language ID
     * @return array Array of user group IDs, empty if the response did not contain any
     */
    protected function getAccessGroupsFromContent(Item $item, $language = 0)
    {
        static $accessGroupsCache = [];

        // The request URL differs for mounted pages (getDataUrl() appends the
        // MP parameter), so the mount point has to be part of the cache key -
        // same scheme as used in getAccessRootline().
        $accessGroupsCacheEntryId = $item->getRecordUid() . '|' . $language;
        $mountPointParameter = $this->getMountPageDataUrlParameter($item);
        if ($mountPointParameter !== '') {
            $accessGroupsCacheEntryId .= '|' . $mountPointParameter;
        }

        if (!isset($accessGroupsCache[$accessGroupsCacheEntryId])) {
            $request = $this->buildBasePageIndexerRequest();
            $request->setIndexQueueItem($item);
            $request->addAction('findUserGroups');

            $indexRequestUrl = $this->getDataUrl($item, $language);
            $response = $request->send($indexRequestUrl);

            $groups = $response->getActionResult('findUserGroups');
            if (is_array($groups)) {
                $accessGroupsCache[$accessGroupsCacheEntryId] = $groups;
            }

            if ($this->loggingEnabled) {
                $this->logger->log(
                    SolrLogManager::INFO,
                    'Page Access Groups',
                    [
                        'item' => (array)$item,
                        'language' => $language,
                        'index request url' => $indexRequestUrl,
                        'request' => (array)$request,
                        'response' => (array)$response,
                        'groups' => $groups
                    ]
                );
            }
        }

        // Nothing is cached when the response carried no array result; return
        // an empty list then instead of reading an undefined cache entry.
        return isset($accessGroupsCache[$accessGroupsCacheEntryId])
            ? $accessGroupsCache[$accessGroupsCacheEntryId]
            : [];
    }

    // Utility methods

    /**
     * Builds a base page indexer request with configured headers and other
     * parameters.
     *
     * Evaluated indexer options: "authorization." (username / password),
     * "frontendDataHelper.headers." and "frontendDataHelper.requestTimeout".
     *
     * @return PageIndexerRequest Base page indexer request
     */
    protected function buildBasePageIndexerRequest()
    {
        $request = GeneralUtility::makeInstance(PageIndexerRequest::class);
        $request->setParameter('loggingEnabled', $this->loggingEnabled);

        if (!empty($this->options['authorization.'])) {
            $request->setAuthorizationCredentials(
                $this->options['authorization.']['username'],
                $this->options['authorization.']['password']
            );
        }

        if (!empty($this->options['frontendDataHelper.']['headers.'])) {
            foreach ($this->options['frontendDataHelper.']['headers.'] as $headerValue) {
                $request->addHeader($headerValue);
            }
        }

        if (!empty($this->options['frontendDataHelper.']['requestTimeout'])) {
            $request->setTimeout((float)$this->options['frontendDataHelper.']['requestTimeout']);
        }

        return $request;
    }

    /**
     * Determines a page ID's URL.
     *
     * Builds the URL from the scheme, the domain of the item's site, the path
     * and the page ID, mount point and language parameters. Scheme, host and
     * path can be overridden through the "frontendDataHelper." indexer
     * options. A data URL modifier registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['IndexQueuePageIndexer']['dataUrlModifier']
     * gets the chance to change the final URL.
     *
     * @param Item $item Item to index
     * @param int $language The language id
     * @return string URL to send the index request to
     * @throws \RuntimeException if no valid URL could be built or the registered data URL modifier does not implement PageIndexerDataUrlModifier
     */
    protected function getDataUrl(Item $item, $language = 0)
    {
        $scheme = 'http';
        $host = $item->getSite()->getDomain();
        $path = '/';
        $pageId = $item->getRecordUid();

        // deprecated
        if (!empty($this->options['scheme'])) {
            $this->logger->log(
                SolrLogManager::INFO,
                'Using deprecated option "scheme" to set the scheme (http / https) for the page indexer frontend helper. Use plugin.tx_solr.index.queue.pages.indexer.frontendDataHelper.scheme instead'
            );
            $scheme = $this->options['scheme'];
        }

        // check whether we should use ssl / https
        // (takes precedence over the deprecated option above)
        if (!empty($this->options['frontendDataHelper.']['scheme'])) {
            $scheme = $this->options['frontendDataHelper.']['scheme'];
        }

        // overwriting the host
        if (!empty($this->options['frontendDataHelper.']['host'])) {
            $host = $this->options['frontendDataHelper.']['host'];
        }

        // setting a path if TYPO3 is installed in a sub directory
        if (!empty($this->options['frontendDataHelper.']['path'])) {
            $path = $this->options['frontendDataHelper.']['path'];
        }

        $mountPointParameter = $this->getMountPageDataUrlParameter($item);
        $dataUrl = $scheme . '://' . $host . $path . 'index.php?id=' . $pageId;
        if ($mountPointParameter !== '') {
            $dataUrl .= '&MP=' . $mountPointParameter;
        }
        $dataUrl .= '&L=' . $language;

        if (!GeneralUtility::isValidUrl($dataUrl)) {
            $this->logger->log(
                SolrLogManager::ERROR,
                'Could not create a valid URL to get frontend data while trying to index a page.',
                [
                    'item' => (array)$item,
                    'constructed URL' => $dataUrl,
                    'scheme' => $scheme,
                    'host' => $host,
                    'path' => $path,
                    'page ID' => $pageId,
                    'indexer options' => $this->options
                ]
            );

            throw new \RuntimeException(
                'Could not create a valid URL to get frontend data while trying to index a page. Created URL: ' . $dataUrl,
                1311080805
            );
        }

        // The modifier registration is optional; empty() keeps installations
        // without one from raising "undefined index" notices on every call.
        if (!empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['IndexQueuePageIndexer']['dataUrlModifier'])) {
            $dataUrlModifierClassName = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['IndexQueuePageIndexer']['dataUrlModifier'];
            $dataUrlModifier = GeneralUtility::makeInstance($dataUrlModifierClassName);

            if (!($dataUrlModifier instanceof PageIndexerDataUrlModifier)) {
                throw new \RuntimeException(
                    $dataUrlModifierClassName
                    . ' is not an implementation of ApacheSolrForTypo3\Solr\IndexQueue\PageIndexerDataUrlModifier',
                    1290523345
                );
            }

            $dataUrl = $dataUrlModifier->modifyDataUrl($dataUrl, [
                'item' => $item,
                'scheme' => $scheme,
                'host' => $host,
                'path' => $path,
                'pageId' => $pageId,
                'language' => $language
            ]);
        }

        return $dataUrl;
    }

    /**
     * Generates the MP URL parameter needed to access mount pages. If the item
     * is identified as being a mounted page, the &MP parameter is generated.
     *
     * The value is built from the item's indexing properties as
     * "<mountPageSource>-<mountPageDestination>".
     *
     * @param Item $item Item to get an &MP URL parameter for
     * @return string &MP URL parameter value if $item is a mounted page, an empty string otherwise
     */
    protected function getMountPageDataUrlParameter(Item $item)
    {
        if (!$item->hasIndexingProperty('isMountedPage')) {
            return '';
        }

        return $item->getIndexingProperty('mountPageSource')
            . '-'
            . $item->getIndexingProperty('mountPageDestination');
    }

    #
    # Frontend User Groups Access
    #

    /**
     * Creates a single Solr Document for a page in a specific language and for
     * a specific frontend user group.
     *
     * Sends an "indexPage" request, carrying the access rootline as parameter,
     * to the page's data URL and checks the "pageIndexed" flag of the result.
     *
     * @param Item $item The index queue item representing the page.
     * @param int $language The language to use.
     * @param int $userGroup The frontend user group to use.
     * @return PageIndexerResponse Page indexer response
     * @throws \RuntimeException if indexing an item failed
     */
    protected function indexPage(Item $item, $language = 0, $userGroup = 0)
    {
        $accessRootline = $this->getAccessRootline($item, $language, $userGroup);

        $request = $this->buildBasePageIndexerRequest();
        $request->setIndexQueueItem($item);
        $request->addAction('indexPage');
        $request->setParameter('accessRootline', (string)$accessRootline);

        $indexRequestUrl = $this->getDataUrl($item, $language);
        $response = $request->send($indexRequestUrl);
        $indexActionResult = $response->getActionResult('indexPage');

        // A failed request may yield no (or a non-array) action result;
        // empty() treats that as "not indexed" without emitting warnings.
        $pageIndexed = !empty($indexActionResult['pageIndexed']);

        if ($this->loggingEnabled) {
            $logSeverity = $pageIndexed ? SolrLogManager::NOTICE : SolrLogManager::INFO;
            $logStatus = $pageIndexed ? 'Success' : 'Info';

            $this->logger->log(
                $logSeverity,
                'Page Indexer: ' . $logStatus,
                [
                    'item' => (array)$item,
                    'language' => $language,
                    'user group' => $userGroup,
                    'index request url' => $indexRequestUrl,
                    'request' => (array)$request,
                    'request headers' => $request->getHeaders(),
                    'response' => (array)$response
                ]
            );
        }

        if (!$pageIndexed) {
            $message = 'Failed indexing page Index Queue item: ' . $item->getIndexQueueUid() . ' url: ' . $indexRequestUrl;

            throw new \RuntimeException($message, 1331837081);
        }

        return $response;
    }

    /**
     * Generates a page document's "Access Rootline".
     *
     * The Access Rootline collects frontend user group access restrictions set
     * for pages up in a page's rootline extended to sub-pages.
     *
     * The format is like this:
     * pageId1:group1,group2|pageId2:group3|c:group1,group4,groupN
     *
     * The single elements of the access rootline are separated by a pipe
     * character. All but the last elements represent pages, the last element
     * defines the access restrictions applied to the page's content elements
     * and records shown on the page.
     * Each page element is composed by the page ID of the page setting frontend
     * user access restrictions, a colon, and a comma separated list of frontend
     * user group IDs restricting access to the page.
     * The content access element does not have a page ID, instead it replaces
     * the ID by a lower case C.
     *
     * Results are cached statically per page, language, mount point and
     * content access group.
     *
     * @param Item $item Index queue item representing the current page
     * @param int $language The sys_language_uid language ID
     * @param int $contentAccessGroup The user group to use for the content access rootline element. Optional, will be determined automatically if not set.
     * @return Rootline The Access Rootline object; callers cast it to string to get the format described above
     */
    protected function getAccessRootline(
        Item $item,
        $language = 0,
        $contentAccessGroup = null
    ) {
        static $accessRootlineCache = [];

        $mountPointParameter = $this->getMountPageDataUrlParameter($item);

        $accessRootlineCacheEntryId = $item->getRecordUid() . '|' . $language;
        if ($mountPointParameter !== '') {
            $accessRootlineCacheEntryId .= '|' . $mountPointParameter;
        }
        if ($contentAccessGroup !== null) {
            $accessRootlineCacheEntryId .= '|' . $contentAccessGroup;
        }

        if (!isset($accessRootlineCache[$accessRootlineCacheEntryId])) {
            $accessRootline = Rootline::getAccessRootlineByPageId(
                $item->getRecordUid(),
                $mountPointParameter
            );

            // current page's content access groups
            if ($contentAccessGroup === null) {
                $contentAccessGroups = $this->getAccessGroupsFromContent($item, $language);
                if (!is_array($contentAccessGroups)) {
                    // guards implode() against overriding implementations
                    // that return no array when no groups were found
                    $contentAccessGroups = [];
                }
            } else {
                $contentAccessGroups = [$contentAccessGroup];
            }

            $element = GeneralUtility::makeInstance(
                RootlineElement::class,
                'c:' . implode(',', $contentAccessGroups)
            );
            $accessRootline->push($element);

            $accessRootlineCache[$accessRootlineCacheEntryId] = $accessRootline;
        }

        return $accessRootlineCache[$accessRootlineCacheEntryId];
    }
}
456
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.