1
|
|
|
<?php |
2
|
|
|
namespace ApacheSolrForTypo3\Solr; |
3
|
|
|
|
4
|
|
|
/*************************************************************** |
5
|
|
|
* Copyright notice |
6
|
|
|
* |
7
|
|
|
* (c) 2009-2015 Ingo Renner <[email protected]> |
8
|
|
|
* All rights reserved |
9
|
|
|
* |
10
|
|
|
* This script is part of the TYPO3 project. The TYPO3 project is |
11
|
|
|
* free software; you can redistribute it and/or modify |
12
|
|
|
* it under the terms of the GNU General Public License as published by |
13
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
14
|
|
|
* (at your option) any later version. |
15
|
|
|
* |
16
|
|
|
* The GNU General Public License can be found at |
17
|
|
|
* http://www.gnu.org/copyleft/gpl.html. |
18
|
|
|
* |
19
|
|
|
* This script is distributed in the hope that it will be useful, |
20
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
21
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22
|
|
|
* GNU General Public License for more details. |
23
|
|
|
* |
24
|
|
|
* This copyright notice MUST APPEAR in all copies of the script! |
25
|
|
|
***************************************************************/ |
26
|
|
|
|
27
|
|
|
use Apache_Solr_Document; |
28
|
|
|
use ApacheSolrForTypo3\Solr\Access\Rootline; |
29
|
|
|
use ApacheSolrForTypo3\Solr\Domain\Variants\IdBuilder; |
30
|
|
|
use ApacheSolrForTypo3\Solr\FieldProcessor\Service; |
31
|
|
|
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer; |
32
|
|
|
use ApacheSolrForTypo3\Solr\IndexQueue\Item; |
33
|
|
|
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration; |
34
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
35
|
|
|
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController; |
36
|
|
|
|
37
|
|
|
/**
 * Page Indexer to index TYPO3 pages used by the Index Queue.
 *
 * Builds an Apache_Solr_Document for the page currently rendered by the
 * frontend controller, lets registered hooks substitute / post process /
 * extend it, runs the configured field processing instructions and finally
 * sends the resulting documents to the Solr connection.
 *
 * @author Ingo Renner
 * @author Daniel Poetzinger
 * @author Timo Schmidt
 */
class Typo3PageIndexer
{

    /**
     * ID of the current page's Solr document.
     *
     * Empty string until getPageDocument() has built a document.
     *
     * @var string
     */
    protected static $pageSolrDocumentId = '';

    /**
     * The Solr document generated for the current page.
     *
     * NULL until indexPage() has built a document.
     *
     * @var \Apache_Solr_Document
     */
    protected static $pageSolrDocument = null;

    /**
     * The mount point parameter used in the Frontend controller.
     *
     * Defaults to an empty string so that consumers comparing against ''
     * (document ID, rootline field) treat "never set" as "no mount point".
     *
     * @var string
     */
    protected $mountPointParameter = '';

    /**
     * Solr server connection.
     *
     * Stays NULL when no connection could be established in the constructor.
     *
     * @var SolrService
     */
    protected $solrConnection = null;

    /**
     * Frontend page object (TSFE).
     *
     * @var TypoScriptFrontendController
     */
    protected $page = null;

    /**
     * Content extractor to extract content from TYPO3 pages
     *
     * @var Typo3PageContentExtractor
     */
    protected $contentExtractor = null;

    /**
     * URL to be indexed as the page's URL
     *
     * @var string
     */
    protected $pageUrl = '';

    /**
     * The page's access rootline
     *
     * @var Rootline
     */
    protected $pageAccessRootline = null;

    /**
     * Documents that have been handed to addDocumentsToSolrIndex()
     *
     * @var array
     */
    protected $documentsSentToSolr = [];

    /**
     * @var TypoScriptConfiguration
     */
    protected $configuration;

    /**
     * @var Item
     */
    protected $indexQueueItem;

    /**
     * @var IdBuilder
     */
    protected $variantIdBuilder;

    /**
     * Constructor
     *
     * Tries to establish the Solr connection right away. A failure is only
     * logged; indexPage() then returns FALSE because no connection is set.
     *
     * @param TypoScriptFrontendController $page The page to index
     * @param IdBuilder $variantIdBuilder Optional builder for the variantId field, a default instance is created when omitted
     */
    public function __construct(TypoScriptFrontendController $page, IdBuilder $variantIdBuilder = null)
    {
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        // the configuration must be available before the first call to log()
        $this->configuration = Util::getSolrConfiguration();

        try {
            $this->initializeSolrConnection();
        } catch (\Exception $e) {
            $this->log($e->getMessage() . ' Error code: ' . $e->getCode(), 3);

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog('Exception while trying to index a page', 'solr', 3, [$e->__toString()]);
            }
        }

        $this->contentExtractor = GeneralUtility::makeInstance(Typo3PageContentExtractor::class, $this->page->content);
        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
        $this->variantIdBuilder = is_null($variantIdBuilder) ? GeneralUtility::makeInstance(IdBuilder::class) : $variantIdBuilder;
    }

    /**
     * Sets the Index Queue item the page is indexed for.
     *
     * @param Item $indexQueueItem
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }

    /**
     * Initializes the Solr server connection.
     *
     * @throws \Exception when no Solr connection can be established.
     */
    protected function initializeSolrConnection()
    {
        $solr = GeneralUtility::makeInstance(ConnectionManager::class)->getConnectionByPageId($this->page->id, $this->page->sys_language_uid);

        // do not continue if no server is available
        if (!$solr->ping()) {
            throw new \Exception(
                'No Solr instance available while trying to index a page.',
                1234790825
            );
        }

        $this->solrConnection = $solr;
    }

    /**
     * Logs messages to devlog and TS log (admin panel)
     *
     * TODO replace by a central logger
     *
     * @param string $message Message to set
     * @param int $errorNum Error number
     * @param array $data Additional data to log
     * @return void
     */
    protected function log($message, $errorNum = 0, array $data = [])
    {
        // the time tracker is not guaranteed to be registered
        if (isset($GLOBALS['TT']) && is_object($GLOBALS['TT'])) {
            $GLOBALS['TT']->setTSlogMessage('tx_solr: ' . $message, $errorNum);
        }

        if ($this->configuration->getLoggingIndexing()) {
            $logData = [];
            foreach ($data as $value) {
                $logData[] = (array)$value;
            }

            GeneralUtility::devLog($message, 'solr', $errorNum, $logData);
        }
    }

    /**
     * Gets the current page's Solr document ID.
     *
     * @return string The page's Solr document ID, an empty string as long as no document was generated yet.
     */
    public static function getPageSolrDocumentId()
    {
        return self::$pageSolrDocumentId;
    }

    /**
     * Gets the Solr document generated for the current page.
     *
     * @return \Apache_Solr_Document|NULL The page's Solr document or NULL if it has not been generated yet.
     */
    public static function getPageSolrDocument()
    {
        return self::$pageSolrDocument;
    }

    /**
     * Allows to provide a Solr server connection other than the one
     * initialized by the constructor.
     *
     * @param SolrService $solrConnection Solr connection
     * @throws \Exception if the Solr server cannot be reached
     */
    public function setSolrConnection(SolrService $solrConnection)
    {
        if (!$solrConnection->ping()) {
            throw new \Exception(
                'Could not connect to Solr server.',
                1323946472
            );
        }

        $this->solrConnection = $solrConnection;
    }

    /**
     * Indexes a page.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a registered hook object does not implement the interface required for its hook
     */
    public function indexPage()
    {
        if (is_null($this->solrConnection)) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the solr server isn't available
            // anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($pageDocument);
        self::$pageSolrDocument = $pageDocument;

        // the page document always comes first, hooks may append further documents
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToSolrIndex($documents);
        // recorded regardless of whether adding the documents succeeded
        $this->documentsSentToSolr = $documents;

        return $pageIndexed;
    }

    /**
     * Returns the class references registered for one of the indexer hooks
     * in $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'].
     *
     * @param string $hookName Name of the hook, e.g. "indexPageAddDocuments"
     * @return array The registered class references, an empty array when nothing (or no array) is registered
     */
    private function getIndexerHookClassReferences($hookName)
    {
        if (isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'][$hookName])
            && is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'][$hookName])
        ) {
            return $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer'][$hookName];
        }

        return [];
    }

    /**
     * Describes a value for use in an exception message: the class name for
     * objects, the PHP type name for everything else.
     *
     * @param mixed $value
     * @return string
     */
    private static function describeType($value)
    {
        return is_object($value) ? get_class($value) : gettype($value);
    }

    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * @param \Apache_Solr_Document $pageDocument
     * @throws \UnexpectedValueException if a post processor does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        foreach ($this->getIndexerHookClassReferences('indexPagePostProcessPageDocument') as $classReference) {
            $postProcessor = GeneralUtility::getUserObj($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // name the offending post processor, not the document it was supposed to process
                throw new \UnexpectedValueException(self::describeType($postProcessor) . ' must implement interface ' . PageDocumentPostProcessor::class, 1397739154);
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }

    /**
     * Builds the Solr document for the current page.
     *
     * As a side effect the generated document ID is stored statically and
     * can be retrieved through getPageSolrDocumentId().
     *
     * @return \Apache_Solr_Document A document representing the page
     */
    protected function getPageDocument()
    {
        $document = GeneralUtility::makeInstance(Apache_Solr_Document::class);
        /* @var $document \Apache_Solr_Document */
        $site = Site::getSiteByPageId($this->page->id);
        $pageRecord = $this->page->page;

        self::$pageSolrDocumentId = $documentId = Util::getPageDocumentId(
            $this->page->id,
            $this->page->type,
            $this->page->sys_language_uid,
            $this->getDocumentIdGroups(),
            $this->getMountPointParameter()
        );
        $document->setField('id', $documentId);
        $document->setField('site', $site->getDomain());
        $document->setField('siteHash', $site->getSiteHash());
        $document->setField('appKey', 'EXT:solr');
        $document->setField('type', 'pages');

        // system fields
        $document->setField('uid', $this->page->id);
        $document->setField('pid', $pageRecord['pid']);

        // variantId
        $variantId = $this->variantIdBuilder->buildFromTypeAndUid('pages', $this->page->id);
        $document->setField('variantId', $variantId);

        $document->setField('typeNum', $this->page->type);
        $document->setField('created', $pageRecord['crdate']);
        $document->setField('changed', $pageRecord['SYS_LASTCHANGED']);

        $rootline = $this->getRootLineFieldValue();
        $document->setField('rootline', $rootline);

        // access
        $this->addAccessField($document);
        $this->addEndtimeField($document, $pageRecord);

        // content
        $document->setField('title', $this->contentExtractor->getPageTitle());
        $document->setField('subTitle', $pageRecord['subtitle']);
        $document->setField('navTitle', $pageRecord['nav_title']);
        $document->setField('author', $pageRecord['author']);
        $document->setField('description', $pageRecord['description']);
        $document->setField('abstract', $pageRecord['abstract']);
        $document->setField('content', $this->contentExtractor->getIndexableContent());
        $document->setField('url', $this->pageUrl);

        $this->addKeywordsField($document, $pageRecord);
        $this->addTagContentFields($document);

        return $document;
    }

    /**
     * Adds the access field to the document if needed.
     *
     * The field is only set when the access rootline's string
     * representation is not blank.
     *
     * @param \Apache_Solr_Document $document
     */
    protected function addAccessField(\Apache_Solr_Document $document)
    {
        $access = (string)$this->pageAccessRootline;
        if (trim($access) !== '') {
            $document->setField('access', $access);
        }
    }

    /**
     * Adds the endtime field to the document if the page record has a
     * non-empty endtime.
     *
     * @param \Apache_Solr_Document $document
     * @param array $pageRecord The page record to read the endtime from
     */
    protected function addEndtimeField(\Apache_Solr_Document $document, $pageRecord)
    {
        // check the same record the value is taken from
        if (!empty($pageRecord['endtime'])) {
            $document->setField('endtime', $pageRecord['endtime']);
        }
    }

    /**
     * Adds keywords, multi valued.
     *
     * The comma separated keywords of the page record are trimmed, empty
     * entries are dropped and duplicates removed.
     *
     * @param \Apache_Solr_Document $document
     * @param array $pageRecord
     */
    protected function addKeywordsField(\Apache_Solr_Document $document, $pageRecord)
    {
        $keywords = array_unique(GeneralUtility::trimExplode(',', $pageRecord['keywords'], true));
        foreach ($keywords as $keyword) {
            $document->addField('keywords', $keyword);
        }
    }

    /**
     * Add content from several tags like headers, anchors, ...
     *
     * @param \Apache_Solr_Document $document
     */
    protected function addTagContentFields(\Apache_Solr_Document $document)
    {
        $tagContent = $this->contentExtractor->getTagContent();
        foreach ($tagContent as $fieldName => $fieldValue) {
            $document->setField($fieldName, $fieldValue);
        }
    }

    /**
     * Builds the content for the rootline field.
     *
     * @return string The page ID, followed by a comma and the mount point parameter if one is set
     */
    protected function getRootLineFieldValue()
    {
        $rootline = (string)$this->page->id;
        $mountPointParameter = $this->getMountPointParameter();
        if ($mountPointParameter !== '') {
            $rootline .= ',' . $mountPointParameter;
        }

        return $rootline;
    }

    /**
     * Gets a comma separated list of frontend user groups to use for the
     * document ID.
     *
     * @return string A comma separated list of frontend user groups, "0" when the access rootline has no groups.
     */
    protected function getDocumentIdGroups()
    {
        $groups = $this->pageAccessRootline->getGroups();
        $groups = Rootline::cleanGroupArray($groups);

        if (empty($groups)) {
            $groups[] = 0;
        }

        return implode(',', $groups);
    }

    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * @return string The mount point parameter, an empty string when none was set
     */
    public function getMountPointParameter()
    {
        // cast guards against subclasses resetting the property to NULL
        return (string)$this->mountPointParameter;
    }

    /**
     * Sets the mount point parameter that is used in the Frontend controller.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }

    /**
     * Allows third party extensions to replace or modify the page document
     * created by this indexer.
     *
     * Registered substitute indexers are applied in order, each one receives
     * the document returned by its predecessor.
     *
     * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
     * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
     * @throws \UnexpectedValueException if a substitute indexer does not implement SubstitutePageIndexer or does not return an Apache_Solr_Document
     */
    protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
    {
        $classReferences = $this->getIndexerHookClassReferences('indexPageSubstitutePageDocument');
        if (empty($classReferences)) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($classReferences as $classReference) {
            $substituteIndexer = GeneralUtility::getUserObj($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                $message = self::describeType($substituteIndexer) . ' must implement interface ' . SubstitutePageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491001);
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            if (!$substituteDocument instanceof Apache_Solr_Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Apache_Solr_Document document.';
                throw new \UnexpectedValueException($message, 1310490952);
            }
            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }

    /**
     * Retrieves the indexConfigurationName from the related queueItem, or falls back to pages when no queue item set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        return isset($this->indexQueueItem) ? $this->indexQueueItem->getIndexingConfigurationName() : 'pages';
    }

    /**
     * Allows third party extensions to provide additional documents which
     * should be indexed for the current page.
     *
     * @param \Apache_Solr_Document $pageDocument The main document representing this page.
     * @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by the additional \Apache_Solr_Document objects to index
     * @throws \UnexpectedValueException if an additional indexer does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
    {
        $documents = $existingDocuments;

        foreach ($this->getIndexerHookClassReferences('indexPageAddDocuments') as $classReference) {
            $additionalIndexer = GeneralUtility::getUserObj($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                $message = self::describeType($additionalIndexer) . ' must implement interface ' . AdditionalPageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491024);
            }

            // results that are not arrays are ignored
            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }

    /**
     * Sends the given documents to the field processing service which takes
     * care of manipulating fields as defined in the field's configuration.
     *
     * Nothing happens when no processing instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($processingInstructions) > 0) {
            $service = GeneralUtility::makeInstance(Service::class);
            $service->processDocuments($documents, $processingInstructions);
        }
    }

    /**
     * Adds the collected documents to the Solr index.
     *
     * Documents are sent in chunks of 20. Any exception raised while sending
     * is logged and turned into a FALSE return value.
     *
     * @param array $documents An array of \Apache_Solr_Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToSolrIndex(array $documents)
    {
        $documentsAdded = false;

        if (!count($documents)) {
            return $documentsAdded;
        }

        try {
            $this->log('Adding ' . count($documents) . ' documents.', 0, $documents);

            // chunk adds by 20
            $documentChunks = array_chunk($documents, 20);
            foreach ($documentChunks as $documentChunk) {
                $response = $this->solrConnection->addDocuments($documentChunk);

                if ($response->getHttpStatus() != 200) {
                    $transportException = new \Apache_Solr_HttpTransportException($response);
                    throw new \RuntimeException('Solr Request failed.', 1331834983, $transportException);
                }
            }

            $documentsAdded = true;
        } catch (\Exception $e) {
            $this->log($e->getMessage() . ' Error code: ' . $e->getCode(), 2);

            if ($this->configuration->getLoggingExceptions()) {
                GeneralUtility::devLog('Exception while adding documents', 'solr', 3, [$e->__toString()]);
            }
        }

        return $documentsAdded;
    }

    /**
     * Gets the current page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }

    /**
     * Sets the URL to use for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }

    /**
     * Gets the page's access rootline.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }

    /**
     * Sets the page's access rootline.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }

    /**
     * Gets the documents that have been sent to Solr
     *
     * @return array An array of \Apache_Solr_Document objects
     */
    public function getDocumentsSentToSolr()
    {
        return $this->documentsSentToSolr;
    }
}
651
|
|
|
|