1
|
|
|
<?php |
2
|
|
|
namespace ApacheSolrForTypo3\Solr; |
3
|
|
|
|
4
|
|
|
/*************************************************************** |
5
|
|
|
* Copyright notice |
6
|
|
|
* |
7
|
|
|
* (c) 2009-2015 Ingo Renner <[email protected]> |
8
|
|
|
* All rights reserved |
9
|
|
|
* |
10
|
|
|
* This script is part of the TYPO3 project. The TYPO3 project is |
11
|
|
|
* free software; you can redistribute it and/or modify |
12
|
|
|
* it under the terms of the GNU General Public License as published by |
13
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
14
|
|
|
* (at your option) any later version. |
15
|
|
|
* |
16
|
|
|
* The GNU General Public License can be found at |
17
|
|
|
* http://www.gnu.org/copyleft/gpl.html. |
18
|
|
|
* |
19
|
|
|
* This script is distributed in the hope that it will be useful, |
20
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
21
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
22
|
|
|
* GNU General Public License for more details. |
23
|
|
|
* |
24
|
|
|
* This copyright notice MUST APPEAR in all copies of the script! |
25
|
|
|
***************************************************************/ |
26
|
|
|
|
27
|
|
|
use Apache_Solr_Document; |
28
|
|
|
use ApacheSolrForTypo3\Solr\Access\Rootline; |
29
|
|
|
use ApacheSolrForTypo3\Solr\FieldProcessor\Service; |
30
|
|
|
use ApacheSolrForTypo3\Solr\IndexQueue\FrontendHelper\PageFieldMappingIndexer; |
31
|
|
|
use ApacheSolrForTypo3\Solr\IndexQueue\Item; |
32
|
|
|
use ApacheSolrForTypo3\Solr\System\Configuration\TypoScriptConfiguration; |
33
|
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility; |
34
|
|
|
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController; |
35
|
|
|
|
36
|
|
|
/** |
37
|
|
|
* Page Indexer to index TYPO3 pages used by the Index Queue. |
38
|
|
|
* |
39
|
|
|
* @author Ingo Renner <[email protected]> |
40
|
|
|
* @author Daniel Poetzinger <[email protected]> |
41
|
|
|
* @author Timo Schmidt <[email protected]> |
42
|
|
|
*/ |
43
|
|
|
class Typo3PageIndexer |
44
|
|
|
{ |
45
|
|
|
|
46
|
|
|
/** |
47
|
|
|
* ID of the current page's Solr document. |
48
|
|
|
* |
49
|
|
|
* @var string |
50
|
|
|
*/ |
51
|
|
|
protected static $pageSolrDocumentId = ''; |
52
|
|
|
/** |
53
|
|
|
* The Solr document generated for the current page. |
54
|
|
|
* |
55
|
|
|
* @var \Apache_Solr_Document |
56
|
|
|
*/ |
57
|
|
|
protected static $pageSolrDocument = null; |
58
|
|
|
/** |
59
|
|
|
* The mount point parameter used in the Frontend controller. |
60
|
|
|
* |
61
|
|
|
* @var string |
62
|
|
|
*/ |
63
|
|
|
protected $mountPointParameter; |
64
|
|
|
/** |
65
|
|
|
* Solr server connection. |
66
|
|
|
* |
67
|
|
|
* @var SolrService |
68
|
|
|
*/ |
69
|
|
|
protected $solrConnection = null; |
70
|
|
|
/** |
71
|
|
|
* Frontend page object (TSFE). |
72
|
|
|
* |
73
|
|
|
* @var TypoScriptFrontendController |
74
|
|
|
*/ |
75
|
|
|
protected $page = null; |
76
|
|
|
/** |
77
|
|
|
* Content extractor to extract content from TYPO3 pages |
78
|
|
|
* |
79
|
|
|
* @var Typo3PageContentExtractor |
80
|
|
|
*/ |
81
|
|
|
protected $contentExtractor = null; |
82
|
|
|
/** |
83
|
|
|
* URL to be indexed as the page's URL |
84
|
|
|
* |
85
|
|
|
* @var string |
86
|
|
|
*/ |
87
|
|
|
protected $pageUrl = ''; |
88
|
|
|
/** |
89
|
|
|
* The page's access rootline |
90
|
|
|
* |
91
|
|
|
* @var Rootline |
92
|
|
|
*/ |
93
|
|
|
protected $pageAccessRootline = null; |
94
|
|
|
/** |
95
|
|
|
* Documents that have been sent to Solr |
96
|
|
|
* |
97
|
|
|
* @var array |
98
|
|
|
*/ |
99
|
|
|
protected $documentsSentToSolr = []; |
100
|
|
|
|
101
|
|
|
/** |
102
|
|
|
* @var TypoScriptConfiguration |
103
|
|
|
*/ |
104
|
|
|
protected $configuration; |
105
|
|
|
|
106
|
|
|
/** |
107
|
|
|
* @var Item |
108
|
|
|
*/ |
109
|
|
|
protected $indexQueueItem; |
110
|
|
|
|
111
|
|
|
/**
 * Creates the indexer for an already rendered frontend page.
 *
 * Keeps the page, uses the current request URL as the URL to index, loads
 * the Solr TypoScript configuration and tries to open the Solr connection.
 * A connection failure is only logged here, it is not re-thrown.
 *
 * @param TypoScriptFrontendController $page The page to index
 */
public function __construct(TypoScriptFrontendController $page)
{
    $this->page = $page;
    $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
    $this->configuration = Util::getSolrConfiguration();

    try {
        $this->initializeSolrConnection();
    } catch (\Exception $exception) {
        $errorSummary = $exception->getMessage() . ' Error code: ' . $exception->getCode();
        $this->log($errorSummary, 3);

        // TODO extract to a class "ExceptionLogger"
        $logExceptions = $this->configuration->getLoggingExceptions();
        if ($logExceptions) {
            GeneralUtility::devLog(
                'Exception while trying to index a page',
                'solr',
                3,
                [$exception->__toString()]
            );
        }
    }

    $renderedContent = $this->page->content;
    $this->contentExtractor = GeneralUtility::makeInstance(Typo3PageContentExtractor::class, $renderedContent);
    // start with an empty access rootline; callers may replace it later
    $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, '');
}
136
|
|
|
|
137
|
|
|
/**
 * Assigns the Index Queue item this page is being indexed for.
 *
 * @param Item $indexQueueItem The queue item to remember
 * @return void
 */
public function setIndexQueueItem($indexQueueItem)
{
    $this->indexQueueItem = $indexQueueItem;
}
144
|
|
|
|
145
|
|
|
|
146
|
|
|
/**
 * Looks up the Solr connection for the current page and language and
 * stores it once the server answered a ping.
 *
 * @throws \Exception (code 1234790825) when the Solr server does not respond to the ping.
 */
protected function initializeSolrConnection()
{
    $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
    $connection = $connectionManager->getConnectionByPageId(
        $this->page->id,
        $this->page->sys_language_uid
    );

    // do not continue if no server is available
    $serverIsReachable = $connection->ping();
    if (!$serverIsReachable) {
        throw new \Exception(
            'No Solr instance available while trying to index a page.',
            1234790825
        );
    }

    $this->solrConnection = $connection;
}
165
|
|
|
|
166
|
|
|
/**
 * Writes a message to the TS log (admin panel) and, when indexing logging
 * is enabled in the configuration, to the devlog as well.
 *
 * @param string $message Message to log
 * @param int $errorNum Error number / severity passed on to both logs
 * @param array $data Additional data; each entry is cast to an array for the devlog
 * @return void
 */
protected function log($message, $errorNum = 0, array $data = [])
{
    $timeTracker = $GLOBALS['TT'];
    if (is_object($timeTracker)) {
        $timeTracker->setTSlogMessage('tx_solr: ' . $message, $errorNum);
    }

    if (!$this->configuration->getLoggingIndexing()) {
        return;
    }

    // cast every entry to an array and re-index the list from 0
    $logData = array_values(array_map(
        static function ($entry) {
            return (array)$entry;
        },
        $data
    ));

    GeneralUtility::devLog($message, 'solr', $errorNum, $logData);
}
191
|
|
|
|
192
|
|
|
/**
 * Returns the Solr document ID stored statically for the current page.
 *
 * @return string The page's Solr document ID; the initial value is an empty string.
 */
public static function getPageSolrDocumentId()
{
    $documentId = self::$pageSolrDocumentId;

    return $documentId;
}
201
|
|
|
|
202
|
|
|
/**
 * Returns the Solr document stored statically for the current page.
 *
 * @return \Apache_Solr_Document|NULL The page's Solr document, NULL as long as none has been stored.
 */
public static function getPageSolrDocument()
{
    $document = self::$pageSolrDocument;

    return $document;
}
211
|
|
|
|
212
|
|
|
/**
 * Replaces the Solr connection set up by the constructor with the given
 * one. The connection is only accepted after it answered a ping.
 *
 * @param SolrService $solrConnection Solr connection to use from now on
 * @throws \Exception (code 1323946472) if the Solr server cannot be reached
 */
public function setSolrConnection(SolrService $solrConnection)
{
    $serverIsReachable = $solrConnection->ping();
    if (!$serverIsReachable) {
        throw new \Exception(
            'Could not connect to Solr server.',
            1323946472
        );
    }

    $this->solrConnection = $solrConnection;
}
230
|
|
|
|
231
|
|
|
/**
 * Builds the page document, runs the substitution, post processing and
 * additional document hooks, applies field processing and sends the
 * resulting documents to Solr.
 *
 * @return bool TRUE after successfully indexing the page, FALSE on error or when no Solr connection is set
 * @throws \UnexpectedValueException if a page document post processor fails to implement interface ApacheSolrForTypo3\Solr\PageDocumentPostProcessor
 */
public function indexPage()
{
    if ($this->solrConnection === null) {
        // intended early return: building documents is wasted effort
        // when the Solr server isn't available anyways
        // FIXME use an exception
        return false;
    }

    $pageDocument = $this->substitutePageDocument($this->getPageDocument());
    $this->applyIndexPagePostProcessors($pageDocument);
    self::$pageSolrDocument = $pageDocument;

    // the list starts with the page document; hooks may append more
    $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
    $this->processDocuments($documents);

    $pageIndexed = $this->addDocumentsToSolrIndex($documents);
    $this->documentsSentToSolr = $documents;

    return $pageIndexed;
}
265
|
|
|
|
266
|
|
|
/**
 * Applies the configured post processors (indexPagePostProcessPageDocument)
 * to the page document.
 *
 * Each class reference registered in
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument']
 * is instantiated and receives the document together with the page.
 *
 * @param \Apache_Solr_Document $pageDocument The document to post process
 * @return void
 * @throws \UnexpectedValueException (code 1397739154) if a registered post processor does not implement PageDocumentPostProcessor
 */
protected function applyIndexPagePostProcessors($pageDocument)
{
    if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'])) {
        return;
    }

    foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPagePostProcessPageDocument'] as $classReference) {
        $postProcessor = GeneralUtility::getUserObj($classReference);
        if (!$postProcessor instanceof PageDocumentPostProcessor) {
            // report the offending post processor, not the document being processed;
            // fall back to the class reference when no object could be created
            if (is_object($postProcessor)) {
                $offender = get_class($postProcessor);
            } elseif (is_string($classReference)) {
                $offender = $classReference;
            } else {
                $offender = gettype($postProcessor);
            }

            throw new \UnexpectedValueException(
                $offender . ' must implement interface ' . PageDocumentPostProcessor::class,
                1397739154
            );
        }

        $postProcessor->postProcessPageDocument($pageDocument, $this->page);
    }
}
286
|
|
|
|
287
|
|
|
/**
 * Assembles the Solr document for the current page.
 *
 * The document ID is also stored statically so that it can be read via
 * getPageSolrDocumentId().
 *
 * @return \Apache_Solr_Document A document representing the page
 */
protected function getPageDocument()
{
    /* @var $document \Apache_Solr_Document */
    $document = GeneralUtility::makeInstance(Apache_Solr_Document::class);
    $site = Site::getSiteByPageId($this->page->id);
    $pageRecord = $this->page->page;

    $documentId = Util::getPageDocumentId(
        $this->page->id,
        $this->page->type,
        $this->page->sys_language_uid,
        $this->getDocumentIdGroups(),
        $this->getMountPointParameter()
    );
    self::$pageSolrDocumentId = $documentId;

    // identification, system fields and rootline, in the order they are set
    $baseFields = [
        'id' => $documentId,
        'site' => $site->getDomain(),
        'siteHash' => $site->getSiteHash(),
        'appKey' => 'EXT:solr',
        'type' => 'pages',
        'uid' => $this->page->id,
        'pid' => $pageRecord['pid'],
        'variantId' => 'pages/' . $this->page->id,
        'typeNum' => $this->page->type,
        'created' => $pageRecord['crdate'],
        'changed' => $pageRecord['SYS_LASTCHANGED'],
        'rootline' => $this->getRootLineFieldValue(),
    ];
    foreach ($baseFields as $fieldName => $fieldValue) {
        $document->setField($fieldName, $fieldValue);
    }

    // access
    $this->addAccessField($document);
    $this->addEndtimeField($document, $pageRecord);

    // content
    $contentFields = [
        'title' => $this->contentExtractor->getPageTitle(),
        'subTitle' => $pageRecord['subtitle'],
        'navTitle' => $pageRecord['nav_title'],
        'author' => $pageRecord['author'],
        'description' => $pageRecord['description'],
        'abstract' => $pageRecord['abstract'],
        'content' => $this->contentExtractor->getIndexableContent(),
        'url' => $this->pageUrl,
    ];
    foreach ($contentFields as $fieldName => $fieldValue) {
        $document->setField($fieldName, $fieldValue);
    }

    $this->addKeywordsField($document, $pageRecord);
    $this->addTagContentFields($document);

    return $document;
}
345
|
|
|
|
346
|
|
|
/**
 * Sets the document's access field to the string form of the page access
 * rootline, unless that string is empty or consists of whitespace only.
 *
 * @param \Apache_Solr_Document $document The document to add the field to
 * @return void
 */
protected function addAccessField(\Apache_Solr_Document $document)
{
    $accessRootline = (string)$this->pageAccessRootline;
    if (trim($accessRootline) === '') {
        return;
    }

    $document->setField('access', $accessRootline);
}
358
|
|
|
|
359
|
|
|
/**
 * Sets the document's endtime field when the given page record carries a
 * non-empty endtime.
 *
 * Both the check and the value use the passed page record, so the field
 * always reflects the record the caller handed in.
 *
 * @param \Apache_Solr_Document $document The document to add the field to
 * @param array $pageRecord The page record to read the endtime from
 * @return void
 */
protected function addEndtimeField(\Apache_Solr_Document $document, $pageRecord)
{
    // empty() also covers a record without an endtime key
    if (!empty($pageRecord['endtime'])) {
        $document->setField('endtime', $pageRecord['endtime']);
    }
}
369
|
|
|
|
370
|
|
|
/**
 * Adds the page's keywords to the multi valued keywords field.
 *
 * The comma separated keywords value of the page record is split, trimmed,
 * stripped of empty entries and de-duplicated before it is added.
 *
 * @param \Apache_Solr_Document $document The document to add the keywords to
 * @param array $pageRecord The page record providing the keywords
 * @return void
 */
protected function addKeywordsField(\Apache_Solr_Document $document, $pageRecord)
{
    $keywordList = GeneralUtility::trimExplode(',', $pageRecord['keywords'], true);
    $uniqueKeywords = array_unique($keywordList);

    foreach ($uniqueKeywords as $singleKeyword) {
        $document->addField('keywords', $singleKeyword);
    }
}
383
|
|
|
|
384
|
|
|
/**
 * Copies the tag content returned by the content extractor into the
 * document, one field per entry (field name => field value).
 *
 * @param \Apache_Solr_Document $document The document to add the fields to
 * @return void
 */
protected function addTagContentFields(\Apache_Solr_Document $document)
{
    $contentByField = $this->contentExtractor->getTagContent();

    foreach ($contentByField as $field => $content) {
        $document->setField($field, $content);
    }
}
396
|
|
|
|
397
|
|
|
/**
 * Builds the content for the rootline field: the page id, followed by a
 * comma and the mount point parameter when one is set.
 *
 * @return string
 */
protected function getRootLineFieldValue()
{
    $rootline = (string)$this->page->id;

    // The mount point parameter is NULL until it is set explicitly; cast it
    // so that an unset parameter does not append a dangling comma.
    $mountPointParameter = (string)$this->getMountPointParameter();
    if ($mountPointParameter !== '') {
        $rootline .= ',' . $mountPointParameter;
    }

    return $rootline;
}
411
|
|
|
|
412
|
|
|
/**
 * Builds the comma separated list of frontend user groups used for the
 * document ID.
 *
 * The groups come from the page access rootline and are passed through
 * Rootline::cleanGroupArray(); an empty result is replaced by group 0.
 *
 * @return string A comma separated list of frontend user groups.
 */
protected function getDocumentIdGroups()
{
    $accessGroups = Rootline::cleanGroupArray(
        $this->pageAccessRootline->getGroups()
    );

    if (empty($accessGroups)) {
        $accessGroups[] = 0;
    }

    return implode(',', $accessGroups);
}
431
|
|
|
|
432
|
|
|
// Logging |
433
|
|
|
// TODO replace by a central logger |
434
|
|
|
|
435
|
|
|
/**
 * Gets the mount point parameter that is used in the Frontend controller.
 *
 * The property has no default value, so it is cast here: callers always
 * receive a string, an empty one when no parameter has been set.
 *
 * @return string
 */
public function getMountPointParameter()
{
    return (string)$this->mountPointParameter;
}
444
|
|
|
|
445
|
|
|
// Misc |
446
|
|
|
|
447
|
|
|
/**
 * Stores the mount point parameter that is used in the Frontend
 * controller. The given value is cast to a string.
 *
 * @param string $mountPointParameter
 * @return void
 */
public function setMountPointParameter($mountPointParameter)
{
    $parameterAsString = (string)$mountPointParameter;
    $this->mountPointParameter = $parameterAsString;
}
456
|
|
|
|
457
|
|
|
/**
 * Gives third party extensions the chance to replace or modify the page
 * document created by this indexer.
 *
 * The class references registered in
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument']
 * are called one after another; each one receives the document returned
 * by its predecessor.
 *
 * @param \Apache_Solr_Document $pageDocument The page document created by this indexer.
 * @return \Apache_Solr_Document An Apache Solr document representing the currently indexed page
 * @throws \UnexpectedValueException (code 1310491001) if a registered class does not implement SubstitutePageIndexer
 * @throws \UnexpectedValueException (code 1310490952) if a substitute indexer does not return an Apache_Solr_Document
 */
protected function substitutePageDocument(\Apache_Solr_Document $pageDocument)
{
    $registeredIndexers = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageSubstitutePageDocument'];
    if (!is_array($registeredIndexers)) {
        return $pageDocument;
    }

    $configurationName = $this->getIndexConfigurationNameForCurrentPage();
    $currentDocument = $pageDocument;

    foreach ($registeredIndexers as $classReference) {
        $indexer = GeneralUtility::getUserObj($classReference);

        if (!$indexer instanceof SubstitutePageIndexer) {
            throw new \UnexpectedValueException(
                get_class($indexer) . ' must implement interface ' . SubstitutePageIndexer::class,
                1310491001
            );
        }

        // the field mapping indexer needs to know which indexing configuration applies
        if ($indexer instanceof PageFieldMappingIndexer) {
            $indexer->setPageIndexingConfigurationName($configurationName);
        }

        $candidate = $indexer->getPageDocument($currentDocument);
        if (!$candidate instanceof \Apache_Solr_Document) {
            throw new \UnexpectedValueException(
                'The document returned by ' . get_class($indexer) . ' is not a valid Apache_Solr_Document document.',
                1310490952
            );
        }

        $currentDocument = $candidate;
    }

    return $currentDocument;
}
493
|
|
|
|
494
|
|
|
/**
 * Returns the indexing configuration name of the assigned Index Queue
 * item, or 'pages' when no queue item has been set.
 *
 * @return string
 */
protected function getIndexConfigurationNameForCurrentPage()
{
    if (isset($this->indexQueueItem)) {
        return $this->indexQueueItem->getIndexingConfigurationName();
    }

    return 'pages';
}
503
|
|
|
|
504
|
|
|
/**
 * Gives third party extensions the chance to provide additional documents
 * which should be indexed for the current page.
 *
 * The class references registered in
 * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments']
 * are called in turn; arrays they return are appended to the list.
 *
 * @param \Apache_Solr_Document $pageDocument The main document representing this page.
 * @param \Apache_Solr_Document[] $existingDocuments An array of documents already created for this page.
 * @return array The existing documents followed by the additional \Apache_Solr_Document objects to index
 * @throws \UnexpectedValueException (code 1310491024) if a registered class does not implement AdditionalPageIndexer
 */
protected function getAdditionalDocuments(\Apache_Solr_Document $pageDocument, array $existingDocuments)
{
    $registeredIndexers = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['Indexer']['indexPageAddDocuments'];
    if (!is_array($registeredIndexers)) {
        return $existingDocuments;
    }

    $collected = $existingDocuments;
    foreach ($registeredIndexers as $classReference) {
        $indexer = GeneralUtility::getUserObj($classReference);

        if (!$indexer instanceof AdditionalPageIndexer) {
            throw new \UnexpectedValueException(
                get_class($indexer) . ' must implement interface ' . AdditionalPageIndexer::class,
                1310491024
            );
        }

        // each indexer sees everything collected so far
        $extraDocuments = $indexer->getAdditionalPageDocuments($pageDocument, $collected);
        if (is_array($extraDocuments)) {
            $collected = array_merge($collected, $extraDocuments);
        }
    }

    return $collected;
}
536
|
|
|
|
537
|
|
|
/**
 * Hands the given documents to the field processing service together with
 * the configured field processing instructions. Nothing happens when no
 * instructions are configured.
 *
 * @param array $documents An array of documents to manipulate
 * @return void
 */
protected function processDocuments(array $documents)
{
    $instructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
    if (count($instructions) <= 0) {
        return;
    }

    $fieldProcessor = GeneralUtility::makeInstance(Service::class);
    $fieldProcessor->processDocuments($documents, $instructions);
}
551
|
|
|
|
552
|
|
|
/**
 * Sends the collected documents to the Solr index in chunks of 20.
 *
 * Any exception raised while adding, including a non-200 response from
 * Solr, is logged and turned into a FALSE return value.
 *
 * @param array $documents An array of \Apache_Solr_Document objects.
 * @return bool TRUE if documents were added successfully, FALSE otherwise (also for an empty list)
 */
protected function addDocumentsToSolrIndex(array $documents)
{
    if (count($documents) === 0) {
        return false;
    }

    try {
        $this->log('Adding ' . count($documents) . ' documents.', 0, $documents);

        // chunk adds by 20
        foreach (array_chunk($documents, 20) as $batch) {
            $response = $this->solrConnection->addDocuments($batch);

            if ($response->getHttpStatus() != 200) {
                throw new \RuntimeException(
                    'Solr Request failed.',
                    1331834983,
                    new \Apache_Solr_HttpTransportException($response)
                );
            }
        }

        return true;
    } catch (\Exception $exception) {
        $this->log($exception->getMessage() . ' Error code: ' . $exception->getCode(), 2);

        if ($this->configuration->getLoggingExceptions()) {
            GeneralUtility::devLog('Exception while adding documents', 'solr', 3, [$exception->__toString()]);
        }
    }

    return false;
}
591
|
|
|
|
592
|
|
|
/**
 * Returns the URL that is indexed as the page's URL.
 *
 * @return string URL of the current page.
 */
public function getPageUrl()
{
    $url = $this->pageUrl;

    return $url;
}
601
|
|
|
|
602
|
|
|
/**
 * Overrides the URL that is indexed as the page's URL.
 *
 * @param string $url The page's URL.
 * @return void
 */
public function setPageUrl($url)
{
    $this->pageUrl = $url;
}
611
|
|
|
|
612
|
|
|
/**
 * Returns the access rootline currently assigned to this indexer.
 *
 * @return Rootline The page's access rootline
 */
public function getPageAccessRootline()
{
    $accessRootline = $this->pageAccessRootline;

    return $accessRootline;
}
621
|
|
|
|
622
|
|
|
/**
 * Replaces the access rootline used for the page document.
 *
 * @param Rootline $accessRootline The page's access rootline
 * @return void
 */
public function setPageAccessRootline(Rootline $accessRootline)
{
    $this->pageAccessRootline = $accessRootline;
}
631
|
|
|
|
632
|
|
|
/**
 * Returns the documents recorded by the last indexPage() run that reached
 * the point of sending documents to Solr.
 *
 * @return array An array of \Apache_Solr_Document objects, empty before the first such run
 */
public function getDocumentsSentToSolr()
{
    $sentDocuments = $this->documentsSentToSolr;

    return $sentDocuments;
}
641
|
|
|
} |
642
|
|
|
|