1 | <?php |
||||||
2 | namespace ApacheSolrForTypo3\Solr; |
||||||
3 | |||||||
4 | /*************************************************************** |
||||||
5 | * Copyright notice |
||||||
6 | * |
||||||
7 | * (c) 2010-2015 Ingo Renner <[email protected]> |
||||||
8 | * All rights reserved |
||||||
9 | * |
||||||
10 | * This script is part of the TYPO3 project. The TYPO3 project is |
||||||
11 | * free software; you can redistribute it and/or modify |
||||||
12 | * it under the terms of the GNU General Public License as published by |
||||||
13 | * the Free Software Foundation; either version 3 of the License, or |
||||||
14 | * (at your option) any later version. |
||||||
15 | * |
||||||
16 | * The GNU General Public License can be found at |
||||||
17 | * http://www.gnu.org/copyleft/gpl.html. |
||||||
18 | * |
||||||
19 | * This script is distributed in the hope that it will be useful, |
||||||
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
22 | * GNU General Public License for more details. |
||||||
23 | * |
||||||
24 | * This copyright notice MUST APPEAR in all copies of the script! |
||||||
25 | ***************************************************************/ |
||||||
26 | |||||||
27 | use ApacheSolrForTypo3\Solr\Domain\Index\Queue\GarbageRemover\StrategyFactory; |
||||||
28 | use ApacheSolrForTypo3\Solr\IndexQueue\Queue; |
||||||
29 | use ApacheSolrForTypo3\Solr\System\TCA\TCAService; |
||||||
30 | use TYPO3\CMS\Backend\Utility\BackendUtility; |
||||||
31 | use TYPO3\CMS\Core\DataHandling\DataHandler; |
||||||
32 | use TYPO3\CMS\Core\SingletonInterface; |
||||||
33 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||||||
34 | |||||||
/**
 * Garbage Collector, removes related documents from the index when a record is
 * set to hidden, is deleted or is otherwise made invisible to website visitors.
 *
 * Garbage collection will happen for online/LIVE workspaces only.
 *
 * @author Ingo Renner <[email protected]>
 * @author Timo Schmidt <[email protected]>
 */
class GarbageCollector extends AbstractDataHandlerListener implements SingletonInterface
{
    /**
     * Previous state of the visibility affecting fields of records that are
     * about to be changed, indexed as [table name][record uid]. Written by
     * processDatamap_preProcessFieldArray() and read by
     * hasFrontendGroupsRemoved().
     *
     * @var array
     */
    protected $trackedRecords = [];

    /**
     * @var TCAService
     */
    protected $tcaService;

    /**
     * GarbageCollector constructor.
     *
     * @param TCAService|null $TCAService Optional TCA service; when omitted an
     *                                    instance is created via GeneralUtility::makeInstance().
     */
    public function __construct(TCAService $TCAService = null)
    {
        parent::__construct();
        $this->tcaService = $TCAService ?? GeneralUtility::makeInstance(TCAService::class);
    }

    /**
     * Hooks into TCE main and tracks record deletion commands.
     *
     * On a "delete" command in the LIVE workspace the record's garbage is
     * collected; for pages the Index Queue item is deleted as well.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     * @return void
     */
    public function processCmdmap_preProcess($command, $table, $uid, $value, DataHandler $tceMain)
    {
        // workspaces: collect garbage only for LIVE workspace
        if ($command === 'delete' && (int)$GLOBALS['BE_USER']->workspace === 0) {
            $this->collectGarbage($table, $uid);

            if ($table === 'pages') {
                $this->getIndexQueue()->deleteItem($table, $uid);
            }
        }
    }

    /**
     * Holds the configuration when a recursive page queuing should be triggered.
     *
     * Each entry pairs the required current state of a page ("currentState")
     * with the field change ("changeSet") that has to come in on top of it.
     *
     * @return array
     */
    protected function getUpdateSubPagesRecursiveTriggerConfiguration()
    {
        return [
            // the current page has the field "extendToSubpages" enabled and the field "hidden" was set to 1
            'extendToSubpageEnabledAndHiddenFlagWasAdded' => [
                'currentState' => ['extendToSubpages' => '1'],
                'changeSet' => ['hidden' => '1']
            ],
            // the current page has the field "hidden" enabled and the field "extendToSubpages" was set to 1
            'hiddenIsEnabledAndExtendToSubPagesWasAdded' => [
                'currentState' => ['hidden' => '1'],
                'changeSet' => ['extendToSubpages' => '1']
            ]
        ];
    }

    /**
     * Tracks down index documents belonging to a particular record or page and
     * removes them from the index and the Index Queue.
     *
     * The actual removal is delegated to the garbage remover strategy that
     * StrategyFactory returns for the given table.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     * @throws \UnexpectedValueException presumably raised by the strategy if a hook object does not implement
     *                                   interface \ApacheSolrForTypo3\Solr\GarbageCollectorPostProcessor
     *                                   (not visible in this class, to be confirmed)
     */
    public function collectGarbage($table, $uid)
    {
        $garbageRemoverStrategy = StrategyFactory::getByTable($table);
        $garbageRemoverStrategy->removeGarbageOf($table, $uid);
    }

    /**
     * Collects the garbage of all sub pages of a page when the change set
     * requires a recursive page update.
     *
     * The page itself is not handled here; the caller collects its garbage
     * before invoking this method.
     *
     * @param string $table
     * @param int $uid
     * @param array $changedFields
     */
    protected function deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $changedFields)
    {
        if (!$this->isRecursivePageUpdateRequired($uid, $changedFields)) {
            return;
        }

        // get affected subpages when "extendToSubpages" flag was set
        $pagesToDelete = $this->getSubPageIds($uid);
        foreach ($pagesToDelete as $pageToDelete) {
            $this->collectGarbage($table, $pageToDelete);
        }
    }

    // methods checking whether to trigger garbage collection

    /**
     * Hooks into TCE main and tracks page move commands.
     *
     * A moved page in the LIVE workspace is removed from the index and
     * afterwards handed to the Index Queue again for re-indexing.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processCmdmap_postProcess($command, $table, $uid, $value, DataHandler $tceMain)
    {
        // workspaces: collect garbage only for LIVE workspace
        if ($command === 'move' && $table === 'pages' && (int)$GLOBALS['BE_USER']->workspace === 0) {
            // TODO the below comment is not valid anymore, pid has been removed from doc ID
            // ...still needed?

            // must be removed from index since the pid changes and
            // is part of the Solr document ID
            $this->collectGarbage($table, $uid);

            // now re-index with new properties
            $this->getIndexQueue()->updateItem($table, $uid);
        }
    }

    /**
     * Hooks into TCE main and tracks changed records. In this case the current
     * record's values are stored to do a change comparison later on for fields
     * like fe_group.
     *
     * Nothing is tracked for new records, draft (workspace) records, tables
     * without an fe_group enable column or records that cannot be loaded.
     *
     * @param array $incomingFields An array of incoming fields, new or changed, not used
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_preProcessFieldArray($incomingFields, $table, $uid, DataHandler $tceMain)
    {
        if (!is_int($uid)) {
            // a newly created record, skip
            return;
        }

        if (Util::isDraftRecord($table, $uid)) {
            // skip workspaces: collect garbage only for LIVE workspace
            return;
        }

        $hasConfiguredEnableColumnForFeGroup = $this->tcaService->isEnableColumn($table, 'fe_group');
        if (!$hasConfiguredEnableColumnForFeGroup) {
            return;
        }

        // same lookup as used after the database operation, so both states are comparable
        $record = $this->getRecordWithFieldRelevantForGarbageCollection($table, $uid);
        // If no record could be found skip further processing
        if (empty($record)) {
            return;
        }

        $record = $this->tcaService->normalizeFrontendGroupField($table, $record);

        // keep previous state of important fields for later comparison
        $this->trackedRecords[$table][$uid] = $record;
    }

    /**
     * Hooks into TCE Main and watches all record updates. If a change is
     * detected that would remove the record from the website, we try to find
     * related documents and remove them from the index.
     *
     * @param string $status Status of the current operation, 'new' or 'update'
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param array $fields The record's data; only used for pages to decide whether sub pages are affected
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_afterDatabaseOperations($status, $table, $uid, array $fields, DataHandler $tceMain)
    {
        if ($status === 'new') {
            // a newly created record, skip
            return;
        }

        if (Util::isDraftRecord($table, $uid)) {
            // skip workspaces: collect garbage only for LIVE workspace
            return;
        }

        $record = $this->getRecordWithFieldRelevantForGarbageCollection($table, $uid);

        // If no record could be found skip further processing
        if (empty($record)) {
            return;
        }

        $record = $this->tcaService->normalizeFrontendGroupField($table, $record);
        $isGarbage = $this->getIsGarbageRecord($table, $record);
        if (!$isGarbage) {
            return;
        }

        $this->collectGarbage($table, $uid);

        if ($table === 'pages') {
            $this->deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $fields);
        }
    }

    /**
     * Check if a record is getting invisible due to changes in start or endtime. In addition it is checked that the related
     * queue item was marked as indexed.
     *
     * @param string $table
     * @param array $record
     * @return bool
     */
    protected function isInvisibleByStartOrEndtime($table, $record)
    {
        return (
            ($this->tcaService->isStartTimeInFuture($table, $record) || $this->tcaService->isEndTimeInPast($table, $record)) &&
            $this->isRelatedQueueRecordMarkedAsIndexed($table, $record)
        );
    }

    /**
     * Checks if the related index queue item is indexed.
     *
     * * For tt_content the page from the pid is checked
     * * For all other records the table itself is checked
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the record is marked as being indexed
     */
    protected function isRelatedQueueRecordMarkedAsIndexed($table, $record)
    {
        if ($table === 'tt_content') {
            $table = 'pages';
            $uid = $record['pid'];
        } else {
            $uid = $record['uid'];
        }

        return $this->getIndexQueue()->containsIndexedItem($table, $uid);
    }

    /**
     * Returns the Index Queue instance obtained via GeneralUtility::makeInstance().
     *
     * @return Queue
     */
    private function getIndexQueue()
    {
        return GeneralUtility::makeInstance(Queue::class);
    }

    /**
     * Checks whether a frontend group field exists for the record and if so
     * whether groups have been removed from accessing the record thus making
     * the record invisible to at least some people.
     *
     * The current groups are compared with the state remembered in
     * $this->trackedRecords. When no previous state was tracked for the
     * record, the previous value is treated as an empty string.
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool TRUE if frontend groups have been removed from access to the record, FALSE otherwise.
     */
    protected function hasFrontendGroupsRemoved($table, $record)
    {
        if (!isset($GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'])) {
            return false;
        }

        $frontendGroupsField = $GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'];

        // "??" keeps the former result (null was cast to '') but avoids
        // "undefined array key" warnings for untracked records or missing fields
        $previousValue = $this->trackedRecords[$table][$record['uid']][$frontendGroupsField] ?? '';
        $currentValue = $record[$frontendGroupsField] ?? '';

        $previousGroups = explode(',', (string)$previousValue);
        $currentGroups = explode(',', (string)$currentValue);
        $removedGroups = array_diff($previousGroups, $currentGroups);

        return count($removedGroups) > 0;
    }

    /**
     * Checks whether the page has been excluded from searching.
     *
     * A missing "no_search" field is treated like a disabled flag.
     *
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the page has been excluded from searching, FALSE otherwise
     */
    protected function isPageExcludedFromSearch($record)
    {
        return !empty($record['no_search']);
    }

    /**
     * Checks whether a page has a page type that can be indexed.
     *
     * The decision is delegated to the frontend environment's
     * isAllowedPageType() check.
     *
     * @param array $record A page record
     * @return bool TRUE if the page can be indexed according to its page type, FALSE otherwise
     */
    protected function isIndexablePageType(array $record)
    {
        return $this->frontendEnvironment->isAllowedPageType($record);
    }

    /**
     * Determines if a record is garbage and can be deleted.
     *
     * A record is garbage when it is hidden, invisible by start-/endtime,
     * lost frontend groups or - for pages only - is excluded from search or
     * has a page type that is not indexable.
     *
     * @param string $table
     * @param array $record
     * @return bool
     */
    protected function getIsGarbageRecord($table, $record): bool
    {
        return $this->tcaService->isHidden($table, $record) ||
                $this->isInvisibleByStartOrEndtime($table, $record) ||
                $this->hasFrontendGroupsRemoved($table, $record) ||
                ($table === 'pages' && $this->isPageExcludedFromSearch($record)) ||
                ($table === 'pages' && !$this->isIndexablePageType($record));
    }

    /**
     * Returns a record with all visibility affecting fields.
     *
     * The result of BackendUtility::getRecord() is cast to an array, so
     * callers can test the return value with empty() when nothing was found.
     *
     * @param string $table
     * @param int $uid
     * @return array
     */
    protected function getRecordWithFieldRelevantForGarbageCollection($table, $uid): array
    {
        $garbageCollectionRelevantFields = $this->tcaService->getVisibilityAffectingFieldsByTable($table);

        return (array)BackendUtility::getRecord($table, $uid, $garbageCollectionRelevantFields, '', false);
    }
}
||||||
375 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.