dkd-kaehm /
ext-solr
| 1 | <?php |
||||||
| 2 | namespace ApacheSolrForTypo3\Solr; |
||||||
| 3 | |||||||
| 4 | /*************************************************************** |
||||||
| 5 | * Copyright notice |
||||||
| 6 | * |
||||||
| 7 | * (c) 2010-2015 Ingo Renner <[email protected]> |
||||||
| 8 | * All rights reserved |
||||||
| 9 | * |
||||||
| 10 | * This script is part of the TYPO3 project. The TYPO3 project is |
||||||
| 11 | * free software; you can redistribute it and/or modify |
||||||
| 12 | * it under the terms of the GNU General Public License as published by |
||||||
| 13 | * the Free Software Foundation; either version 3 of the License, or |
||||||
| 14 | * (at your option) any later version. |
||||||
| 15 | * |
||||||
| 16 | * The GNU General Public License can be found at |
||||||
| 17 | * http://www.gnu.org/copyleft/gpl.html. |
||||||
| 18 | * |
||||||
| 19 | * This script is distributed in the hope that it will be useful, |
||||||
| 20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
| 21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||||
| 22 | * GNU General Public License for more details. |
||||||
| 23 | * |
||||||
| 24 | * This copyright notice MUST APPEAR in all copies of the script! |
||||||
| 25 | ***************************************************************/ |
||||||
| 26 | |||||||
| 27 | use ApacheSolrForTypo3\Solr\Domain\Index\Queue\GarbageRemover\StrategyFactory; |
||||||
| 28 | use ApacheSolrForTypo3\Solr\IndexQueue\Queue; |
||||||
| 29 | use ApacheSolrForTypo3\Solr\System\TCA\TCAService; |
||||||
| 30 | use TYPO3\CMS\Backend\Utility\BackendUtility; |
||||||
| 31 | use TYPO3\CMS\Core\DataHandling\DataHandler; |
||||||
| 32 | use TYPO3\CMS\Core\SingletonInterface; |
||||||
| 33 | use TYPO3\CMS\Core\Utility\GeneralUtility; |
||||||
| 34 | |||||||
| 35 | /** |
||||||
| 36 | * Garbage Collector, removes related documents from the index when a record is |
||||||
| 37 | * set to hidden, is deleted or is otherwise made invisible to website visitors. |
||||||
| 38 | * |
||||||
| 39 | * Garbage collection will happen for online/LIVE workspaces only. |
||||||
| 40 | * |
||||||
| 41 | * @author Ingo Renner <[email protected]> |
||||||
| 42 | * @author Timo Schmidt <[email protected]> |
||||||
| 43 | */ |
||||||
class GarbageCollector extends AbstractDataHandlerListener implements SingletonInterface
{
    /**
     * State of the visibility-relevant fields of records as captured in
     * processDatamap_preProcessFieldArray(), keyed as [table][uid]. Used later
     * to compare against the state after the database operation.
     *
     * @var array
     */
    protected $trackedRecords = [];

    /**
     * @var TCAService
     */
    protected $tcaService;

    /**
     * GarbageCollector constructor.
     *
     * @param TCAService|null $TCAService TCA service to use; when omitted an
     *                                    instance is obtained via GeneralUtility::makeInstance()
     */
    public function __construct(TCAService $TCAService = null)
    {
        parent::__construct();
        $this->tcaService = $TCAService ?? GeneralUtility::makeInstance(TCAService::class);
    }

    /**
     * Hooks into TCE main and tracks record deletion commands.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     * @return void
     */
    public function processCmdmap_preProcess($command, $table, $uid, $value, DataHandler $tceMain)
    {
        // workspaces: collect garbage only for LIVE workspace.
        // A missing backend user / workspace is treated as LIVE (0), which is
        // what the former loose "== 0" comparison evaluated to as well.
        if ($command === 'delete' && (int)($GLOBALS['BE_USER']->workspace ?? 0) === 0) {
            $this->collectGarbage($table, $uid);

            if ($table === 'pages') {
                $this->getIndexQueue()->deleteItem($table, $uid);
            }
        }
    }

    /**
     * Returns the configuration describing when a recursive page queuing
     * should be triggered.
     *
     * Each entry pairs the state the page must currently have ('currentState')
     * with the change that has to be part of the update ('changeSet').
     *
     * @return array
     */
    protected function getUpdateSubPagesRecursiveTriggerConfiguration()
    {
        return [
            // the current page has the field "extendToSubpages" enabled and the field "hidden" was set to 1
            'extendToSubpageEnabledAndHiddenFlagWasAdded' => [
                'currentState' => ['extendToSubpages' => '1'],
                'changeSet' => ['hidden' => '1']
            ],
            // the current page has the field "hidden" enabled and the field "extendToSubpages" was set to 1
            'hiddenIsEnabledAndExtendToSubPagesWasAdded' => [
                'currentState' => ['hidden' => '1'],
                'changeSet' => ['extendToSubpages' => '1']
            ]
        ];
    }

    /**
     * Tracks down index documents belonging to a particular record or page and
     * removes them from the index and the Index Queue.
     *
     * The actual removal is delegated to the strategy that
     * StrategyFactory::getByTable() returns for the given table.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     * @throws \UnexpectedValueException if a hook object does not implement interface \ApacheSolrForTypo3\Solr\GarbageCollectorPostProcessor
     *         (NOTE(review): not raised in this class itself; presumably thrown by the removal strategy - confirm)
     */
    public function collectGarbage($table, $uid)
    {
        $garbageRemoverStrategy = StrategyFactory::getByTable($table);
        $garbageRemoverStrategy->removeGarbageOf($table, $uid);
    }

    /**
     * Removes the documents of all sub pages of a page when the changed fields
     * require a recursive update (see isRecursivePageUpdateRequired()).
     *
     * @param string $table
     * @param int $uid
     * @param array $changedFields
     */
    protected function deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $changedFields)
    {
        if (!$this->isRecursivePageUpdateRequired($uid, $changedFields)) {
            return;
        }

        // get affected subpages when "extendToSubpages" flag was set; the page
        // itself is not handled here, the caller collects its garbage.
        $pagesToDelete = $this->getSubPageIds($uid);
        foreach ($pagesToDelete as $pageToDelete) {
            $this->collectGarbage($table, $pageToDelete);
        }
    }

    // methods checking whether to trigger garbage collection

    /**
     * Hooks into TCE main and tracks page move commands.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processCmdmap_postProcess($command, $table, $uid, $value, DataHandler $tceMain)
    {
        // workspaces: collect garbage only for LIVE workspace.
        // A missing backend user / workspace is treated as LIVE (0).
        if ($command === 'move'
            && $table === 'pages'
            && (int)($GLOBALS['BE_USER']->workspace ?? 0) === 0
        ) {
            // TODO the below comment is not valid anymore, pid has been removed from doc ID
            // ...still needed?

            // must be removed from index since the pid changes and
            // is part of the Solr document ID
            $this->collectGarbage($table, $uid);

            // now re-index with new properties
            $this->getIndexQueue()->updateItem($table, $uid);
        }
    }

    /**
     * Hooks into TCE main and tracks changed records. In this case the current
     * record's values are stored to do a change comparison later on for fields
     * like fe_group.
     *
     * @param array $incomingFields An array of incoming fields, new or changed, not used
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_preProcessFieldArray($incomingFields, $table, $uid, DataHandler $tceMain)
    {
        if (!is_int($uid)) {
            // a newly created record, skip
            return;
        }

        if (Util::isDraftRecord($table, $uid)) {
            // skip workspaces: collect garbage only for LIVE workspace
            return;
        }

        // the previous state is only needed for the frontend group comparison
        if (!$this->tcaService->isEnableColumn($table, 'fe_group')) {
            return;
        }

        $record = $this->getRecordWithFieldRelevantForGarbageCollection($table, $uid);
        // If no record could be found skip further processing
        if (empty($record)) {
            return;
        }

        // keep previous state of important fields for later comparison
        $this->trackedRecords[$table][$uid] = $this->tcaService->normalizeFrontendGroupField($table, $record);
    }

    /**
     * Hooks into TCE Main and watches all record updates. If a change is
     * detected that would remove the record from the website, we try to find
     * related documents and remove them from the index.
     *
     * @param string $status Status of the current operation, 'new' or 'update'
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param array $fields The record's data; for pages it is passed on as the set of changed fields
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_afterDatabaseOperations($status, $table, $uid, array $fields, DataHandler $tceMain)
    {
        if ($status === 'new') {
            // a newly created record, skip
            return;
        }

        if (Util::isDraftRecord($table, $uid)) {
            // skip workspaces: collect garbage only for LIVE workspace
            return;
        }

        $record = $this->getRecordWithFieldRelevantForGarbageCollection($table, $uid);

        // If no record could be found skip further processing
        if (empty($record)) {
            return;
        }

        $record = $this->tcaService->normalizeFrontendGroupField($table, $record);
        if (!$this->getIsGarbageRecord($table, $record)) {
            return;
        }

        $this->collectGarbage($table, $uid);

        if ($table === 'pages') {
            $this->deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $fields);
        }
    }

    /**
     * Check if a record is getting invisible due to changes in start or endtime. In addition it is checked that the related
     * queue item was marked as indexed.
     *
     * @param string $table
     * @param array $record
     * @return bool
     */
    protected function isInvisibleByStartOrEndtime($table, $record)
    {
        $isOutsideOfPublicationWindow = $this->tcaService->isStartTimeInFuture($table, $record)
            || $this->tcaService->isEndTimeInPast($table, $record);

        return $isOutsideOfPublicationWindow && $this->isRelatedQueueRecordMarkedAsIndexed($table, $record);
    }

    /**
     * Checks if the related index queue item is indexed.
     *
     * * For tt_content the page from the pid is checked
     * * For all other records the table itself is checked
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the record is marked as being indexed
     */
    protected function isRelatedQueueRecordMarkedAsIndexed($table, $record)
    {
        if ($table === 'tt_content') {
            $table = 'pages';
            $uid = $record['pid'];
        } else {
            $uid = $record['uid'];
        }

        return $this->getIndexQueue()->containsIndexedItem($table, $uid);
    }

    /**
     * @return Queue
     */
    private function getIndexQueue()
    {
        return GeneralUtility::makeInstance(Queue::class);
    }

    /**
     * Checks whether a frontend group field exists for the record and if so
     * whether groups have been removed from accessing the record thus making
     * the record invisible to at least some people.
     *
     * The comparison is done against the state tracked in
     * processDatamap_preProcessFieldArray(). Without such a tracked state no
     * removal can be determined and FALSE is returned.
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool TRUE if frontend groups have been removed from access to the record, FALSE otherwise.
     */
    protected function hasFrontendGroupsRemoved($table, $record)
    {
        if (!isset($GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'])) {
            return false;
        }

        $frontendGroupsField = $GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'];

        // Without a tracked previous state there is nothing to compare with.
        // Comparing against an empty previous value would report a removed
        // group for almost every record and wrongly flag it as garbage.
        $recordUid = $record['uid'] ?? null;
        if ($recordUid === null || !isset($this->trackedRecords[$table][$recordUid])) {
            return false;
        }
        $previousRecord = $this->trackedRecords[$table][$recordUid];

        $previousGroups = explode(',', (string)($previousRecord[$frontendGroupsField] ?? ''));
        $currentGroups = explode(',', (string)($record[$frontendGroupsField] ?? ''));
        $removedGroups = array_diff($previousGroups, $currentGroups);

        return count($removedGroups) > 0;
    }

    /**
     * Checks whether the page has been excluded from searching.
     *
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the page has been excluded from searching, FALSE otherwise
     */
    protected function isPageExcludedFromSearch($record)
    {
        // same truthiness rules as a bool cast, but tolerant of a missing field
        return !empty($record['no_search']);
    }

    /**
     * Checks whether a page has a page type that can be indexed.
     * Currently standard pages and mount pages can be indexed.
     *
     * @param array $record A page record
     * @return bool TRUE if the page can be indexed according to its page type, FALSE otherwise
     */
    protected function isIndexablePageType(array $record)
    {
        return $this->frontendEnvironment->isAllowedPageType($record);
    }

    /**
     * Determines if a record is garbage and can be deleted.
     *
     * A record is garbage when it is hidden, invisible by start-/endtime,
     * lost frontend groups or - for pages only - is excluded from search or
     * has a page type that is not indexable.
     *
     * @param string $table
     * @param array $record
     * @return bool
     */
    protected function getIsGarbageRecord($table, $record): bool
    {
        return $this->tcaService->isHidden($table, $record)
            || $this->isInvisibleByStartOrEndtime($table, $record)
            || $this->hasFrontendGroupsRemoved($table, $record)
            || (
                $table === 'pages'
                && ($this->isPageExcludedFromSearch($record) || !$this->isIndexablePageType($record))
            );
    }

    /**
     * Returns a record with all visibility affecting fields.
     *
     * An empty array is returned when the record could not be found.
     *
     * @param string $table
     * @param int $uid
     * @return array
     */
    protected function getRecordWithFieldRelevantForGarbageCollection($table, $uid): array
    {
        $garbageCollectionRelevantFields = $this->tcaService->getVisibilityAffectingFieldsByTable($table);

        return (array)BackendUtility::getRecord($table, $uid, $garbageCollectionRelevantFields, '', false);
    }
}
||||||
| 375 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.