Issues (202)

Classes/GarbageCollector.php (7 issues)

Severity
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2010-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 3 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\Domain\Index\Queue\GarbageRemover\StrategyFactory;
28
use ApacheSolrForTypo3\Solr\IndexQueue\Queue;
29
use ApacheSolrForTypo3\Solr\System\TCA\TCAService;
30
use TYPO3\CMS\Backend\Utility\BackendUtility;
31
use TYPO3\CMS\Core\DataHandling\DataHandler;
32
use TYPO3\CMS\Core\SingletonInterface;
33
use TYPO3\CMS\Core\Utility\GeneralUtility;
34
35
/**
36
 * Garbage Collector, removes related documents from the index when a record is
37
 * set to hidden, is deleted or is otherwise made invisible to website visitors.
38
 *
39
 * Garbage collection will happen for online/LIVE workspaces only.
40
 *
41
 * @author Ingo Renner <[email protected]>
42
 * @author Timo Schmidt <[email protected]>
43
 */
44
class GarbageCollector extends AbstractDataHandlerListener implements SingletonInterface
45
{
46
    /**
47
     * @var array
48
     */
49
    protected $trackedRecords = [];
50
51
    /**
52
     * @var TCAService
53
     */
54
    protected $tcaService;
55
56
    /**
     * Sets up the garbage collector and the TCA service it works with.
     *
     * @param TCAService|null $TCAService Optional TCA service. When null is passed (the default), an
     *                                    instance is obtained via GeneralUtility::makeInstance().
     */
    public function __construct(TCAService $TCAService = null)
    {
        parent::__construct();

        if ($TCAService === null) {
            $TCAService = GeneralUtility::makeInstance(TCAService::class);
        }

        $this->tcaService = $TCAService;
    }
65
66
    /**
     * Hooks into TCE main and reacts to record deletion commands.
     *
     * For a "delete" command issued while the backend user is in workspace 0 (LIVE), the record's
     * garbage is collected. If the record is a page, it is also deleted from the Index Queue.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used; kept because it is part of the hook signature
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used; part of the hook signature
     * @return void
     */
    public function processCmdmap_preProcess($command, $table, $uid, $value, DataHandler $tceMain)
    {
        if ($command !== 'delete') {
            return;
        }

        // workspaces: collect garbage only for LIVE workspace
        if ($GLOBALS['BE_USER']->workspace != 0) {
            return;
        }

        $this->collectGarbage($table, $uid);

        if ($table === 'pages') {
            $this->getIndexQueue()->deleteItem($table, $uid);
        }
    }
87
88 2
    /**
     * Returns the configuration describing when a recursive page queuing should be triggered.
     *
     * Each named entry pairs a 'currentState' (field values the page already has) with a
     * 'changeSet' (field values that are being written).
     * NOTE(review): the consumer of this configuration is not part of this class body; presumably
     * the parent listener evaluates it in isRecursivePageUpdateRequired() - confirm.
     *
     * @return array
     */
    protected function getUpdateSubPagesRecursiveTriggerConfiguration()
    {
        $triggers = [];

        // the current page has the field "extendToSubpages" enabled and the field "hidden" was set to 1
        $triggers['extendToSubpageEnabledAndHiddenFlagWasAdded'] = [
            'currentState' => ['extendToSubpages' => '1'],
            'changeSet' => ['hidden' => '1'],
        ];

        // the current page has the field "hidden" enabled and the field "extendToSubpages" was set to 1
        $triggers['hiddenIsEnabledAndExtendToSubPagesWasAdded'] = [
            'currentState' => ['hidden' => '1'],
            'changeSet' => ['extendToSubpages' => '1'],
        ];

        return $triggers;
    }
109
110
    /**
     * Removes the index documents belonging to a particular record or page.
     *
     * The actual removal is delegated to the garbage remover strategy that
     * StrategyFactory::getByTable() provides for the given table.
     *
     * NOTE(review): the @throws below is carried over from the previous documentation; nothing in
     * this method throws directly, so it is presumably raised inside the strategy - confirm.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     * @throws \UnexpectedValueException if a hook object does not implement interface \ApacheSolrForTypo3\Solr\GarbageCollectorPostProcessor
     */
    public function collectGarbage($table, $uid)
    {
        StrategyFactory::getByTable($table)->removeGarbageOf($table, $uid);
    }
123
124 10
    /**
     * Collects garbage for the sub pages of a page when the changed fields require a recursive update.
     *
     * Nothing happens unless isRecursivePageUpdateRequired() confirms the change set.
     *
     * @param string $table The table name passed on to collectGarbage() for every sub page
     * @param int $uid The uid of the page whose sub pages are looked up
     * @param array $changedFields The fields that were changed on the page
     */
    protected function deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $changedFields)
    {
        if ($this->isRecursivePageUpdateRequired($uid, $changedFields)) {
            // every page id reported by getSubPageIds() gets its documents removed
            foreach ($this->getSubPageIds($uid) as $subPageId) {
                $this->collectGarbage($table, $subPageId);
            }
        }
    }
142
143
    // methods checking whether to trigger garbage collection
144
145
    /**
     * Hooks into TCE main and reacts to page move commands.
     *
     * When a page is moved while the backend user is in workspace 0 (LIVE), the page's garbage is
     * collected and the page is then updated in the Index Queue so it gets re-indexed.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used; kept because it is part of the hook signature
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used; part of the hook signature
     */
    public function processCmdmap_postProcess($command, $table, $uid, $value, DataHandler $tceMain)
    {
        if ($command !== 'move' || $table !== 'pages') {
            return;
        }

        // workspaces: collect garbage only for LIVE workspace
        if ($GLOBALS['BE_USER']->workspace != 0) {
            return;
        }

        // TODO the reasoning below is no longer valid, the pid has been removed from the document ID
        // ...is the removal still needed?
        // (original reasoning: the document must be removed from the index since the pid changes and
        // is part of the Solr document ID)
        $this->collectGarbage($table, $uid);

        // now re-index with new properties
        $this->getIndexQueue()->updateItem($table, $uid);
    }
168 4
169
    /**
     * Hooks into TCE main and tracks changed records.
     *
     * The record's current, visibility affecting values are stored in $this->trackedRecords so that
     * a change comparison (for fields like fe_group) can be done later on. Tracking is skipped for
     * new records, draft records, tables without an fe_group enable column and records that
     * cannot be loaded.
     *
     * @param array $incomingFields An array of incoming fields, new or changed, not used
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_preProcessFieldArray($incomingFields, $table, $uid, DataHandler $tceMain)
    {
        // a non-integer uid denotes a newly created record; draft records are skipped because
        // garbage is collected for the LIVE workspace only
        if (!is_int($uid) || Util::isDraftRecord($table, $uid)) {
            return;
        }

        // without an fe_group enable column there is nothing to compare later on
        if (!$this->tcaService->isEnableColumn($table, 'fe_group')) {
            return;
        }

        $fieldList = $this->tcaService->getVisibilityAffectingFieldsByTable($table);
        $currentRecord = (array)BackendUtility::getRecord($table, $uid, $fieldList, '', false);
        if ($currentRecord === []) {
            // no record could be found, skip further processing
            return;
        }

        // keep previous state of important fields for later comparison
        $this->trackedRecords[$table][$uid] = $this->tcaService->normalizeFrontendGroupField($table, $currentRecord);
    }
208 2
209
    /**
     * Hooks into TCE Main and watches all record updates.
     *
     * If the updated record turns out to be garbage (see getIsGarbageRecord()), its documents are
     * collected. For pages the sub pages are handled as well, via
     * deleteSubpagesWhenExtendToSubpagesIsSet().
     *
     * @param string $status Status of the current operation, 'new' or 'update'
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param array $fields The record's data; only passed on to the sub page handling for pages
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_afterDatabaseOperations($status, $table, $uid, array $fields, DataHandler $tceMain)
    {
        // newly created records are skipped, as are draft records:
        // garbage is collected for the LIVE workspace only
        if ($status === 'new' || Util::isDraftRecord($table, $uid)) {
            return;
        }

        $currentRecord = $this->getRecordWithFieldRelevantForGarbageCollection($table, $uid);
        if (empty($currentRecord)) {
            // no record could be found, skip further processing
            return;
        }

        $normalizedRecord = $this->tcaService->normalizeFrontendGroupField($table, $currentRecord);
        if ($this->getIsGarbageRecord($table, $normalizedRecord)) {
            $this->collectGarbage($table, $uid);

            if ($table === 'pages') {
                $this->deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $fields);
            }
        }
    }
251
252
    /**
     * Checks if a record becomes invisible because its start time lies in the future or its end
     * time lies in the past. In addition, the related queue item must be marked as indexed.
     *
     * @param string $table
     * @param array $record
     * @return bool
     */
    protected function isInvisibleByStartOrEndtime($table, $record)
    {
        // neither time restriction applies: the queue does not need to be consulted at all
        if (!$this->tcaService->isStartTimeInFuture($table, $record)
            && !$this->tcaService->isEndTimeInPast($table, $record)
        ) {
            return false;
        }

        return (bool)$this->isRelatedQueueRecordMarkedAsIndexed($table, $record);
    }
267
268
    /**
     * Checks if the related index queue item is indexed.
     *
     * * For tt_content the page referenced by the record's pid is checked
     * * For all other records the table itself is checked with the record's uid
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the record is marked as being indexed
     */
    protected function isRelatedQueueRecordMarkedAsIndexed($table, $record)
    {
        $isContentElement = $table === 'tt_content';

        $queueTable = $isContentElement ? 'pages' : $table;
        $queueUid = $isContentElement ? $record['pid'] : $record['uid'];

        return $this->getIndexQueue()->containsIndexedItem($queueTable, $queueUid);
    }
289
290 2
    /**
     * Provides the Index Queue, obtained through GeneralUtility::makeInstance().
     *
     * @return Queue
     */
    private function getIndexQueue()
    {
        /** @var Queue $indexQueue */
        $indexQueue = GeneralUtility::makeInstance(Queue::class);

        return $indexQueue;
    }
297
298
    /**
     * Checks whether a frontend group field exists for the record and if so
     * whether groups have been removed from accessing the record, thus making
     * the record invisible to at least some people.
     *
     * The comparison is done against the state tracked in $this->trackedRecords. If no previous
     * state was tracked for the record, no removal can be detected and FALSE is returned.
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool TRUE if frontend groups have been removed from access to the record, FALSE otherwise.
     */
    protected function hasFrontendGroupsRemoved($table, $record)
    {
        $frontendGroupsField = $GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'] ?? null;
        if ($frontendGroupsField === null) {
            return false;
        }

        $uid = $record['uid'] ?? null;
        if ($uid === null || !isset($this->trackedRecords[$table][$uid])) {
            // Without a tracked previous state there is nothing to compare against. Reading the
            // missing entry would raise an "undefined array key" warning, and the resulting empty
            // previous group would be reported as removed.
            return false;
        }

        $trackedRecord = $this->trackedRecords[$table][$uid];
        $previousGroups = explode(',', (string)($trackedRecord[$frontendGroupsField] ?? ''));
        $currentGroups = explode(',', (string)($record[$frontendGroupsField] ?? ''));

        // groups present before the change but missing afterwards
        $removedGroups = array_diff($previousGroups, $currentGroups);

        return count($removedGroups) > 0;
    }
321
322 5
    /**
     * Checks whether the page has been excluded from searching.
     *
     * A record without a "no_search" field is treated as not excluded.
     *
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the page has been excluded from searching, FALSE otherwise
     */
    protected function isPageExcludedFromSearch($record)
    {
        // canonical (bool) cast; the null coalescing guard avoids an undefined array key warning
        return (bool)($record['no_search'] ?? false);
    }
332
333 5
    /**
     * Checks whether a page has a page type that can be indexed.
     *
     * The decision is delegated to the frontend environment's isAllowedPageType().
     * NOTE(review): earlier documentation stated that standard pages and mount pages can be
     * indexed; this is not visible here - confirm against the frontend environment.
     *
     * @param array $record A page record
     * @return bool TRUE if the page can be indexed according to its page type, FALSE otherwise
     */
    protected function isIndexablePageType(array $record)
    {
        $isAllowedPageType = $this->frontendEnvironment->isAllowedPageType($record);

        return $isAllowedPageType;
    }
344
345
    /**
     * Determines if a record is garbage and can be deleted.
     *
     * A record is garbage when it is hidden, invisible by start or end time, or has lost frontend
     * groups. A page is also garbage when it is excluded from search or has a page type that is
     * not indexable. The checks run in this order and stop at the first hit.
     *
     * @param string $table
     * @param array $record
     * @return bool
     */
    protected function getIsGarbageRecord($table, $record): bool
    {
        if ($this->tcaService->isHidden($table, $record)) {
            return true;
        }

        if ($this->isInvisibleByStartOrEndtime($table, $record)) {
            return true;
        }

        if ($this->hasFrontendGroupsRemoved($table, $record)) {
            return true;
        }

        // the remaining checks apply to pages only
        if ($table !== 'pages') {
            return false;
        }

        return $this->isPageExcludedFromSearch($record) || !$this->isIndexablePageType($record);
    }
360
361
    /**
     * Returns a record with all visibility affecting fields.
     *
     * The field list comes from the TCA service. The result of BackendUtility::getRecord() is cast
     * to an array, so an empty array is returned when the lookup yields nothing.
     * NOTE(review): the trailing `false` argument presumably disables the delete clause - confirm
     * against BackendUtility::getRecord().
     *
     * @param string $table
     * @param int $uid
     * @return array
     */
    protected function getRecordWithFieldRelevantForGarbageCollection($table, $uid): array
    {
        return (array)BackendUtility::getRecord(
            $table,
            $uid,
            $this->tcaService->getVisibilityAffectingFieldsByTable($table),
            '',
            false
        );
    }
374
}
375