Passed
Pull Request — master (#1228)
by Timo
18:39
created

GarbageCollector::processCmdmap_preProcess()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 18
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 5.2596

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 18
ccs 8
cts 14
cp 0.5714
rs 9.2
cc 4
eloc 10
nc 3
nop 5
crap 5.2596
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2010-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\IndexQueue\Queue;
28
use ApacheSolrForTypo3\Solr\System\TCA\TCAService;
29
use TYPO3\CMS\Backend\Utility\BackendUtility;
30
use TYPO3\CMS\Core\DataHandling\DataHandler;
31
use TYPO3\CMS\Core\SingletonInterface;
32
use TYPO3\CMS\Core\Utility\GeneralUtility;
33
34
/**
35
 * Garbage Collector, removes related documents from the index when a record is
36
 * set to hidden, is deleted or is otherwise made invisible to website visitors.
37
 *
38
 * Garbage collection will happen for online/LIVE workspaces only.
39
 *
40
 * @author Ingo Renner <[email protected]>
41
 * @author Timo Schmidt <[email protected]>
42
 */
43
class GarbageCollector extends AbstractDataHandlerListener implements SingletonInterface
44
{
45
    /**
46
     * @var array
47
     */
48
    protected $trackedRecords = [];
49
50
    /**
51
     * @var TCAService
52
     */
53
    protected $tcaService;
54
55
    /**
     * Sets up the garbage collector and the TCA service it works with.
     *
     * @param TCAService|null $TCAService TCA service to use; when omitted an
     *                                    instance is created through GeneralUtility::makeInstance()
     */
    public function __construct(TCAService $TCAService = null)
    {
        parent::__construct();

        if ($TCAService === null) {
            $TCAService = GeneralUtility::makeInstance(TCAService::class);
        }

        $this->tcaService = $TCAService;
    }
64
65
    /**
     * DataHandler (TCE main) command map hook, called before a command is
     * processed. Only "delete" commands are of interest here.
     *
     * Garbage is collected for the LIVE workspace (workspace 0) only.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     * @return void
     */
    public function processCmdmap_preProcess(
        $command,
        $table,
        $uid,
        /** @noinspection PhpUnusedParameterInspection */
        $value,
        /** @noinspection PhpUnusedParameterInspection */
        DataHandler $tceMain
    ) {
        // the workspace is only looked at for delete commands
        if ($command != 'delete' || $GLOBALS['BE_USER']->workspace != 0) {
            return;
        }

        $this->collectGarbage($table, $uid);

        if ($table == 'pages') {
            $this->getIndexQueue()->deleteItem($table, $uid);
        }
    }
93
94
    /**
     * Returns the configuration describing when a recursive page queuing
     * should be triggered.
     *
     * Each entry pairs a "currentState" (field values of the page) with a
     * "changeSet" (field values that were changed).
     *
     * @return array
     */
    protected function getUpdateSubPagesRecursiveTriggerConfiguration()
    {
        // the current page has the field "extendToSubpages" enabled and the field "hidden" was set to 1
        $hiddenFlagWasAdded = [
            'currentState' => ['extendToSubpages' => '1'],
            'changeSet' => ['hidden' => '1']
        ];

        // the current page has the field "hidden" enabled and the field "extendToSubpages" was set to 1
        $extendToSubPagesWasAdded = [
            'currentState' => ['hidden' => '1'],
            'changeSet' => ['extendToSubpages' => '1']
        ];

        return [
            'extendToSubpageEnabledAndHiddenFlagWasAdded' => $hiddenFlagWasAdded,
            'hiddenIsEnabledAndExtendToSubPagesWasAdded' => $extendToSubPagesWasAdded
        ];
    }
115
116
    /**
     * Tracks down index documents belonging to a particular record or page and
     * removes them from the index and the Index Queue. Afterwards every post
     * processor registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['postProcessGarbageCollector']
     * is called with the table and uid.
     *
     * Records of tt_content, pages and pages_language_overlay are treated as
     * page garbage, records of any other table as record garbage.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     * @throws \UnexpectedValueException if a hook object does not implement interface \ApacheSolrForTypo3\Solr\GarbageCollectorPostProcessor
     */
    public function collectGarbage($table, $uid)
    {
        if ($table == 'tt_content' || $table == 'pages' || $table == 'pages_language_overlay') {
            $this->collectPageGarbage($table, $uid);
        } else {
            $this->collectRecordGarbage($table, $uid);
        }

        // registering post processors is optional, so the configuration key
        // may not exist at all
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['postProcessGarbageCollector'])
            || !is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['postProcessGarbageCollector'])
        ) {
            return;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['postProcessGarbageCollector'] as $classReference) {
            $garbageCollectorPostProcessor = GeneralUtility::getUserObj($classReference);

            if (!($garbageCollectorPostProcessor instanceof GarbageCollectorPostProcessor)) {
                // get_class() must only be given objects; report the plain
                // type when the class reference did not resolve to an object
                $actualType = is_object($garbageCollectorPostProcessor)
                    ? get_class($garbageCollectorPostProcessor)
                    : gettype($garbageCollectorPostProcessor);

                throw new \UnexpectedValueException(
                    $actualType . ' must implement interface ' . GarbageCollectorPostProcessor::class,
                    1345807460
                );
            }

            $garbageCollectorPostProcessor->postProcessGarbageCollector($table, $uid);
        }
    }
148
149
    /**
     * Tracks down index documents belonging to a particular page and
     * removes them from the index and the Index Queue.
     *
     * For tt_content and pages_language_overlay records the page they are
     * stored on (their pid) is cleaned from the index and then queued for
     * re-indexing. For pages the page itself is removed from the index and
     * from the Index Queue. Other tables are ignored.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     */
    protected function collectPageGarbage($table, $uid)
    {
        switch ($table) {
            case 'tt_content':
            case 'pages_language_overlay':
                // deleted records are included in the lookup (no delete clause)
                $record = BackendUtility::getRecord($table, $uid, 'uid, pid', '', false);
                if (!is_array($record)) {
                    // the record could not be loaded, so the page it belongs
                    // to is unknown and there is nothing to clean or re-queue
                    return;
                }

                $pageUid = $record['pid'];

                $this->deleteIndexDocuments('pages', $pageUid);
                // only a content element or page overlay was removed, now
                // update/re-index the page
                $this->getIndexQueue()->updateItem('pages', $pageUid);
                break;
            case 'pages':
                $this->deleteIndexDocuments($table, $uid);
                $this->getIndexQueue()->deleteItem($table, $uid);
                break;
        }
    }
187
188
    /**
     * Removes the pages returned by getSubPageIds() for the given page from
     * the index and the Index Queue, provided isRecursivePageUpdateRequired()
     * reports that the changed fields require a recursive update.
     *
     * @param string $table The table name passed on when deleting the documents and queue items
     * @param int $uid The uid of the page whose subpages are looked up
     * @param array $changedFields The fields that were changed on the page
     */
    protected function deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $changedFields)
    {
        if ($this->isRecursivePageUpdateRequired($uid, $changedFields)) {
            $queue = $this->getIndexQueue();

            // affected pages when the "extendToSubpages" flag was set
            foreach ($this->getSubPageIds($uid) as $affectedPageId) {
                $this->deleteIndexDocuments($table, $affectedPageId);
                $queue->deleteItem($table, $affectedPageId);
            }
        }
    }
208
209
    /**
     * Deletes index documents for a given record identification.
     *
     * The Index Queue items of the record determine the sites to look at; the
     * delete query is sent to every Solr connection of each of those sites.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     */
    protected function deleteIndexDocuments($table, $uid)
    {
        /** @var $connectionManager ConnectionManager */
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $deleteQuery = 'type:' . $table . ' AND uid:' . (int)$uid;

        // record can be indexed for multiple sites
        foreach ($this->getIndexQueue()->getItems($table, $uid) as $queueItem) {
            $site = $queueItem->getSite();
            $commitsEnabled = $site->getSolrConfiguration()->getEnableCommits();

            // a site can have multiple connections (cores / languages)
            foreach ($connectionManager->getConnectionsBySite($site) as $solrConnection) {
                $solrConnection->deleteByQuery($deleteQuery);

                if ($commitsEnabled) {
                    $solrConnection->commit(false, false, false);
                }
            }
        }
    }
237
238
    /**
     * Removes everything that belongs to a single (non page related) record:
     * first its documents in the index, then its item in the Index Queue.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     */
    protected function collectRecordGarbage($table, $uid)
    {
        $this->deleteIndexDocuments($table, $uid);

        $queue = $this->getIndexQueue();
        $queue->deleteItem($table, $uid);
    }
250
251
    // methods checking whether to trigger garbage collection
252
253
    /**
     * DataHandler (TCE main) command map hook, called after a command was
     * processed. Only "move" commands on pages are of interest here.
     *
     * Garbage is collected for the LIVE workspace (workspace 0) only.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processCmdmap_postProcess(
        $command,
        $table,
        $uid,
        /** @noinspection PhpUnusedParameterInspection */
        $value,
        /** @noinspection PhpUnusedParameterInspection */
        DataHandler $tceMain
    ) {
        $isPageMove = ($command == 'move' && $table == 'pages');

        // the workspace is only looked at for moved pages
        if (!$isPageMove || $GLOBALS['BE_USER']->workspace != 0) {
            return;
        }

        // TODO the reasoning below is not valid anymore, pid has been removed
        // from the doc ID ...still needed?
        //
        // must be removed from index since the pid changes and
        // is part of the Solr document ID
        $this->collectGarbage($table, $uid);

        // now re-index with new properties
        $this->getIndexQueue()->updateItem($table, $uid);
    }
284
285
    /**
     * DataHandler (TCE main) data map hook, called before the field array of
     * a record is processed. Remembers the record's current values of the
     * visibility affecting fields so that a later comparison (e.g. of
     * fe_group) against the updated record is possible.
     *
     * Nothing is tracked for new records, for draft (workspace) records and
     * for tables without an fe_group enable column.
     *
     * @param array $incomingFields An array of incoming fields, new or changed, not used
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_preProcessFieldArray(
        /** @noinspection PhpUnusedParameterInspection */
        $incomingFields,
        $table,
        $uid,
        /** @noinspection PhpUnusedParameterInspection */
        DataHandler $tceMain
    ) {
        // a newly created record, skip
        if (!is_int($uid)) {
            return;
        }

        // skip workspaces: collect garbage only for LIVE workspace
        if (Util::isDraftRecord($table, $uid)) {
            return;
        }

        if (!$this->tcaService->isEnableColumn($table, 'fe_group')) {
            return;
        }

        $fieldList = $this->tcaService->getVisibilityAffectingFieldsByTable($table);
        $currentRecord = (array)BackendUtility::getRecord($table, $uid, $fieldList, '', false);

        // If no record could be found skip further processing
        if (count($currentRecord) === 0) {
            return;
        }

        // keep previous state of important fields for later comparison
        $this->trackedRecords[$table][$uid] = $this->tcaService->normalizeFrontendGroupField(
            $table,
            $currentRecord
        );
    }
336
337
    /**
     * DataHandler (TCE main) data map hook, called after a record was
     * written to the database. When the updated record is hidden, invisible
     * due to its start/end time, lost frontend groups or - for pages - is
     * excluded from search or has a page type that is not indexable, the
     * related documents are removed from the index.
     *
     * New records and draft (workspace) records are skipped.
     *
     * @param string $status Status of the current operation, 'new' or 'update'
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param array $fields The record's data, passed on as changed fields when subpages need to be removed
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_afterDatabaseOperations(
        $status,
        $table,
        $uid,
        array $fields,
        /** @noinspection PhpUnusedParameterInspection */
        DataHandler $tceMain
    ) {
        // a newly created record, skip
        if ($status == 'new') {
            return;
        }

        // skip workspaces: collect garbage only for LIVE workspace
        if (Util::isDraftRecord($table, $uid)) {
            return;
        }

        $fieldList = $this->tcaService->getVisibilityAffectingFieldsByTable($table);
        $currentRecord = (array)BackendUtility::getRecord($table, $uid, $fieldList, '', false);

        // If no record could be found skip further processing
        if (count($currentRecord) === 0) {
            return;
        }

        $currentRecord = $this->tcaService->normalizeFrontendGroupField($table, $currentRecord);

        // the checks are evaluated lazily, in exactly this order
        $isNoLongerVisible = $this->tcaService->isHidden($table, $currentRecord)
            || $this->isInvisibleByStartOrEndtime($table, $currentRecord)
            || $this->hasFrontendGroupsRemoved($table, $currentRecord)
            || ($table == 'pages'
                && ($this->isPageExcludedFromSearch($currentRecord) || !$this->isIndexablePageType($currentRecord)));

        if (!$isNoLongerVisible) {
            return;
        }

        $this->collectGarbage($table, $uid);

        if ($table == 'pages') {
            $this->deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $fields);
        }
    }
390
391
    /**
     * Tells whether a record became invisible because its start time lies in
     * the future or its end time lies in the past, and the related queue item
     * is marked as indexed.
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool
     */
    protected function isInvisibleByStartOrEndtime($table, $record)
    {
        $isOutsideOfTimeFrame = $this->tcaService->isStartTimeInFuture($table, $record)
            || $this->tcaService->isEndTimeInPast($table, $record);

        if (!$isOutsideOfTimeFrame) {
            return false;
        }

        return (bool)$this->isRelatedQueueRecordMarkedAsIndexed($table, $record);
    }
406
407
    /**
     * Checks if the related index queue item is indexed.
     *
     * * For tt_content and pages_language_overlay the page from the pid is checked
     * * For all other records the record of the table itself is checked
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the record is marked as being indexed
     */
    protected function isRelatedQueueRecordMarkedAsIndexed($table, $record)
    {
        $isStoredOnPage = ($table == 'tt_content' || $table == 'pages_language_overlay');

        $queueTable = $isStoredOnPage ? 'pages' : $table;
        $queueUid = $isStoredOnPage ? $record['pid'] : $record['uid'];

        return $this->getIndexQueue()->containsIndexedItem($queueTable, $queueUid);
    }
428
429
    /**
     * Provides the Index Queue, obtained through GeneralUtility::makeInstance().
     *
     * @return Queue
     */
    private function getIndexQueue()
    {
        /** @var $queue Queue */
        $queue = GeneralUtility::makeInstance(Queue::class);

        return $queue;
    }
436
437
    /**
     * Checks whether a frontend group field exists for the record and if so
     * whether groups have been removed from accessing the record thus making
     * the record invisible to at least some people.
     *
     * The comparison is done against the state remembered in
     * $this->trackedRecords. When no previous state was tracked for the record
     * there is nothing to compare with and FALSE is returned.
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool TRUE if frontend groups have been removed from access to the record, FALSE otherwise.
     */
    protected function hasFrontendGroupsRemoved($table, $record)
    {
        if (!isset($GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'])) {
            return false;
        }

        // Without a tracked previous state the old group list would be read
        // as a single empty entry, which differs from almost every current
        // list and would wrongly be reported as "groups removed".
        if (!isset($record['uid'], $this->trackedRecords[$table][$record['uid']])) {
            return false;
        }

        $frontendGroupsField = $GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'];
        $trackedRecord = $this->trackedRecords[$table][$record['uid']];

        $previousGroups = explode(',', (string)$trackedRecord[$frontendGroupsField]);
        $currentGroups = explode(',', (string)$record[$frontendGroupsField]);

        // groups that had access before but are missing now
        $removedGroups = array_diff($previousGroups, $currentGroups);

        return count($removedGroups) > 0;
    }
465
466
    /**
     * Tells whether the page's "no_search" field is set, i.e. the page has
     * been excluded from searching.
     *
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the page has been excluded from searching, FALSE otherwise
     */
    protected function isPageExcludedFromSearch($record)
    {
        $noSearchFlag = $record['no_search'];

        return (bool)$noSearchFlag;
    }
476
477
    /**
     * Checks whether a page has a page type that can be indexed. The decision
     * is delegated to Util::isAllowedPageType().
     *
     * @param array $record A page record
     * @return bool TRUE if the page can be indexed according to its page type, FALSE otherwise
     */
    protected function isIndexablePageType(array $record)
    {
        $isAllowed = Util::isAllowedPageType($record);

        return $isAllowed;
    }
488
489
    /**
     * Cleans an index from garbage entries.
     *
     * Was used to clean the index from expired documents/past endtime. Solr 4.8
     * introduced DocExpirationUpdateProcessor to do that job by itself.
     *
     * The method is kept as a dummy that only logs its deprecated usage, for
     * possible later cleanups and to prevent things from breaking if others
     * were using it.
     *
     * @deprecated since 6.0 will be removed in 7.0. deletion is done by DocExpirationUpdateProcessor
     * @param Site $site The site to clean indexes on, not used
     * @param bool $commitAfterCleanUp Whether to commit right after the clean up, defaults to TRUE, not used
     * @return void
     */
    public function cleanIndex(
        /** @noinspection PhpUnusedParameterInspection */
        Site $site,
        /** @noinspection PhpUnusedParameterInspection */
        $commitAfterCleanUp = true
    ) {
        // no clean up is performed anymore, only the deprecation is logged
        GeneralUtility::logDeprecatedFunction();
    }
507
}
508