Completed
Branch master (b9fc31)
by Timo
05:19
created

GarbageCollector::__construct()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 2.0625

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 3
cts 4
cp 0.75
rs 10
c 0
b 0
f 0
cc 2
eloc 2
nc 2
nop 1
crap 2.0625
1
<?php
2
namespace ApacheSolrForTypo3\Solr;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2010-2015 Ingo Renner <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\GarbageCollectorPostProcessor;
28
use ApacheSolrForTypo3\Solr\IndexQueue\Queue;
29
use ApacheSolrForTypo3\Solr\System\TCA\TCAService;
30
use TYPO3\CMS\Backend\Utility\BackendUtility;
31
use TYPO3\CMS\Core\DataHandling\DataHandler;
32
use TYPO3\CMS\Core\SingletonInterface;
33
use TYPO3\CMS\Core\Utility\GeneralUtility;
34
35
/**
36
 * Garbage Collector, removes related documents from the index when a record is
37
 * set to hidden, is deleted or is otherwise made invisible to website visitors.
38
 *
39
 * Garbage collection will happen for online/LIVE workspaces only.
40
 *
41
 * @author Ingo Renner <[email protected]>
42
 * @author Timo Schmidt <[email protected]>
43
 */
44
class GarbageCollector extends AbstractDataHandlerListener implements SingletonInterface
45
{
46
    /**
     * Snapshots of visibility relevant record fields taken before an update,
     * stored as $trackedRecords[<table>][<uid>] so the state after the update
     * can be compared against them.
     *
     * @var array
     */
    protected $trackedRecords = [];

    /**
     * Service used to answer TCA related questions (enable columns,
     * visibility affecting fields, hidden / start time / end time checks).
     *
     * @var TCAService
     */
    protected $tcaService;
55
56
    /**
     * Creates the garbage collector and wires the TCA service it relies on.
     *
     * @param TCAService|null $TCAService Service to use; when omitted an instance is obtained through GeneralUtility::makeInstance()
     */
    public function __construct(TCAService $TCAService = null)
    {
        if ($TCAService === null) {
            // no service injected, fall back to the framework managed instance
            $TCAService = GeneralUtility::makeInstance(TCAService::class);
        }

        $this->tcaService = $TCAService;
    }
64
65
    /**
     * Hooks into TCE main and reacts on record deletion commands by removing
     * the record's documents from the index.
     *
     * $value and $tceMain are part of the hook signature but are not used.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     * @return void
     */
    public function processCmdmap_preProcess(
        $command,
        $table,
        $uid,
        $value,
        DataHandler $tceMain
    ) {
        // workspaces: garbage is collected for the LIVE workspace only
        $isDeleteInLiveWorkspace = $command == 'delete' && $GLOBALS['BE_USER']->workspace == 0;
        if (!$isDeleteInLiveWorkspace) {
            return;
        }

        $this->collectGarbage($table, $uid);

        if ($table == 'pages') {
            $this->getIndexQueue()->deleteItem($table, $uid);
        }
    }
91
92
    /**
     * Returns the configuration describing when a recursive page queuing
     * should be triggered.
     *
     * Every entry pairs a "currentState" (field values of the page) with a
     * "changeSet" (field values that were changed).
     * NOTE(review): the evaluation of these pairs happens outside of this
     * method (presumably in the parent listener class) - confirm there.
     *
     * @return array
     */
    protected function getUpdateSubPagesRecursiveTriggerConfiguration()
    {
        // the current page has the field "extendToSubpages" enabled and the field "hidden" was set to 1
        $hiddenAddedOnExtendedPage = [
            'currentState' => ['extendToSubpages' => '1'],
            'changeSet' => ['hidden' => '1']
        ];

        // the current page has the field "hidden" enabled and the field "extendToSubpages" was set to 1
        $extendAddedOnHiddenPage = [
            'currentState' => ['hidden' => '1'],
            'changeSet' => ['extendToSubpages' => '1']
        ];

        return [
            'extendToSubpageEnabledAndHiddenFlagWasAdded' => $hiddenAddedOnExtendedPage,
            'hiddenIsEnabledAndExtendToSubPagesWasAdded' => $extendAddedOnHiddenPage
        ];
    }
113
114
    /**
     * Tracks down index documents belonging to a particular record or page and
     * removes them from the index and the Index Queue.
     *
     * Afterwards every class registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['postProcessGarbageCollector']
     * is instantiated and notified through postProcessGarbageCollector().
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     * @throws \UnexpectedValueException if a hook object does not implement interface \ApacheSolrForTypo3\Solr\GarbageCollectorPostProcessor
     */
    public function collectGarbage($table, $uid)
    {
        if ($table == 'tt_content' || $table == 'pages' || $table == 'pages_language_overlay') {
            $this->collectPageGarbage($table, $uid);
        } else {
            $this->collectRecordGarbage($table, $uid);
        }

        // the hook registration is optional; reading it unguarded raises an
        // "undefined index" notice on installations without post processors
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['postProcessGarbageCollector'])
            || !is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['postProcessGarbageCollector'])
        ) {
            return;
        }

        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['solr']['postProcessGarbageCollector'] as $classReference) {
            $garbageCollectorPostProcessor = GeneralUtility::getUserObj($classReference);

            if (!$garbageCollectorPostProcessor instanceof GarbageCollectorPostProcessor) {
                // get_class() must only be called on objects; for anything else
                // report the configured reference so the message names the
                // actual offender
                if (is_object($garbageCollectorPostProcessor)) {
                    $offender = get_class($garbageCollectorPostProcessor);
                } elseif (is_scalar($classReference)) {
                    $offender = (string)$classReference;
                } else {
                    $offender = gettype($classReference);
                }

                throw new \UnexpectedValueException(
                    $offender . ' must implement interface ' . GarbageCollectorPostProcessor::class,
                    1345807460
                );
            }

            $garbageCollectorPostProcessor->postProcessGarbageCollector($table, $uid);
        }
    }
146
147
    /**
     * Tracks down index documents belonging to a particular page and
     * removes them from the index and the Index Queue.
     *
     * For content elements and page language overlays the page they live on
     * (their pid) is cleaned and queued for re-indexing. For pages themselves
     * the documents and the Index Queue item are removed.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     */
    protected function collectPageGarbage($table, $uid)
    {
        switch ($table) {
            case 'tt_content':
            case 'pages_language_overlay':
                $childRecord = BackendUtility::getRecord($table, $uid, 'uid, pid', '', false);
                if (!is_array($childRecord) || empty($childRecord['pid'])) {
                    // the record could not be loaded, so the page it belongs to
                    // is unknown; cleaning or re-queueing "page null" would only
                    // produce bogus queue operations
                    break;
                }

                $pageUid = $childRecord['pid'];

                $this->deleteIndexDocuments('pages', $pageUid);
                // only a content element / page overlay was removed, now update/re-index the page
                $this->getIndexQueue()->updateItem('pages', $pageUid);
                break;
            case 'pages':
                $this->deleteIndexDocuments($table, $uid);
                $this->getIndexQueue()->deleteItem($table, $uid);
                break;
        }
    }
185
186
    /**
     * Removes the index documents and Index Queue items of all pages returned
     * by getSubPageIds() when isRecursiveUpdateRequired() reports that the
     * change has to be applied recursively.
     *
     * NOTE(review): the original comment states that at least the page itself
     * needs to be removed, so getSubPageIds() presumably includes $uid -
     * confirm in the parent class.
     *
     * @param string $table The record's table name.
     * @param int $uid The uid of the page that was changed.
     * @param array $changedFields The fields that were changed on the page.
     */
    protected function deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $changedFields)
    {
        if ($this->isRecursiveUpdateRequired($uid, $changedFields)) {
            $queue = $this->getIndexQueue();

            // affected pages when the "extendToSubpages" flag was set
            foreach ($this->getSubPageIds($uid) as $affectedPageId) {
                $this->deleteIndexDocuments($table, $affectedPageId);
                $queue->deleteItem($table, $affectedPageId);
            }
        }
    }
206
207
    /**
     * Deletes index documents for a given record identification.
     *
     * The Index Queue items of the record are used to find the sites it is
     * indexed for; the documents are then deleted by query on every Solr
     * connection of those sites. A commit is sent after each delete when the
     * site's Solr configuration has commits enabled.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     */
    protected function deleteIndexDocuments($table, $uid)
    {
        /** @var $connectionManager ConnectionManager */
        $connectionManager = GeneralUtility::makeInstance(ConnectionManager::class);
        $deleteQuery = 'type:' . $table . ' AND uid:' . intval($uid);

        // record can be indexed for multiple sites
        foreach ($this->getIndexQueue()->getItems($table, $uid) as $queueItem) {
            $site = $queueItem->getSite();
            $commitsEnabled = $site->getSolrConfiguration()->getEnableCommits();

            // a site can have multiple connections (cores / languages)
            foreach ($connectionManager->getConnectionsBySite($site) as $solrConnection) {
                $solrConnection->deleteByQuery($deleteQuery);
                if ($commitsEnabled) {
                    $solrConnection->commit(false, false, false);
                }
            }
        }
    }
235
236
    /**
     * Removes everything related to a single (non page) record: first its
     * documents in the index, then its item in the Index Queue.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     */
    protected function collectRecordGarbage($table, $uid)
    {
        $this->deleteIndexDocuments($table, $uid);

        $queue = $this->getIndexQueue();
        $queue->deleteItem($table, $uid);
    }
248
249
    // methods checking whether to trigger garbage collection
250
251
    /**
     * Hooks into TCE main and tracks page move commands: a moved page is
     * removed from the index and queued for re-indexing.
     *
     * $value and $tceMain are part of the hook signature but are not used.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processCmdmap_postProcess(
        $command,
        $table,
        $uid,
        $value,
        DataHandler $tceMain
    ) {
        if ($command != 'move' || $table != 'pages') {
            return;
        }

        // workspaces: collect garbage only for LIVE workspace
        if ($GLOBALS['BE_USER']->workspace != 0) {
            return;
        }

        // TODO the below comment is not valid anymore, pid has been removed from doc ID
        // ...still needed?

        // must be removed from index since the pid changes and
        // is part of the Solr document ID
        $this->collectGarbage($table, $uid);

        // now re-index with new properties
        $queue = $this->getIndexQueue();
        $queue->updateItem($table, $uid);
    }
280
281
    /**
     * Hooks into TCE main and tracks changed records. In this case the current
     * record's values are stored to do a change comparison later on for fields
     * like fe_group.
     *
     * Nothing is tracked for new records, for draft (workspace) records, for
     * tables without an fe_group enable column or when the record can not be
     * loaded.
     *
     * @param array $incomingFields An array of incoming fields, new or changed, not used
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_preProcessFieldArray(
        $incomingFields,
        $table,
        $uid,
        DataHandler $tceMain
    ) {
        if (!is_int($uid)) {
            // a newly created record, skip
            return;
        }

        if (Util::isDraftRecord($table, $uid)) {
            // skip workspaces: collect garbage only for LIVE workspace
            return;
        }

        if (!$this->tcaService->isEnableColumn($table, 'fe_group')) {
            // without an fe_group enable column there is nothing to compare later
            return;
        }

        $visibilityAffectingFields = $this->tcaService->getVisibilityAffectingFieldsByTable($table);
        $record = BackendUtility::getRecord(
            $table,
            $uid,
            $visibilityAffectingFields,
            '',
            false
        );

        if (!is_array($record)) {
            // getRecord() yields null when the record does not exist;
            // normalizeFrontendGroupField() only accepts arrays
            return;
        }

        // keep previous state of important fields for later comparison
        $this->trackedRecords[$table][$uid] = $this->tcaService->normalizeFrontendGroupField($table, $record);
    }
324
325
    /**
     * Hooks into TCE Main and watches all record updates. If a change is
     * detected that would remove the record from the website, we try to find
     * related documents and remove them from the index.
     *
     * New records, draft (workspace) records and records that can not be
     * loaded are skipped.
     *
     * @param string $status Status of the current operation, 'new' or 'update'
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param array $fields The changed fields, used for pages to decide whether sub pages need to be removed as well
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_afterDatabaseOperations(
        $status,
        $table,
        $uid,
        array $fields,
        DataHandler $tceMain
    ) {
        if ($status == 'new') {
            // a newly created record, skip
            return;
        }

        if (Util::isDraftRecord($table, $uid)) {
            // skip workspaces: collect garbage only for LIVE workspace
            return;
        }

        $garbageCollectionRelevantFields = $this->tcaService->getVisibilityAffectingFieldsByTable($table);

        $record = BackendUtility::getRecord($table, $uid, $garbageCollectionRelevantFields, '', false);
        if (!is_array($record)) {
            // getRecord() yields null when the record does not exist; the
            // checks below (and the array type hint of isIndexablePageType())
            // require an array
            return;
        }
        $record = $this->tcaService->normalizeFrontendGroupField($table, $record);

        if ($this->tcaService->isHidden($table, $record)
            || (($this->tcaService->isStartTimeInFuture($table, $record)
                    || $this->tcaService->isEndTimeInPast($table, $record))
                && $this->isMarkedAsIndexed($table, $record)
            )
            || $this->hasFrontendGroupsRemoved($table, $record)
            || ($table == 'pages' && $this->isPageExcludedFromSearch($record))
            || ($table == 'pages' && !$this->isIndexablePageType($record))
        ) {
            $this->collectGarbage($table, $uid);

            if ($table == 'pages') {
                $this->deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $fields);
            }
        }
    }
374
375
    /**
     * Asks the Index Queue whether it contains an already indexed item for
     * the given record.
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility, its 'uid' is used for the lookup.
     * @return bool True if the record is marked as being indexed
     */
    protected function isMarkedAsIndexed($table, $record)
    {
        $queue = $this->getIndexQueue();

        return $queue->containsIndexedItem($table, $record['uid']);
    }
387
388
    /**
     * Provides the Index Queue, obtained through GeneralUtility::makeInstance().
     *
     * @return Queue
     */
    private function getIndexQueue()
    {
        /** @var $queue Queue */
        $queue = GeneralUtility::makeInstance(Queue::class);

        return $queue;
    }
395
396
    /**
     * Checks whether a frontend group field exists for the record and if so
     * whether groups have been removed from accessing the record thus making
     * the record invisible to at least some people.
     *
     * The comparison is done against the state tracked in $trackedRecords
     * before the update. Without such a tracked state no removal can be
     * detected and FALSE is returned.
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool TRUE if frontend groups have been removed from access to the record, FALSE otherwise.
     */
    protected function hasFrontendGroupsRemoved($table, $record)
    {
        if (!isset($GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'])) {
            return false;
        }
        $frontendGroupsField = $GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'];

        if (!isset($record['uid']) || !isset($this->trackedRecords[$table][$record['uid']])) {
            // no previous state was tracked: comparing against an empty
            // previous value would report the "empty" group as removed and
            // trigger garbage collection on every update
            return false;
        }
        $trackedRecord = $this->trackedRecords[$table][$record['uid']];

        $previousValue = isset($trackedRecord[$frontendGroupsField]) ? $trackedRecord[$frontendGroupsField] : '';
        $currentValue = isset($record[$frontendGroupsField]) ? $record[$frontendGroupsField] : '';

        $previousGroups = explode(',', (string)$previousValue);
        $currentGroups = explode(',', (string)$currentValue);

        // every group that was present before but is missing now has lost access
        $removedGroups = array_diff($previousGroups, $currentGroups);

        return count($removedGroups) > 0;
    }
424
425
    /**
     * Tells whether the page's "no_search" field is set.
     *
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the page has been excluded from searching, FALSE otherwise
     */
    protected function isPageExcludedFromSearch($record)
    {
        $noSearchFlag = $record['no_search'];

        return (bool)$noSearchFlag;
    }
435
436
    /**
     * Checks whether a page has a page type that can be indexed.
     * The decision is delegated to Util::isAllowedPageType().
     *
     * @param array $record A page record
     * @return bool TRUE if the page can be indexed according to its page type, FALSE otherwise
     */
    protected function isIndexablePageType(array $record)
    {
        $isAllowedPageType = Util::isAllowedPageType($record);

        return $isAllowedPageType;
    }
447
448
    /**
     * Cleans an index from garbage entries.
     *
     * Was used to clean the index from expired documents/past endtime. Solr 4.8
     * introduced DocExpirationUpdateProcessor to do that job by itself.
     *
     * The method remains as a dummy for possible later cleanups and to prevent
     * things from breaking if others were using it. It only logs the
     * deprecation; both parameters are kept for signature compatibility and
     * are not used.
     *
     * @deprecated since 6.0 will be removed in 7.0. deletion is done by DocExpirationUpdateProcessor
     * @param Site $site The site to clean indexes on, not used
     * @param bool $commitAfterCleanUp Whether to commit right after the clean up, defaults to TRUE, not used
     * @return void
     */
    public function cleanIndex(Site $site, $commitAfterCleanUp = true)
    {
        // intentionally no clean up logic anymore, see the deprecation note
        GeneralUtility::logDeprecatedFunction();
    }
466
}
467