Passed
Push — master ( 48fd54...5ddb8e )
by
unknown
17:47
created

LinkAnalyzer   F

Complexity

Total Complexity 64

Size/Duplication

Total Lines 439
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 64
eloc 188
dl 0
loc 439
rs 3.28
c 1
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
B analyzeTypoLinks() 0 47 11
A __construct() 0 5 1
A init() 0 12 3
B getLinkStatistics() 0 55 11
A getLanguageService() 0 3 1
A getLinkCounts() 0 3 1
B recheckLinks() 0 48 8
A analyzeLinks() 0 24 5
C analyzeRecord() 0 51 12
B checkLinks() 0 46 11

How to fix   Complexity   

Complex Class

Complex classes like LinkAnalyzer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use LinkAnalyzer, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/*
4
 * This file is part of the TYPO3 CMS project.
5
 *
6
 * It is free software; you can redistribute it and/or modify it under
7
 * the terms of the GNU General Public License, either version 2
8
 * of the License, or any later version.
9
 *
10
 * For the full copyright and license information, please read the
11
 * LICENSE.txt file that was distributed with this source code.
12
 *
13
 * The TYPO3 project - inspiring people to share!
14
 */
15
16
namespace TYPO3\CMS\Linkvalidator;
17
18
use Psr\EventDispatcher\EventDispatcherInterface;
19
use TYPO3\CMS\Backend\Utility\BackendUtility;
20
use TYPO3\CMS\Core\Database\Connection;
21
use TYPO3\CMS\Core\Database\ConnectionPool;
22
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
23
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
24
use TYPO3\CMS\Core\Html\HtmlParser;
25
use TYPO3\CMS\Core\Localization\LanguageService;
26
use TYPO3\CMS\Core\Utility\GeneralUtility;
27
use TYPO3\CMS\Linkvalidator\Event\BeforeRecordIsAnalyzedEvent;
28
use TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype;
29
use TYPO3\CMS\Linkvalidator\Repository\BrokenLinkRepository;
30
31
/**
32
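 * Collects links from the configured record fields, validates them with the registered link type checkers and stores broken ones via the BrokenLinkRepository.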
33
 * @internal
34
 */
35
class LinkAnalyzer
36
{
37
38
    /**
39
     * Array of tables and fields to search for broken links
40
     *
41
     * @var array<string, array<int, string>>
42
     */
43
    protected $searchFields = [];
44
45
    /**
46
     * List of page uids (rootline downwards)
47
     *
48
     * @var array
49
     */
50
    protected $pids = [];
51
52
    /**
53
     * Array of tables and the number of external links they contain
54
     *
55
     * @var array
56
     */
57
    protected $linkCounts = [];
58
59
    /**
60
     * Array of tables and the number of broken external links they contain
61
     *
62
     * @var array
63
     */
64
    protected $brokenLinkCounts = [];
65
66
    /**
67
     * Array for hooks for own checks
68
     *
69
     * @var Linktype\AbstractLinktype[]
70
     */
71
    protected $hookObjectsArr = [];
72
73
    /**
74
     * The currently active TSconfig. Will be passed to the init function.
75
     *
76
     * @var array
77
     */
78
    protected $tsConfig = [];
79
80
    /**
81
     * @var EventDispatcherInterface
82
     */
83
    protected $eventDispatcher;
84
85
    /**
86
     * @var BrokenLinkRepository
87
     */
88
    protected $brokenLinkRepository;
89
90
    /**
     * Stores the injected collaborators and loads the module's language labels.
     *
     * @param EventDispatcherInterface $eventDispatcher Dispatcher kept for events raised during analysis
     * @param BrokenLinkRepository $brokenLinkRepository Repository kept for reading and writing broken links
     */
    public function __construct(EventDispatcherInterface $eventDispatcher, BrokenLinkRepository $brokenLinkRepository)
    {
        $this->eventDispatcher = $eventDispatcher;
        $this->brokenLinkRepository = $brokenLinkRepository;

        // Register the linkvalidator module labels with the language service.
        $languageService = $this->getLanguageService();
        $languageService->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');
    }
96
97
    /**
     * Remembers the configuration for the following analysis runs and
     * instantiates one checker object per registered link type.
     *
     * @param array $searchFields List of fields in which to search for links
     * @param string|array $pidList Comma separated page uids in which to search for links, or an array of them
     * @param array $tsConfig The currently active TSconfig.
     */
    public function init(array $searchFields, $pidList, $tsConfig)
    {
        $this->searchFields = $searchFields;
        if (is_array($pidList)) {
            $this->pids = $pidList;
        } else {
            $this->pids = GeneralUtility::intExplode(',', $pidList, true);
        }
        $this->tsConfig = $tsConfig;

        // Build the checker objects registered for the individual link types
        $registeredCheckers = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] ?? [];
        foreach ($registeredCheckers as $linkType => $checkerClassName) {
            $checker = GeneralUtility::makeInstance($checkerClassName);
            $this->hookObjectsArr[$linkType] = $checker;
            // setAdditionalConfig might use global configuration, so it is called even with empty options
            $checker->setAdditionalConfig($tsConfig['linktypesConfig.'][$linkType . '.'] ?? []);
        }
    }
118
119
    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * Nothing happens when no link types are given or no page uids are
     * configured. Otherwise the stored broken links of the configured pages
     * are removed for the given link types, every configured table is
     * scanned record by record, and the collected links are handed to
     * checkLinks().
     *
     * @param array<int,string> $linkTypes List of hook object to activate
     * @param bool $considerHidden Defines whether to look into hidden fields
     */
    public function getLinkStatistics(array $linkTypes = [], $considerHidden = false)
    {
        if (empty($linkTypes) || empty($this->pids)) {
            return;
        }

        $this->brokenLinkRepository->removeAllBrokenLinksOfRecordsOnPageIds(
            $this->pids,
            $linkTypes
        );

        $results = [];
        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it. The null coalescing keeps the
            // lookup of an unknown table from raising an "undefined array key" warning.
            if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
                continue;
            }
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable($table);

            if ($considerHidden) {
                // Drop all default restrictions, but keep deleted records excluded
                $queryBuilder->getRestrictions()
                    ->removeAll()
                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            }

            // Re-init selectFields for table
            $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields);
            $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
            if ($languageField) {
                $selectFields[] = $languageField;
            }
            // The type field is only selected when it is a configured column of the table
            $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
            if ($typeField && isset($GLOBALS['TCA'][$table]['columns'][$typeField])) {
                $selectFields[] = $typeField;
            }

            // Pages are matched by their own uid, all other records by their pid
            $result = $queryBuilder->select(...$selectFields)
                ->from($table)
                ->where(
                    $queryBuilder->expr()->in(
                        ($table === 'pages' ? 'uid' : 'pid'),
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    )
                )
                ->execute();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetch()) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
        }
        $this->checkLinks($results, $linkTypes);
    }
181
182
    /**
     * Checks the collected links with the checker object of their link type
     * and stores every broken one through the broken link repository.
     *
     * Only link types that have a checker in $this->hookObjectsArr, are
     * listed in $linkTypes and have entries in $links are processed. The
     * per-table counters $this->linkCounts / $this->brokenLinkCounts are
     * updated along the way.
     *
     * @param array $links Collected links, grouped by link type key
     * @param array<int,string> $linkTypes Link type keys that should be checked
     */
    protected function checkLinks(array $links, array $linkTypes)
    {
        foreach ($this->hookObjectsArr as $key => $hookObj) {
            // A link type without collected links has no entry in $links at all
            if (!is_array($links[$key] ?? null) || !in_array($key, $linkTypes, true)) {
                continue;
            }

            //  Check them
            foreach ($links[$key] as $entryValue) {
                $table = $entryValue['table'];
                $row = $entryValue['row'];

                $record = [];
                $record['headline'] = BackendUtility::getRecordTitle($table, $row);
                $record['record_pid'] = $row['pid'];
                $record['record_uid'] = $entryValue['uid'];
                $record['table_name'] = $table;
                $record['link_type'] = $key;
                // Only entries created by analyzeTypoLinks() carry a link title
                $record['link_title'] = $entryValue['link_title'] ?? '';
                $record['field'] = $entryValue['field'];
                $record['last_check'] = time();

                $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
                if ($typeField && ($row[$typeField] ?? false)) {
                    $record['element_type'] = $row[$typeField];
                }

                // -1 marks records of tables without a language field (or without a value for it)
                $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
                if ($languageField && isset($row[$languageField])) {
                    $record['language'] = $row[$languageField];
                } else {
                    $record['language'] = -1;
                }

                if (!empty($entryValue['pageAndAnchor'] ?? '')) {
                    // Page with anchor, e.g. 18#1580
                    $url = $entryValue['pageAndAnchor'];
                } else {
                    $url = $entryValue['substr']['tokenValue'];
                }
                $record['url'] = $url;

                // Counters start at zero for a table that is seen for the first time
                $this->linkCounts[$table] = ($this->linkCounts[$table] ?? 0) + 1;
                $checkUrl = $hookObj->checkLink($url, $entryValue, $this);

                if (!$checkUrl) {
                    // Broken link found
                    $this->brokenLinkRepository->addBrokenLink($record, false, $hookObj->getErrorParams());
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                } elseif (GeneralUtility::_GP('showalllinks')) {
                    // On request, working links are stored as well (flagged with true)
                    $this->brokenLinkRepository->addBrokenLink($record, true);
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                }
            }
        }
    }
236
237
    /**
     * Recheck for broken links for one field in table for record.
     *
     * If the record cannot be loaded, its stored broken links are removed.
     * If the record is found and links are detected in the field, the stored
     * broken links of the record are replaced by the result of a fresh check.
     *
     * @param array $checkOptions Link type keys, passed on to checkLinks()
     * @param string $recordUid uid of record to check
     * @param string $table
     * @param string $field
     * @param int $timestamp - the recheck is skipped when both this value and the
     *                       record's tstamp are non-zero and the tstamp is lower than it
     * @param bool $considerHidden Whether the hidden restriction is lifted when loading the record
     */
    public function recheckLinks(
        array $checkOptions,
        string $recordUid,
        string $table,
        string $field,
        int $timestamp,
        bool $considerHidden = true
    ): void {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it. The null coalescing keeps the
        // lookup of an unknown table from raising an "undefined array key" warning.
        if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
            return;
        }

        // The repository expects an integer uid, so convert once instead of
        // relying on implicit string-to-int coercion at each call.
        $uid = (int)$recordUid;

        // get all links for $record / $table / $field combination
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable($table);
        if ($considerHidden) {
            $queryBuilder->getRestrictions()->removeByType(HiddenRestriction::class);
        }

        $row = $queryBuilder->select('uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label'], $field, 'tstamp')
            ->from($table)
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($recordUid, Connection::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if (!$row) {
            // missing record: remove existing links
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            return;
        }
        if (($row['tstamp'] ?? 0) && $timestamp && ((int)($row['tstamp']) < $timestamp)) {
            // timestamp has not changed: no need to recheck
            return;
        }
        $resultsLinks = [];
        $this->analyzeRecord($resultsLinks, $table, [$field], $row);
        if ($resultsLinks) {
            // remove existing broken links from table
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            // find all broken links for list of links
            $this->checkLinks($resultsLinks, $checkOptions);
        }
    }
297
298
    /**
     * Find all supported broken links for a specific record
     *
     * A BeforeRecordIsAnalyzedEvent is dispatched first; the results and the
     * record returned by the event are used for the analysis. Every given
     * field that has soft references configured in TCA and a non-empty value
     * is parsed, and the links found are added to $results.
     *
     * @param array $results Array of broken links
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyze
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        $event = new BeforeRecordIsAnalyzedEvent($table, $record, $fields, $this, $results);
        $this->eventDispatcher->dispatch($event);
        $results = $event->getResults();
        $record = $event->getRecord();

        // Put together content of all relevant fields
        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            // Fields without TCA configuration or without a value in the row are
            // treated as "nothing to analyze" instead of raising warnings
            $conf = $GLOBALS['TCA'][$table]['columns'][$field]['config'] ?? [];
            $valueField = $record[$field] ?? '';

            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (empty($conf['softref']) || (string)$valueField === '') {
                continue;
            }

            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($conf['softref']);
            if ($softRefs === false) {
                continue;
            }

            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj */
                $softRefObj = BackendUtility::softRefParserObj($spKey);

                // If there is an object returned...
                if (!is_object($softRefObj)) {
                    continue;
                }
                $softRefParams = $spParams;
                if (!is_array($softRefParams)) {
                    // set subst such that findRef will return substitutes for urls, emails etc
                    $softRefParams = ['subst' => true];
                }

                // Do processing
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $softRefParams);
                if (!is_array($resultArray) || !isset($resultArray['elements']) || !is_array($resultArray['elements'])) {
                    continue;
                }

                // Links inside typolink tags need the HTML parser to be mapped back to their tag
                if ($spKey === 'typolink_tag') {
                    $this->analyzeTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyzeLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }
361
362
    /**
     * Find all supported broken links for a specific link list
     *
     * Every element with a non-empty 'subst' entry is offered to all
     * registered checker objects to determine its link type and is then
     * stored in $results below that type, keyed by
     * "<table>:<field>:<uid>:<tokenID>".
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        $idRecord = $record['uid'];
        foreach ($resultArray['elements'] as $element) {
            // Elements without substitution information carry nothing to check
            $r = $element['subst'] ?? [];
            if (empty($r)) {
                continue;
            }

            $type = '';
            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($r, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $r['type'] = $type;
                }
            }

            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . ($r['tokenID'] ?? '');
            // array_replace() keeps additional keys of an entry that already exists under this key
            $results[$type][$resultKey] = array_replace(
                $results[$type][$resultKey] ?? [],
                [
                    'substr' => $r,
                    'row' => $record,
                    'table' => $table,
                    'field' => $field,
                    'uid' => $idRecord,
                ]
            );
        }
    }
397
398
    /**
     * Find all supported broken links for a specific typoLink
     *
     * The parsed content is split into a/link tag blocks. For every tag
     * block the soft reference elements whose token occurs in the block are
     * evaluated, and one entry per tag block is stored in $results, keyed by
     * "<table>:<field>:<uid>:<tokenID>" below the detected link type.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $currentR = [];
        $linkTags = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
        $idRecord = $record['uid'];
        $type = '';
        $title = '';
        // Tells whether the previously matched reference pointed to a page, so that a
        // following tt_content reference is appended as anchor. The flag is
        // intentionally not reset per link tag, matching the previous behaviour.
        $wasPage = false;
        $countLinkTags = count($linkTags);
        // NOTE(review): only odd indices are visited - presumably these hold the tag
        // blocks returned by splitIntoBlock(); confirm against HtmlParser.
        for ($i = 1; $i < $countLinkTags; $i += 2) {
            $referencedRecordType = '';
            foreach ($resultArray['elements'] as $element) {
                $type = '';
                $r = $element['subst'] ?? [];
                if (empty($r['tokenID']) || substr_count($linkTags[$i], $r['tokenID']) === 0) {
                    continue;
                }

                // Type of referenced record. Substitutes without a record reference
                // (assumed for plain URLs) have no 'recordRef' key; treat as empty.
                $recordRef = (string)($r['recordRef'] ?? '');
                if (strpos($recordRef, 'pages') !== false) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = true;
                } elseif (strpos($recordRef, 'tt_content') !== false && $wasPage) {
                    $referencedRecordType = $referencedRecordType . '#c' . $r['tokenValue'];
                    $wasPage = false;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }
            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($currentR, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $currentR['type'] = $type;
                }
            }

            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . ($currentR['tokenID'] ?? '');
            // array_replace() keeps additional keys of an entry that already exists under this key
            $results[$type][$resultKey] = array_replace(
                $results[$type][$resultKey] ?? [],
                [
                    'substr' => $currentR,
                    'row' => $record,
                    'table' => $table,
                    'field' => $field,
                    'uid' => $idRecord,
                    'link_title' => $title,
                    'pageAndAnchor' => $referencedRecordType,
                ]
            );
        }
    }
457
458
    /**
     * Asks the broken link repository for the number of broken links of the
     * records on the configured pages, limited to the configured search fields.
     *
     * @return array array with the number of links found
     */
    public function getLinkCounts()
    {
        $brokenLinkNumbers = $this->brokenLinkRepository->getNumberOfBrokenLinksForRecordsOnPages(
            $this->pids,
            $this->searchFields
        );

        return $brokenLinkNumbers;
    }
467
468
    /**
     * Returns the language service stored in $GLOBALS['LANG'].
     *
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        $languageService = $GLOBALS['LANG'];

        return $languageService;
    }
475
}
476