Passed
Push — master ( 49b5a4...bb9b14 )
by
unknown
16:01
created

LinkAnalyzer::checkLinks()   C

Complexity

Conditions 13
Paths 3

Size

Total Lines 55
Code Lines 39

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 39
c 0
b 0
f 0
dl 0
loc 55
rs 6.6166
cc 13
nc 3
nop 2

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
/*
4
 * This file is part of the TYPO3 CMS project.
5
 *
6
 * It is free software; you can redistribute it and/or modify it under
7
 * the terms of the GNU General Public License, either version 2
8
 * of the License, or any later version.
9
 *
10
 * For the full copyright and license information, please read the
11
 * LICENSE.txt file that was distributed with this source code.
12
 *
13
 * The TYPO3 project - inspiring people to share!
14
 */
15
16
namespace TYPO3\CMS\Linkvalidator;
17
18
use Psr\EventDispatcher\EventDispatcherInterface;
19
use TYPO3\CMS\Backend\Utility\BackendUtility;
20
use TYPO3\CMS\Core\Database\Connection;
21
use TYPO3\CMS\Core\Database\ConnectionPool;
22
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
23
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
24
use TYPO3\CMS\Core\Html\HtmlParser;
25
use TYPO3\CMS\Core\Localization\LanguageService;
26
use TYPO3\CMS\Core\Utility\GeneralUtility;
27
use TYPO3\CMS\Linkvalidator\Event\BeforeRecordIsAnalyzedEvent;
28
use TYPO3\CMS\Linkvalidator\Linktype\LinktypeInterface;
29
use TYPO3\CMS\Linkvalidator\Repository\BrokenLinkRepository;
30
31
/**
32
 * This class provides Processing plugin implementation
33
 * @internal
34
 */
35
class LinkAnalyzer
36
{
37
38
    /**
39
     * Array of tables and fields to search for broken links
40
     *
41
     * @var array<string, array<int, string>>
42
     */
43
    protected $searchFields = [];
44
45
    /**
46
     * List of page uids (rootline downwards)
47
     *
48
     * @var int[]
49
     */
50
    protected $pids = [];
51
52
    /**
53
     * Array of tables and the number of external links they contain
54
     *
55
     * @var array
56
     */
57
    protected $linkCounts = [];
58
59
    /**
60
     * Array of tables and the number of broken external links they contain
61
     *
62
     * @var array
63
     */
64
    protected $brokenLinkCounts = [];
65
66
    /**
67
     * Array for hooks for own checks
68
     *
69
     * @var LinktypeInterface[]
70
     */
71
    protected $hookObjectsArr = [];
72
73
    /**
74
     * The currently active TSconfig. Will be passed to the init function.
75
     *
76
     * @var array
77
     */
78
    protected $tsConfig = [];
79
80
    /**
81
     * @var EventDispatcherInterface
82
     */
83
    protected $eventDispatcher;
84
85
    /**
86
     * @var BrokenLinkRepository
87
     */
88
    protected $brokenLinkRepository;
89
90
    /**
     * Store the injected collaborators and include the linkvalidator module language file.
     *
     * @param EventDispatcherInterface $eventDispatcher Dispatcher used when records are analyzed
     * @param BrokenLinkRepository $brokenLinkRepository Repository used to add and remove broken link entries
     */
    public function __construct(EventDispatcherInterface $eventDispatcher, BrokenLinkRepository $brokenLinkRepository)
    {
        $this->eventDispatcher = $eventDispatcher;
        $this->brokenLinkRepository = $brokenLinkRepository;

        $languageService = $this->getLanguageService();
        $languageService->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');
    }
96
97
    /**
     * Remember the configuration for later checks and set up the link type objects.
     *
     * Every class registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] is
     * instantiated. Instances that do not implement LinktypeInterface are ignored,
     * all others are stored by their registration key and receive their
     * "linktypesConfig." options from the given TSconfig.
     *
     * @param array $searchFields List of fields in which to search for links
     * @param int[] $pidList List of page uids in which to search for links
     * @param array $tsConfig The currently active TSconfig.
     */
    public function init(array $searchFields, array $pidList, $tsConfig)
    {
        $this->searchFields = $searchFields;
        $this->pids = $pidList;
        $this->tsConfig = $tsConfig;

        // Hook to handle own checks
        $registeredLinkTypes = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] ?? [];
        foreach ($registeredLinkTypes as $linkType => $linkTypeClassName) {
            $linkTypeObject = GeneralUtility::makeInstance($linkTypeClassName);
            if ($linkTypeObject instanceof LinktypeInterface) {
                $this->hookObjectsArr[$linkType] = $linkTypeObject;
                $linkTypeOptions = $tsConfig['linktypesConfig.'][$linkType . '.'] ?? [];
                // setAdditionalConfig might use global configuration, so it is
                // still called when no options exist for this link type
                $linkTypeObject->setAdditionalConfig($linkTypeOptions);
            }
        }
    }
122
123
    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * Nothing happens if no link types are given or no page uids are configured.
     * Otherwise the existing broken link entries for the configured pages and the
     * given link types are removed, every configured table is scanned for links
     * and the collected links are handed to checkLinks().
     *
     * @param array<int,string> $linkTypes List of hook object to activate
     * @param bool $considerHidden If true, all query restrictions except the
     *                             deleted restriction are removed before selecting records
     */
    public function getLinkStatistics(array $linkTypes = [], $considerHidden = false)
    {
        if (empty($linkTypes) || empty($this->pids)) {
            return;
        }

        $this->brokenLinkRepository->removeAllBrokenLinksOfRecordsOnPageIds(
            $this->pids,
            $linkTypes
        );

        $results = [];
        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it. The null coalescing keeps this
            // guard from raising an "undefined array key" warning itself.
            if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
                continue;
            }
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable($table);

            if ($considerHidden) {
                $queryBuilder->getRestrictions()
                    ->removeAll()
                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            }

            // Re-init selectFields for table
            $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields);
            $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
            if ($languageField) {
                $selectFields[] = $languageField;
            }
            // The type field is only selected if it is a configured column of this table
            $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
            if ($typeField && isset($GLOBALS['TCA'][$table]['columns'][$typeField])) {
                $selectFields[] = $typeField;
            }

            // Pages are matched by their own uid, all other records by their pid
            $result = $queryBuilder->select(...$selectFields)
                ->from($table)
                ->where(
                    $queryBuilder->expr()->in(
                        ($table === 'pages' ? 'uid' : 'pid'),
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    )
                )
                ->execute();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetch()) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
        }
        $this->checkLinks($results, $linkTypes);
    }
185
186
    /**
     * Check the collected links with their link type object and store the outcome.
     *
     * A link type is only processed if a hook object is registered for it, it is
     * contained in $linkTypes and $links holds an array of entries for it. Every
     * link that fails the check is added to the broken link repository. If the
     * request parameter "showalllinks" is set, links that pass the check are
     * added as well (flagged as valid).
     *
     * @param array $links Collected links, grouped by link type
     * @param array<int,string> $linkTypes Link types to check
     */
    protected function checkLinks(array $links, array $linkTypes)
    {
        // The request parameter does not change while checking, so read it once
        $showAllLinks = (bool)GeneralUtility::_GP('showalllinks');

        foreach ($this->hookObjectsArr as $key => $hookObj) {
            if (!is_array($links[$key] ?? false) || !in_array($key, $linkTypes, true)) {
                continue;
            }

            //  Check them
            foreach ($links[$key] as $entryValue) {
                $table = $entryValue['table'];
                $record = $this->buildLinkRecord($key, $entryValue);

                // Both counters exist for a table as soon as one of its links was checked
                $this->linkCounts[$table] = ($this->linkCounts[$table] ?? 0) + 1;
                $this->brokenLinkCounts[$table] = $this->brokenLinkCounts[$table] ?? 0;

                $checkUrl = $hookObj->checkLink($record['url'], $entryValue, $this);

                if (!$checkUrl) {
                    // Broken link found
                    $this->brokenLinkRepository->addBrokenLink($record, false, $hookObj->getErrorParams());
                    $this->brokenLinkCounts[$table]++;
                } elseif ($showAllLinks) {
                    $this->brokenLinkRepository->addBrokenLink($record, true);
                    $this->brokenLinkCounts[$table]++;
                }
            }
        }
    }

    /**
     * Assemble the data that is handed to the broken link repository for one collected link.
     *
     * @param int|string $linkType Key of the link type the entry belongs to
     * @param array $entryValue Collected link entry with the keys "table", "row", "uid", "field",
     *                          "substr" and optionally "link_title" and "pageAndAnchor"
     * @return array Record data, including the "url" that has to be checked
     */
    private function buildLinkRecord($linkType, array $entryValue): array
    {
        $table = $entryValue['table'];
        $row = $entryValue['row'];

        $record = [
            'headline' => BackendUtility::getRecordTitle($table, $row),
            'record_pid' => $row['pid'],
            'record_uid' => $entryValue['uid'],
            'table_name' => $table,
            'link_type' => $linkType,
            'link_title' => $entryValue['link_title'] ?? '',
            'field' => $entryValue['field'],
            'last_check' => time(),
        ];

        // Only look into the row if the table actually configures a type field
        $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
        if ($typeField && ($row[$typeField] ?? false)) {
            $record['element_type'] = $row[$typeField];
        }

        // -1 is stored if the table has no language field or the row does not carry it
        $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
        $record['language'] = ($languageField && isset($row[$languageField])) ? $row[$languageField] : -1;

        // Page with anchor, e.g. 18#1580, takes precedence over the plain token value
        $record['url'] = !empty($entryValue['pageAndAnchor'])
            ? $entryValue['pageAndAnchor']
            : $entryValue['substr']['tokenValue'];

        return $record;
    }
249
250
    /**
     * Recheck for broken links for one field in table for record.
     *
     * If the record can no longer be selected, its broken link entries are
     * removed. If the record's tstamp is older than $timestamp, nothing is done.
     * Otherwise the field is analyzed again and, if it contains links, the
     * existing broken link entries of the record are replaced by the result of a
     * new check.
     *
     * @param array $checkOptions Link types to check, handed on to checkLinks()
     * @param string $recordUid uid of record to check
     * @param string $table
     * @param string $field
     * @param int $timestamp Skip the recheck if the record's tstamp is older than this value; 0 disables the comparison
     * @param bool $considerHidden If true, the hidden restriction is removed when selecting the record
     */
    public function recheckLinks(
        array $checkOptions,
        string $recordUid,
        string $table,
        string $field,
        int $timestamp,
        bool $considerHidden = true
    ): void {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it. The null coalescing keeps this
        // guard from raising an "undefined array key" warning itself.
        if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
            return;
        }

        // The broken link repository expects the record uid as integer
        $recordUidAsInteger = (int)$recordUid;

        // get all links for $record / $table / $field combination
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable($table);
        if ($considerHidden) {
            $queryBuilder->getRestrictions()->removeByType(HiddenRestriction::class);
        }

        // NOTE(review): the column name "tstamp" is hard-coded here and not read
        // from the table's TCA ctrl section - confirm this holds for all checked tables
        $row = $queryBuilder->select('uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label'], $field, 'tstamp')
            ->from($table)
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($recordUid, Connection::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if (!$row) {
            // missing record: remove existing links
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $recordUidAsInteger);
            return;
        }
        if (($row['tstamp'] ?? 0) && $timestamp && ((int)($row['tstamp']) < $timestamp)) {
            // timestamp has not changed: no need to recheck
            return;
        }
        $resultsLinks = [];
        $this->analyzeRecord($resultsLinks, $table, [$field], $row);
        if ($resultsLinks) {
            // remove existing broken links from table
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $recordUidAsInteger);
            // find all broken links for list of links
            $this->checkLinks($resultsLinks, $checkOptions);
        }
    }
310
311
    /**
     * Find all supported broken links for a specific record
     *
     * A BeforeRecordIsAnalyzedEvent is dispatched first; the results and the
     * record returned by the event are used for the analysis. Each given field
     * that has soft references configured in TCA and a non-empty value is parsed
     * by its soft reference parsers, and the links found are added to $results.
     *
     * @param array $results Array of broken links
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyze
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        $event = new BeforeRecordIsAnalyzedEvent($table, $record, $fields, $this, $results);
        $this->eventDispatcher->dispatch($event);
        $results = $event->getResults();
        $record = $event->getRecord();

        // Put together content of all relevant fields
        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            // A field without TCA configuration or without a value in the record
            // is skipped below; the defaults avoid "undefined array key" warnings
            $conf = $GLOBALS['TCA'][$table]['columns'][$field]['config'] ?? [];
            $valueField = $record[$field] ?? '';

            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (!($conf['softref'] ?? false) || (string)$valueField === '') {
                continue;
            }

            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($conf['softref']);
            if ($softRefs === false) {
                continue;
            }

            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj */
                $softRefObj = BackendUtility::softRefParserObj($spKey);

                // Only continue if there is an object returned
                if (!is_object($softRefObj)) {
                    continue;
                }
                $softRefParams = $spParams;
                if (!is_array($softRefParams)) {
                    // set subst such that findRef will return substitutes for urls, emails etc
                    $softRefParams = ['subst' => true];
                }

                // Do processing
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $softRefParams);
                if (!is_array($resultArray) || !isset($resultArray['elements']) || !is_array($resultArray['elements'])) {
                    continue;
                }

                if ($spKey === 'typolink_tag') {
                    $this->analyzeTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyzeLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }
374
375
    /**
     * Collect the links of one soft reference parser result.
     *
     * Every element with a non-empty "subst" part is offered to all registered
     * link type objects to determine its type. The link is then stored in
     * $results below that type, keyed by "table:field:uid:tokenID".
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        foreach ($resultArray['elements'] as $softRefElement) {
            $substitute = $softRefElement['subst'];
            $recordUid = $record['uid'];
            if (empty($substitute)) {
                continue;
            }

            $linkType = '';
            foreach ($this->hookObjectsArr as $hookKey => $linkTypeObject) {
                $linkType = $linkTypeObject->fetchType($substitute, $linkType, $hookKey);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($linkType)) {
                    $substitute['type'] = $linkType;
                }
            }

            $linkKey = $table . ':' . $field . ':' . $recordUid . ':' . $substitute['tokenID'];
            $linkData = [
                'substr' => $substitute,
                'row' => $record,
                'table' => $table,
                'field' => $field,
                'uid' => $recordUid,
            ];
            // Set key by key so that data already stored for this link is kept
            foreach ($linkData as $name => $value) {
                $results[$linkType][$linkKey][$name] = $value;
            }
        }
    }
409
410
    /**
     * Collect the links of a "typolink_tag" soft reference parser result.
     *
     * The parsed content is split into a and link tag blocks. For each tag the
     * elements whose tokenID occurs in the tag are evaluated: a page reference
     * becomes the link target, and a tt_content reference directly following a
     * page reference is appended to it as "#c<uid>" anchor. The link is stored
     * in $results below its type, keyed by "table:field:uid:tokenID".
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $currentSubstitute = [];
        $tagBlocks = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
        $recordUid = $record['uid'];
        $linkType = '';
        $linkTitle = '';
        // Remembers across elements and tags whether the last evaluated reference was a page
        $previousWasPage = false;
        $numberOfBlocks = count($tagBlocks);
        // The tag blocks are located at the odd positions
        for ($position = 1; $position < $numberOfBlocks; $position += 2) {
            $tagBlock = $tagBlocks[$position];
            $pageAndAnchor = '';
            foreach ($resultArray['elements'] as $softRefElement) {
                $linkType = '';
                $substitute = $softRefElement['subst'];
                if (empty($substitute['tokenID']) || substr_count($tagBlock, $substitute['tokenID']) === 0) {
                    continue;
                }

                // Type of referenced record
                $recordReference = $substitute['recordRef'] ?? '';
                if (strpos($recordReference, 'pages') !== false) {
                    $currentSubstitute = $substitute;
                    // Contains number of the page
                    $pageAndAnchor = $substitute['tokenValue'];
                    $previousWasPage = true;
                } elseif (strpos($recordReference, 'tt_content') !== false && $previousWasPage) {
                    $pageAndAnchor .= '#c' . $substitute['tokenValue'];
                    $previousWasPage = false;
                } else {
                    $currentSubstitute = $substitute;
                }
                $linkTitle = strip_tags($tagBlock);
            }
            foreach ($this->hookObjectsArr as $hookKey => $linkTypeObject) {
                $linkType = $linkTypeObject->fetchType($currentSubstitute, $linkType, $hookKey);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($linkType)) {
                    $currentSubstitute['type'] = $linkType;
                }
            }

            $linkKey = $table . ':' . $field . ':' . $recordUid . ':' . $currentSubstitute['tokenID'];
            $linkData = [
                'substr' => $currentSubstitute,
                'row' => $record,
                'table' => $table,
                'field' => $field,
                'uid' => $recordUid,
                'link_title' => $linkTitle,
                'pageAndAnchor' => $pageAndAnchor,
            ];
            // Set key by key so that data already stored for this link is kept
            foreach ($linkData as $name => $value) {
                $results[$linkType][$linkKey][$name] = $value;
            }
        }
    }
468
469
    /**
     * Ask the broken link repository for the number of broken links of the
     * records on the configured pages, limited to the configured search fields.
     *
     * @return array array with the number of links found
     */
    public function getLinkCounts()
    {
        $numberOfBrokenLinks = $this->brokenLinkRepository->getNumberOfBrokenLinksForRecordsOnPages(
            $this->pids,
            $this->searchFields
        );

        return $numberOfBrokenLinks;
    }
478
479
    /**
     * Return the language service stored in $GLOBALS['LANG'].
     *
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        $languageService = $GLOBALS['LANG'];

        return $languageService;
    }
486
}
487