Completed
Push — master ( c58d66...8e87cd )
by
unknown
16:17
created

LinkAnalyzer::getLinkCounts()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the TYPO3 CMS project.
5
 *
6
 * It is free software; you can redistribute it and/or modify it under
7
 * the terms of the GNU General Public License, either version 2
8
 * of the License, or any later version.
9
 *
10
 * For the full copyright and license information, please read the
11
 * LICENSE.txt file that was distributed with this source code.
12
 *
13
 * The TYPO3 project - inspiring people to share!
14
 */
15
16
namespace TYPO3\CMS\Linkvalidator;
17
18
use Psr\EventDispatcher\EventDispatcherInterface;
19
use TYPO3\CMS\Backend\Utility\BackendUtility;
20
use TYPO3\CMS\Core\Database\Connection;
21
use TYPO3\CMS\Core\Database\ConnectionPool;
22
use TYPO3\CMS\Core\Database\Query\QueryHelper;
23
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
24
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
25
use TYPO3\CMS\Core\Html\HtmlParser;
26
use TYPO3\CMS\Core\Localization\LanguageService;
27
use TYPO3\CMS\Core\Utility\GeneralUtility;
28
use TYPO3\CMS\Linkvalidator\Event\BeforeRecordIsAnalyzedEvent;
29
use TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype;
30
use TYPO3\CMS\Linkvalidator\Repository\BrokenLinkRepository;
31
32
/**
33
 * This class provides Processing plugin implementation
34
 * @internal
35
 */
36
class LinkAnalyzer
37
{
38
39
    /**
     * Tables and the fields of each table that are searched for links,
     * as handed to init()
     *
     * @var array<string, array<int, string>>
     */
    protected $searchFields = [];

    /**
     * List of page uids (rootline downwards), as handed to init()
     *
     * @var array
     */
    protected $pids = [];

    /**
     * Number of links that were passed to a link type checker, keyed by table name
     *
     * @var array<string, int>
     */
    protected $linkCounts = [];

    /**
     * Number of entries written to the broken link repository, keyed by table name
     *
     * @var array<string, int>
     */
    protected $brokenLinkCounts = [];

    /**
     * Link type checkers registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'],
     * keyed by their registration key (the link type)
     *
     * @var Linktype\AbstractLinktype[]
     */
    protected $hookObjectsArr = [];

    /**
     * Reference to the element that is currently checked, table:uid, e.g. pages:85
     *
     * @var string
     */
    protected $recordReference = '';

    /**
     * The currently active TSconfig, as handed to init()
     *
     * @var array
     */
    protected $tsConfig = [];

    /**
     * Dispatcher used to emit BeforeRecordIsAnalyzedEvent
     *
     * @var EventDispatcherInterface
     */
    protected $eventDispatcher;

    /**
     * Storage the detected broken links are written to and removed from
     *
     * @var BrokenLinkRepository
     */
    protected $brokenLinkRepository;
97
98
    /**
     * Keeps the injected services and loads the language labels of the module.
     *
     * @param EventDispatcherInterface $eventDispatcher Dispatcher used while analyzing records
     * @param BrokenLinkRepository $brokenLinkRepository Storage for broken links
     */
    public function __construct(EventDispatcherInterface $eventDispatcher, BrokenLinkRepository $brokenLinkRepository)
    {
        $languageFile = 'EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf';

        $this->brokenLinkRepository = $brokenLinkRepository;
        $this->eventDispatcher = $eventDispatcher;
        $this->getLanguageService()->includeLLFile($languageFile);
    }
104
105
    /**
     * Remember the configuration for later calls and set up the registered link type checkers
     *
     * @param array $searchFields List of fields in which to search for links
     * @param string|array $pidList List of comma separated page uids in which to search for links, can be an array too
     * @param array $tsConfig The currently active TSconfig.
     */
    public function init(array $searchFields, $pidList, $tsConfig)
    {
        if (!is_array($pidList)) {
            $pidList = GeneralUtility::intExplode(',', $pidList, true);
        }
        $this->pids = $pidList;
        $this->searchFields = $searchFields;
        $this->tsConfig = $tsConfig;

        // Every registered class becomes one checker, stored under its registration key
        $registeredCheckers = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] ?? [];
        foreach ($registeredCheckers as $linkType => $checkerClassName) {
            $checker = GeneralUtility::makeInstance($checkerClassName);
            $this->hookObjectsArr[$linkType] = $checker;
            // Called even without TSconfig options: the checker might rely on global configuration
            $checker->setAdditionalConfig($tsConfig['linktypesConfig.'][$linkType . '.'] ?? []);
        }
    }
126
127
    /**
     * Collect the links of all configured tables on the configured pages, check them
     * and store the broken ones through the broken link repository.
     *
     * Previously stored broken links of the affected pages and link types are removed first.
     * Nothing happens when no check option is given or no page uid is configured.
     *
     * @param array $checkOptions Link types to check; the keys are the link type names
     * @param bool $considerHidden Defines whether to look into hidden fields
     */
    public function getLinkStatistics($checkOptions = [], $considerHidden = false)
    {
        if (empty($checkOptions) || empty($this->pids)) {
            return;
        }

        $this->brokenLinkRepository->removeAllBrokenLinksOfRecordsOnPageIds(
            $this->pids,
            array_keys($checkOptions)
        );

        $results = [];
        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it. The null coalescing keeps PHP
            // from warning about the missing array key.
            if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
                continue;
            }
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable($table);

            if ($considerHidden) {
                // Only deleted records stay excluded
                $queryBuilder->getRestrictions()
                    ->removeAll()
                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            }

            // Re-init selectFields for table
            $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields);
            $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
            if ($languageField) {
                $selectFields[] = $languageField;
            }
            // The type field is only selected when it is a real column of the table
            $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
            if ($typeField && isset($GLOBALS['TCA'][$table]['columns'][$typeField])) {
                $selectFields[] = $typeField;
            }

            $result = $queryBuilder->select(...$selectFields)
                ->from($table)
                ->where(
                    $queryBuilder->expr()->in(
                        // Pages are matched by their own uid, all other records by the page they are stored on
                        ($table === 'pages' ? 'uid' : 'pid'),
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    )
                )
                ->execute();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetch()) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
        }
        $this->checkLinks($results, $checkOptions);
    }
189
190
    /**
     * Run the registered link type checkers on the collected links and hand the
     * outcome to the broken link repository.
     *
     * A link that fails its check is stored together with the error parameters of
     * the checker. A link that passes is only stored when the request parameter
     * "showalllinks" is set.
     *
     * @param array $links Collected links, keyed by link type and then by entry key
     * @param array $checkOptions Link types to check, keyed by link type; when not empty,
     *                            link types without a truthy value are skipped
     */
    protected function checkLinks(array $links, array $checkOptions)
    {
        foreach ($this->hookObjectsArr as $key => $hookObj) {
            // empty() and ?? instead of direct access: a link type without collected
            // links or without a check option is skipped silently
            if (!is_array($links[$key] ?? null) || (!empty($checkOptions) && empty($checkOptions[$key]))) {
                continue;
            }

            //  Check them
            foreach ($links[$key] as $entryValue) {
                $table = $entryValue['table'];
                $row = $entryValue['row'];

                $record = [];
                $record['headline'] = BackendUtility::getRecordTitle($table, $row);
                $record['record_pid'] = $row['pid'];
                $record['record_uid'] = $entryValue['uid'];
                $record['table_name'] = $table;
                $record['link_type'] = $key;
                // Only entries created by analyzeTypoLinks() carry a link title
                $record['link_title'] = $entryValue['link_title'] ?? '';
                $record['field'] = $entryValue['field'];
                $record['last_check'] = time();

                $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
                if ($typeField && ($row[$typeField] ?? false)) {
                    $record['element_type'] = $row[$typeField];
                }

                $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
                if ($languageField && isset($row[$languageField])) {
                    $record['language'] = $row[$languageField];
                } else {
                    $record['language'] = -1;
                }

                $this->recordReference = $entryValue['substr']['recordRef'] ?? '';
                if (!empty($entryValue['pageAndAnchor'] ?? '')) {
                    // Page with anchor, e.g. 18#1580
                    $url = $entryValue['pageAndAnchor'];
                } else {
                    $url = $entryValue['substr']['tokenValue'] ?? '';
                }
                $record['url'] = $url;

                // The counter of a table does not exist before its first link is counted
                $this->linkCounts[$table] = ($this->linkCounts[$table] ?? 0) + 1;
                $checkUrl = $hookObj->checkLink($url, $entryValue, $this);

                if (!$checkUrl) {
                    // Broken link found
                    $this->brokenLinkRepository->addBrokenLink($record, false, $hookObj->getErrorParams());
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                } elseif (GeneralUtility::_GP('showalllinks')) {
                    $this->brokenLinkRepository->addBrokenLink($record, true);
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                }
            }
        }
    }
241
242
    /**
     * Recheck for broken links for one field in table for record.
     *
     * When the record no longer exists, its stored broken links are removed.
     * When the record's modification time equals $timestamp, nothing is done.
     * Otherwise the field is analyzed again; if it contains links, the stored
     * broken links of the record are replaced by the result of a fresh check.
     *
     * @param array $checkOptions Link types to check, keyed by link type
     * @param string $recordUid uid of record to check
     * @param string $table
     * @param string $field
     * @param int $timestamp - only recheck if timestamp changed
     * @param bool $considerHidden Whether hidden records are looked at as well
     */
    public function recheckLinks(
        array $checkOptions,
        string $recordUid,
        string $table,
        string $field,
        int $timestamp,
        bool $considerHidden = true
    ): void {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it
        if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
            return;
        }

        // The query parameter and the repository are handed an integer uid
        $uid = (int)$recordUid;

        // get all links for $record / $table / $field combination
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable($table);
        if ($considerHidden) {
            $queryBuilder->getRestrictions()->removeByType(HiddenRestriction::class);
        }

        $selectFields = ['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label'], $field];
        // The modification time has to be part of the result, otherwise the
        // "unchanged" comparison below has nothing to compare against
        $timestampField = (string)($GLOBALS['TCA'][$table]['ctrl']['tstamp'] ?? '');
        if ($timestampField !== '') {
            $selectFields[] = $timestampField;
        }

        $row = $queryBuilder->select(...$selectFields)
            ->from($table)
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($uid, Connection::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if (!$row) {
            // missing record: remove existing links
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            return;
        }
        if ($timestampField !== '' && $timestamp === (int)($row[$timestampField] ?? 0)) {
            // timestamp has not changed: no need to recheck
            return;
        }
        $resultsLinks = [];
        $this->analyzeRecord($resultsLinks, $table, [$field], $row);
        if ($resultsLinks) {
            // remove existing broken links from table
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            // find all broken links for list of links
            $this->checkLinks($resultsLinks, $checkOptions);
        }
    }
302
303
    /**
     * Collect all links found in the given fields of one record
     *
     * A BeforeRecordIsAnalyzedEvent is dispatched first; the results and the
     * record returned by the event are the ones that are used afterwards.
     * Only fields with a soft reference configuration in TCA and a non-empty
     * value are parsed.
     *
     * @param array $results Collected links, extended by this method
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyze
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        $event = new BeforeRecordIsAnalyzedEvent($table, $record, $fields, $this, $results);
        $this->eventDispatcher->dispatch($event);
        $results = $event->getResults();
        $record = $event->getRecord();

        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            $valueField = $record[$field] ?? '';
            $softRefList = $GLOBALS['TCA'][$table]['columns'][$field]['config']['softref'] ?? '';

            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (!$softRefList || (string)$valueField === '') {
                continue;
            }

            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($softRefList);
            if ($softRefs === false) {
                continue;
            }

            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj */
                $softRefObj = BackendUtility::softRefParserObj($spKey);

                // Skip soft reference keys no parser object is available for
                if (!is_object($softRefObj)) {
                    continue;
                }
                $softRefParams = $spParams;
                if (!is_array($softRefParams)) {
                    // set subst such that findRef will return substitutes for urls, emails etc
                    $softRefParams = ['subst' => true];
                }

                // Do processing
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $softRefParams);
                if (!is_array($resultArray) || !isset($resultArray['elements']) || !is_array($resultArray['elements'])) {
                    continue;
                }

                if ($spKey === 'typolink_tag') {
                    $this->analyzeTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyzeLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }
368
369
    /**
     * Gives access to the TSconfig handed to init().
     *
     * Link checkers receive this object in their checkLink() method and
     * can read the configuration through this getter.
     *
     * @return array
     */
    public function getTSConfig()
    {
        $activeTsConfig = $this->tsConfig;

        return $activeTsConfig;
    }
381
382
    /**
     * Add the links of a soft reference parser result to the collected links
     *
     * Every element with substitution information is offered to all registered
     * link type checkers to determine its type and is then stored under
     * $results[type]["table:field:uid:tokenID"].
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Collected links, extended by this method
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        $idRecord = $record['uid'];
        foreach ($resultArray['elements'] as $element) {
            // Elements without substitution information carry nothing that could be checked
            $r = $element['subst'] ?? [];
            if (empty($r)) {
                continue;
            }

            $type = '';
            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($r, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $r['type'] = $type;
                }
            }

            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $r['tokenID'];
            $results[$type][$resultKey]['substr'] = $r;
            $results[$type][$resultKey]['row'] = $record;
            $results[$type][$resultKey]['table'] = $table;
            $results[$type][$resultKey]['field'] = $field;
            $results[$type][$resultKey]['uid'] = $idRecord;
        }
    }
417
418
    /**
     * Add the links of a "typolink_tag" soft reference parser result to the collected links
     *
     * The parsed content is split into its a/link tag blocks. For each block the
     * elements whose token occurs in the block are looked up; the link title is the
     * block without its tags. A page reference that is followed by a tt_content
     * reference is stored as "<page>#c<content uid>" in "pageAndAnchor".
     *
     * All per-link state is reset for every tag, and a tag that matches no element
     * is skipped, so it cannot overwrite the entry of the previous link.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Collected links, extended by this method
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $linkTags = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
        $idRecord = $record['uid'];
        $countLinkTags = count($linkTags);
        // Only every second entry, starting at index 1, is looked at
        for ($i = 1; $i < $countLinkTags; $i += 2) {
            $currentR = [];
            $referencedRecordType = '';
            $title = '';
            $wasPage = false;
            foreach ($resultArray['elements'] as $element) {
                $r = $element['subst'] ?? [];
                if (empty($r['tokenID']) || substr_count($linkTags[$i], $r['tokenID']) === 0) {
                    continue;
                }

                // Type of referenced record
                $recordRef = (string)($r['recordRef'] ?? '');
                if (strpos($recordRef, 'pages') !== false) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = true;
                } elseif (strpos($recordRef, 'tt_content') !== false && $wasPage) {
                    // Content element on the page found before: append it as anchor
                    $referencedRecordType = $referencedRecordType . '#c' . $r['tokenValue'];
                    $wasPage = false;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }

            if ($currentR === []) {
                // No element belongs to this tag: there is no link to store
                continue;
            }

            $type = '';
            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($currentR, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $currentR['type'] = $type;
                }
            }

            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $currentR['tokenID'];
            $results[$type][$resultKey]['substr'] = $currentR;
            $results[$type][$resultKey]['row'] = $record;
            $results[$type][$resultKey]['table'] = $table;
            $results[$type][$resultKey]['field'] = $field;
            $results[$type][$resultKey]['uid'] = $idRecord;
            $results[$type][$resultKey]['link_title'] = $title;
            $results[$type][$resultKey]['pageAndAnchor'] = $referencedRecordType;
        }
    }
477
478
    /**
     * Ask the broken link repository for the number of broken links of the
     * configured search fields on the configured pages
     *
     * @return array array with the number of links found
     */
    public function getLinkCounts()
    {
        $pageUids = $this->pids;
        $fieldsByTable = $this->searchFields;

        return $this->brokenLinkRepository->getNumberOfBrokenLinksForRecordsOnPages($pageUids, $fieldsByTable);
    }
487
488
    /**
     * Generates a list of page uids below $id. The list does not include $id itself.
     *
     * Own implementation of the recursion known from
     * TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList.
     *
     * Deleted pages and pages not matching $permsClause are never part of the list.
     * Unless $considerHidden is set, hidden pages are left out as well, and the
     * subpages of a hidden page with "extendToSubpages" are not traversed.
     *
     * @param int $id Start page id
     * @param int $depth Depth to traverse down the page tree.
     * @param int $begin is an optional integer that determines at which level to start. use "0" from outside usage
     * @param string $permsClause Perms clause
     * @param bool $considerHidden Whether to consider hidden pages or not
     * @return string Returns the list with a comma in the end (if any pages selected!)
     */
    public function extGetTreeList($id, $depth, $begin, $permsClause, $considerHidden = false)
    {
        $depth = (int)$depth;
        $begin = (int)$begin;
        $id = (int)$id;
        $theList = '';
        if ($depth === 0) {
            return $theList;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $result = $queryBuilder
            ->select('uid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'pid',
                    $queryBuilder->createNamedParameter($id, Connection::PARAM_INT)
                ),
                QueryHelper::stripLogicalOperatorPrefix($permsClause)
            )
            ->execute();

        while ($row = $result->fetch()) {
            $isVisible = (int)$row['hidden'] === 0;
            $hidesSubpages = (int)$row['hidden'] === 1 && (int)$row['extendToSubpages'] === 1;

            // Pages above the start level are traversed, but not listed
            if ($begin <= 0 && ($isVisible || $considerHidden)) {
                $theList .= $row['uid'] . ',';
            }
            if ($depth > 1 && (!$hidesSubpages || $considerHidden)) {
                $theList .= $this->extGetTreeList(
                    $row['uid'],
                    $depth - 1,
                    $begin - 1,
                    $permsClause,
                    $considerHidden
                );
            }
        }
        return $theList;
    }
546
547
    /**
     * Check if rootline contains a hidden page
     *
     * A page counts as hiding its rootline when it is hidden and has
     * "extendToSubpages" set. The check starts with the given page and walks
     * up through the parent pages until a page without parent (pid 0) is reached
     * or a parent page can not be found.
     *
     * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
     * @return bool TRUE if rootline contains a hidden page, FALSE if not
     */
    public function getRootLineIsHidden(array $pageInfo)
    {
        // Checked before the pid, so that a hidden page on root level is detected as well
        if ((int)($pageInfo['extendToSubpages'] ?? 0) === 1 && (int)($pageInfo['hidden'] ?? 0) === 1) {
            return true;
        }

        // Cast, because database rows may deliver the pid as string
        $parentUid = (int)($pageInfo['pid'] ?? 0);
        if ($parentUid === 0) {
            return false;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()->removeAll();

        // "pid" has to be selected, otherwise the walk ends after the direct parent
        $row = $queryBuilder
            ->select('uid', 'pid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($parentUid, Connection::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if ($row !== false) {
            return $this->getRootLineIsHidden($row);
        }
        return false;
    }
583
584
    /**
     * Returns the language service kept in $GLOBALS['LANG']
     *
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        $languageService = $GLOBALS['LANG'];

        return $languageService;
    }
591
}
592