Passed
Push — master ( 9e6baf...a5dff7 )
by
unknown
13:01
created

LinkAnalyzer::getTSConfig()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
3
/*
4
 * This file is part of the TYPO3 CMS project.
5
 *
6
 * It is free software; you can redistribute it and/or modify it under
7
 * the terms of the GNU General Public License, either version 2
8
 * of the License, or any later version.
9
 *
10
 * For the full copyright and license information, please read the
11
 * LICENSE.txt file that was distributed with this source code.
12
 *
13
 * The TYPO3 project - inspiring people to share!
14
 */
15
16
namespace TYPO3\CMS\Linkvalidator;
17
18
use Psr\EventDispatcher\EventDispatcherInterface;
19
use TYPO3\CMS\Backend\Utility\BackendUtility;
20
use TYPO3\CMS\Core\Database\Connection;
21
use TYPO3\CMS\Core\Database\ConnectionPool;
22
use TYPO3\CMS\Core\Database\Query\QueryHelper;
23
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
24
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
25
use TYPO3\CMS\Core\Html\HtmlParser;
26
use TYPO3\CMS\Core\Localization\LanguageService;
27
use TYPO3\CMS\Core\Utility\GeneralUtility;
28
use TYPO3\CMS\Linkvalidator\Event\BeforeRecordIsAnalyzedEvent;
29
use TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype;
30
use TYPO3\CMS\Linkvalidator\Repository\BrokenLinkRepository;
31
32
/**
33
 * This class provides Processing plugin implementation
34
 * @internal
35
 */
36
class LinkAnalyzer
37
{
38
39
    /**
     * Fields to scan for links, grouped by table name; set by init().
     *
     * @var array<string, array<int, string>>
     */
    protected $searchFields = [];

    /**
     * Uids of the pages whose records are analyzed; set by init().
     *
     * @var array
     */
    protected $pids = [];

    /**
     * Per-table counter of links that were handed to a link type checker
     *
     * @var array
     */
    protected $linkCounts = [];

    /**
     * Per-table counter of links that were written to the broken link repository
     *
     * @var array
     */
    protected $brokenLinkCounts = [];

    /**
     * Link type checker instances created in init(), indexed by their link type key
     *
     * @var Linktype\AbstractLinktype[]
     */
    protected $hookObjectsArr = [];

    /**
     * The TSconfig handed to init(); its "linktypesConfig." section provides the per-linktype options.
     *
     * @var array
     */
    protected $tsConfig = [];

    /**
     * @var EventDispatcherInterface
     */
    protected $eventDispatcher;

    /**
     * @var BrokenLinkRepository
     */
    protected $brokenLinkRepository;
90
91
    /**
     * Keeps the injected collaborators and loads the language labels of the linkvalidator module.
     *
     * @param EventDispatcherInterface $eventDispatcher Dispatcher used to emit BeforeRecordIsAnalyzedEvent
     * @param BrokenLinkRepository $brokenLinkRepository Repository used to add and remove broken link entries
     */
    public function __construct(EventDispatcherInterface $eventDispatcher, BrokenLinkRepository $brokenLinkRepository)
    {
        $this->eventDispatcher = $eventDispatcher;
        $this->brokenLinkRepository = $brokenLinkRepository;

        $languageService = $this->getLanguageService();
        $languageService->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');
    }
97
98
    /**
     * Remember the configuration for the upcoming analysis and instantiate all registered link type checkers
     *
     * @param array $searchFields List of fields in which to search for links
     * @param string|array $pidList Page uids in which to search for links, either comma separated or as array
     * @param array $tsConfig The currently active TSconfig.
     */
    public function init(array $searchFields, $pidList, $tsConfig)
    {
        $this->searchFields = $searchFields;
        if (is_array($pidList)) {
            $this->pids = $pidList;
        } else {
            $this->pids = GeneralUtility::intExplode(',', $pidList, true);
        }
        $this->tsConfig = $tsConfig;

        // Every class registered for "checkLinks" becomes one link type checker
        $registeredLinktypes = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] ?? [];
        foreach ($registeredLinktypes as $linktypeKey => $linktypeClassName) {
            $linktype = GeneralUtility::makeInstance($linktypeClassName);
            $this->hookObjectsArr[$linktypeKey] = $linktype;
            // Called even without options, because setAdditionalConfig might rely on global configuration
            $linktype->setAdditionalConfig($tsConfig['linktypesConfig.'][$linktypeKey . '.'] ?? []);
        }
    }
119
120
    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * Existing broken link entries of the configured pages are removed for the given link types first.
     * Afterwards every configured table is read, its records are analyzed and the collected links are checked.
     * Nothing happens if no link type is given or no page uids are configured.
     *
     * @param array $checkOptions List of hook object to activate
     * @param bool $considerHidden Defines whether to look into hidden fields
     */
    public function getLinkStatistics($checkOptions = [], $considerHidden = false)
    {
        if (empty($checkOptions) || empty($this->pids)) {
            return;
        }

        $this->brokenLinkRepository->removeAllBrokenLinksOfRecordsOnPageIds(
            $this->pids,
            array_keys($checkOptions)
        );

        $results = [];
        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it. The null coalescing guard keeps
            // this expected case from raising an "undefined array key" warning.
            if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
                continue;
            }
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable($table);

            if ($considerHidden) {
                // Only keep the "deleted" restriction
                $queryBuilder->getRestrictions()
                    ->removeAll()
                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            }

            // Re-init selectFields for table
            $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields);
            $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
            if ($languageField) {
                $selectFields[] = $languageField;
            }
            // The type field is only selected if it is a configured column of the table
            $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
            if ($typeField && isset($GLOBALS['TCA'][$table]['columns'][$typeField])) {
                $selectFields[] = $typeField;
            }

            $result = $queryBuilder->select(...$selectFields)
                ->from($table)
                ->where(
                    $queryBuilder->expr()->in(
                        // Pages are matched by their own uid, all other records by the page they are stored on
                        ($table === 'pages' ? 'uid' : 'pid'),
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    )
                )
                ->execute();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetch()) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
        }
        $this->checkLinks($results, $checkOptions);
    }
182
183
    /**
     * Pass every collected link to the checker of its link type and store the broken ones
     *
     * A link is stored as broken if its checker reports a failure. If the request
     * parameter "showalllinks" is set, links that passed the check are stored as well.
     *
     * @param array $links Collected links, grouped by link type key (as filled by analyzeRecord())
     * @param array $checkOptions Link type keys with a truthy value for each type to check; if empty, all types are checked
     */
    protected function checkLinks(array $links, array $checkOptions)
    {
        foreach ($this->hookObjectsArr as $key => $hookObj) {
            // A link type without collected links or without activation is a regular case,
            // so missing keys are handled without raising "undefined array key" warnings
            if (!is_array($links[$key] ?? null) || (!empty($checkOptions) && empty($checkOptions[$key]))) {
                continue;
            }

            //  Check them
            foreach ($links[$key] as $entryValue) {
                $table = $entryValue['table'];
                $record = [];
                $record['headline'] = BackendUtility::getRecordTitle($table, $entryValue['row']);
                $record['record_pid'] = $entryValue['row']['pid'];
                $record['record_uid'] = $entryValue['uid'];
                $record['table_name'] = $table;
                $record['link_type'] = $key;
                // Only links found by analyzeTypoLinks() carry a title
                $record['link_title'] = $entryValue['link_title'] ?? null;
                $record['field'] = $entryValue['field'];
                $record['last_check'] = time();
                $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
                if ($typeField && ($entryValue['row'][$typeField] ?? false)) {
                    $record['element_type'] = $entryValue['row'][$typeField];
                }
                $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
                if ($languageField && isset($entryValue['row'][$languageField])) {
                    $record['language'] = $entryValue['row'][$languageField];
                } else {
                    $record['language'] = -1;
                }
                if (!empty($entryValue['pageAndAnchor'] ?? '')) {
                    // Page with anchor, e.g. 18#1580
                    $url = $entryValue['pageAndAnchor'];
                } else {
                    $url = $entryValue['substr']['tokenValue'];
                }
                $record['url'] = $url;
                // Counters start at zero for tables that were not seen before
                $this->linkCounts[$table] = ($this->linkCounts[$table] ?? 0) + 1;
                $checkUrl = $hookObj->checkLink($url, $entryValue, $this);

                if (!$checkUrl) {
                    // Broken link found
                    $this->brokenLinkRepository->addBrokenLink($record, false, $hookObj->getErrorParams());
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                } elseif (GeneralUtility::_GP('showalllinks')) {
                    // Working link, stored on request only
                    $this->brokenLinkRepository->addBrokenLink($record, true);
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                }
            }
        }
    }
233
234
    /**
     * Recheck for broken links for one field in table for record.
     *
     * If the record can not be found, its stored broken links are removed. If links are
     * found in the field, the stored broken links of the record are replaced by the result
     * of a fresh check.
     *
     * @param array $checkOptions Link types to check, passed on to checkLinks()
     * @param string $recordUid uid of record to check
     * @param string $table
     * @param string $field
     * @param int $timestamp - the recheck is skipped if the record's tstamp is set and lower than this value
     * @param bool $considerHidden If set, the hidden restriction is removed when reading the record
     */
    public function recheckLinks(
        array $checkOptions,
        string $recordUid,
        string $table,
        string $field,
        int $timestamp,
        bool $considerHidden = true
    ): void {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it
        if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
            return;
        }

        // The repository expects an integer uid, so convert the string once and use it everywhere
        $uid = (int)$recordUid;

        // get all links for $record / $table / $field combination
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable($table);
        if ($considerHidden) {
            $queryBuilder->getRestrictions()->removeByType(HiddenRestriction::class);
        }

        $row = $queryBuilder->select('uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label'], $field, 'tstamp')
            ->from($table)
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($uid, Connection::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if (!$row) {
            // missing record: remove existing links
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            return;
        }
        if (($row['tstamp'] ?? 0) && $timestamp && ((int)($row['tstamp']) < $timestamp)) {
            // timestamp has not changed: no need to recheck
            return;
        }
        $resultsLinks = [];
        $this->analyzeRecord($resultsLinks, $table, [$field], $row);
        if ($resultsLinks) {
            // remove existing broken links from table
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            // find all broken links for list of links
            $this->checkLinks($resultsLinks, $checkOptions);
        }
    }
294
295
    /**
     * Find all supported broken links for a specific record
     *
     * A BeforeRecordIsAnalyzedEvent is dispatched first; the results and the record
     * returned by the event are used for the analysis. Only fields with soft references
     * configured in TCA and with a non-empty value are looked at.
     *
     * @param array $results Array of broken links, extended by reference
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyze
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        $event = new BeforeRecordIsAnalyzedEvent($table, $record, $fields, $this, $results);
        $this->eventDispatcher->dispatch($event);
        $results = $event->getResults();
        $record = $event->getRecord();

        // Put together content of all relevant fields
        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            // Fields without TCA configuration or without a value in the record are skipped
            // below instead of raising "undefined array key" warnings
            $conf = $GLOBALS['TCA'][$table]['columns'][$field]['config'] ?? [];
            $valueField = $record[$field] ?? '';

            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (empty($conf['softref']) || (string)$valueField === '') {
                continue;
            }

            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($conf['softref']);
            if ($softRefs === false) {
                continue;
            }

            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj */
                $softRefObj = BackendUtility::softRefParserObj($spKey);

                // Only continue if there is an object returned
                if (!is_object($softRefObj)) {
                    continue;
                }
                // set subst such that findRef will return substitutes for urls, emails etc
                $softRefParams = is_array($spParams) ? $spParams : ['subst' => true];

                // Do processing
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $softRefParams);
                if (!is_array($resultArray) || !isset($resultArray['elements']) || !is_array($resultArray['elements'])) {
                    continue;
                }

                if ($spKey === 'typolink_tag') {
                    $this->analyzeTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyzeLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }
358
359
    /**
     * Find all supported broken links for a specific link list
     *
     * Every element with substitution data is offered to all link type checkers. The element
     * is stored in $results below the detected type, using "table:field:uid:tokenID" as key.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links, extended by reference
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        foreach ($resultArray['elements'] as $element) {
            // Elements without substitution data can not be checked
            $r = $element['subst'] ?? null;
            if (empty($r)) {
                continue;
            }

            $type = '';
            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($r, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $r['type'] = $type;
                }
            }

            $idRecord = $record['uid'];
            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $r['tokenID'];
            // Set key by key, so that values already stored for this link are kept
            $results[$type][$resultKey]['substr'] = $r;
            $results[$type][$resultKey]['row'] = $record;
            $results[$type][$resultKey]['table'] = $table;
            $results[$type][$resultKey]['field'] = $field;
            $results[$type][$resultKey]['uid'] = $idRecord;
        }
    }
394
395
    /**
     * Find all supported broken links for a specific typoLink
     *
     * The parsed content is split into "a" and "link" blocks. For each block the elements
     * whose token occurs in the block are combined: a reference to "pages" provides the
     * page, a following reference to "tt_content" is appended as anchor ("#c<uid>").
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links, extended by reference
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $currentR = [];
        $linkTags = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
        $idRecord = $record['uid'];
        $type = '';
        $title = '';
        // Tells whether the previously matched element referenced a page
        $wasPage = false;
        $countLinkTags = count($linkTags);
        // Only every second entry is inspected, starting at index 1 - presumably these are
        // the matched tag blocks; confirm against HtmlParser::splitIntoBlock()
        for ($i = 1; $i < $countLinkTags; $i += 2) {
            // NOTE(review): $currentR, $title and $wasPage are not reset per link tag. A tag
            // without a matching element is therefore stored with the values of the previous
            // tag. This is kept as is - confirm whether it is intended.
            $referencedRecordType = '';
            foreach ($resultArray['elements'] as $element) {
                $type = '';
                // Elements may lack substitution data; treat them like elements without token
                $r = $element['subst'] ?? [];
                if (empty($r['tokenID']) || substr_count($linkTags[$i], $r['tokenID']) === 0) {
                    continue;
                }

                // Type of referenced record; not every element references a record
                $recordRef = (string)($r['recordRef'] ?? '');
                if (strpos($recordRef, 'pages') !== false) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = true;
                } elseif (strpos($recordRef, 'tt_content') !== false && $wasPage === true) {
                    $referencedRecordType = $referencedRecordType . '#c' . $r['tokenValue'];
                    $wasPage = false;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }
            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($currentR, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $currentR['type'] = $type;
                }
            }
            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . ($currentR['tokenID'] ?? '');
            // Set key by key, so that values already stored for this link are kept
            $results[$type][$resultKey]['substr'] = $currentR;
            $results[$type][$resultKey]['row'] = $record;
            $results[$type][$resultKey]['table'] = $table;
            $results[$type][$resultKey]['field'] = $field;
            $results[$type][$resultKey]['uid'] = $idRecord;
            $results[$type][$resultKey]['link_title'] = $title;
            $results[$type][$resultKey]['pageAndAnchor'] = $referencedRecordType;
        }
    }
454
455
    /**
     * Ask the broken link repository for the number of broken links
     * on the configured pages, limited to the configured search fields
     *
     * @return array Result of BrokenLinkRepository::getNumberOfBrokenLinksForRecordsOnPages()
     */
    public function getLinkCounts()
    {
        $pageUids = $this->pids;
        $fieldsByTable = $this->searchFields;

        return $this->brokenLinkRepository->getNumberOfBrokenLinksForRecordsOnPages($pageUids, $fieldsByTable);
    }
464
465
    /**
     * Recursively collects the uids of the pages below $id. The list does not include $id itself.
     *
     * Deleted pages are never part of the list. Hidden pages are only added if $considerHidden
     * is set. The subpages of a page that is hidden and has "extendToSubpages" set are only
     * traversed if $considerHidden is set.
     *
     * @param int $id Start page id
     * @param int $depth Depth to traverse down the page tree.
     * @param int $begin Number of levels to skip before uids are added. Use "0" from outside usage
     * @param string $permsClause Perms clause, added as further constraint to the query
     * @param bool $considerHidden Whether to consider hidden pages or not
     * @return string Returns the list with a comma in the end (if any pages selected!)
     */
    public function extGetTreeList($id, $depth, $begin, $permsClause, $considerHidden = false)
    {
        $id = (int)$id;
        $depth = (int)$depth;
        $begin = (int)$begin;
        if ($depth === 0) {
            return '';
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $restrictions = $queryBuilder->getRestrictions();
        $restrictions->removeAll()->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $subPages = $queryBuilder
            ->select('uid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'pid',
                    $queryBuilder->createNamedParameter($id, \PDO::PARAM_INT)
                ),
                QueryHelper::stripLogicalOperatorPrefix($permsClause)
            )
            ->execute();

        $uidList = '';
        while ($subPage = $subPages->fetch()) {
            $isVisible = $subPage['hidden'] == 0;
            $hidesItsSubpages = $subPage['hidden'] == 1 && $subPage['extendToSubpages'] == 1;

            // The page itself is added once the start level is reached
            if ($begin <= 0 && ($considerHidden || $isVisible)) {
                $uidList .= $subPage['uid'] . ',';
            }
            // Descend as long as there is depth left
            if ($depth > 1 && ($considerHidden || !$hidesItsSubpages)) {
                $uidList .= $this->extGetTreeList(
                    $subPage['uid'],
                    $depth - 1,
                    $begin - 1,
                    $permsClause,
                    $considerHidden
                );
            }
        }
        return $uidList;
    }
523
524
    /**
     * Check if rootline contains a hidden page
     *
     * A page counts as hidden here if it is hidden and has "extendToSubpages" set. The given
     * page is checked first, then its parents are read from the pages table one by one until
     * a matching page is found or there is no further parent.
     *
     * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
     * @return bool TRUE if rootline contains a hidden page, FALSE if not
     */
    public function getRootLineIsHidden(array $pageInfo)
    {
        // Checked before the end of the rootline is detected, so that a hidden page
        // on the first level of the page tree is found as well
        if (($pageInfo['extendToSubpages'] ?? 0) == 1 && ($pageInfo['hidden'] ?? 0) == 1) {
            return true;
        }

        // The pid may be an integer or a numeric string, depending on where the row comes from
        $parentUid = (int)($pageInfo['pid'] ?? 0);
        if ($parentUid === 0) {
            // No parent page left
            return false;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()->removeAll();

        // "pid" has to be selected, otherwise the walk would end at the direct parent
        $row = $queryBuilder
            ->select('uid', 'pid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($parentUid, \PDO::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if (is_array($row)) {
            return $this->getRootLineIsHidden($row);
        }
        return false;
    }
560
561
    /**
     * Returns the language service stored in $GLOBALS['LANG']
     *
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        $languageService = $GLOBALS['LANG'];

        return $languageService;
    }
568
}
569