Completed
Push — master ( a8b8b7...ae10a2 )
by
unknown
21:25
created

LinkAnalyzer::getTSConfig()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
namespace TYPO3\CMS\Linkvalidator;
3
4
/*
5
 * This file is part of the TYPO3 CMS project.
6
 *
7
 * It is free software; you can redistribute it and/or modify it under
8
 * the terms of the GNU General Public License, either version 2
9
 * of the License, or any later version.
10
 *
11
 * For the full copyright and license information, please read the
12
 * LICENSE.txt file that was distributed with this source code.
13
 *
14
 * The TYPO3 project - inspiring people to share!
15
 */
16
17
use Psr\EventDispatcher\EventDispatcherInterface;
18
use TYPO3\CMS\Backend\Utility\BackendUtility;
19
use TYPO3\CMS\Core\Database\Connection;
20
use TYPO3\CMS\Core\Database\ConnectionPool;
21
use TYPO3\CMS\Core\Database\Query\QueryHelper;
22
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
23
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
24
use TYPO3\CMS\Core\Html\HtmlParser;
25
use TYPO3\CMS\Core\Localization\LanguageService;
26
use TYPO3\CMS\Core\Utility\GeneralUtility;
27
use TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype;
28
use TYPO3\CMS\Linkvalidator\Repository\BrokenLinkRepository;
29
30
/**
 * Collects links from configured record fields (via soft reference parsers)
 * and runs the registered link type checkers on them. Links reported as
 * broken are handed to the BrokenLinkRepository.
 *
 * @internal
 */
class LinkAnalyzer
{
    /**
     * Array of tables and fields to search for broken links
     * (table name => list of field names)
     *
     * @var array
     */
    protected $searchFields = [];

    /**
     * List of page uids (rootline downwards)
     *
     * @var array
     */
    protected $pids = [];

    /**
     * Array of tables and the number of external links they contain
     *
     * @var array
     */
    protected $linkCounts = [];

    /**
     * Array of tables and the number of broken external links they contain
     *
     * @var array
     */
    protected $brokenLinkCounts = [];

    /**
     * Link type checkers, keyed by the link type key they were registered with
     *
     * @var Linktype\AbstractLinktype[]
     */
    protected $hookObjectsArr = [];

    /**
     * Reference to the current element with table:uid, e.g. pages:85
     *
     * @var string
     */
    protected $recordReference = '';

    /**
     * The currently active TSconfig. Will be passed to the init function.
     *
     * @var array
     */
    protected $tsConfig = [];

    /**
     * @var EventDispatcherInterface
     */
    protected $eventDispatcher;

    /**
     * @var BrokenLinkRepository
     */
    protected $brokenLinkRepository;

    /**
     * Stores the collaborators and loads the module's language labels.
     *
     * @param EventDispatcherInterface $eventDispatcher Dispatcher used in analyzeRecord()
     * @param BrokenLinkRepository $brokenLinkRepository Storage for broken links
     */
    public function __construct(EventDispatcherInterface $eventDispatcher, BrokenLinkRepository $brokenLinkRepository)
    {
        $this->eventDispatcher = $eventDispatcher;
        $this->brokenLinkRepository = $brokenLinkRepository;
        $this->getLanguageService()->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');
    }

    /**
     * Store all the needed configuration values in class variables and
     * instantiate the link type checkers registered in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'].
     *
     * @param array $searchFields List of fields in which to search for links
     * @param string|array $pidList List of comma separated page uids in which to search for links, can be an array too
     * @param array $tsConfig The currently active TSconfig.
     */
    public function init(array $searchFields, $pidList, $tsConfig)
    {
        $this->searchFields = $searchFields;
        $this->pids = is_array($pidList) ? $pidList : GeneralUtility::intExplode(',', $pidList, true);
        $this->tsConfig = $tsConfig;

        // Hook to handle own checks
        foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] ?? [] as $key => $className) {
            $this->hookObjectsArr[$key] = GeneralUtility::makeInstance($className);
            $options = $tsConfig['linktypesConfig.'][$key . '.'] ?? [];
            // setAdditionalConfig might use global configuration, so still call it, even if options are empty
            $this->hookObjectsArr[$key]->setAdditionalConfig($options);
        }
    }

    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * Does nothing when no check options are given or no page uids were set
     * via init(). Otherwise the stored broken links for the current pages and
     * link types are removed first, then all configured tables are scanned.
     *
     * @param array $checkOptions List of hook object to activate
     * @param bool $considerHidden Defines whether to look into hidden fields
     */
    public function getLinkStatistics($checkOptions = [], $considerHidden = false)
    {
        $results = [];
        if (empty($checkOptions) || empty($this->pids)) {
            return;
        }

        $this->brokenLinkRepository->removeAllBrokenLinksOfRecordsOnPageIds(
            $this->pids,
            array_keys($checkOptions)
        );

        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it
            if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
                continue;
            }
            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable($table);

            if ($considerHidden) {
                // Keep only the "deleted" restriction so hidden records are included
                $queryBuilder->getRestrictions()
                    ->removeAll()
                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            }

            // Re-init selectFields for table
            $selectFields = array_merge(['uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label']], $fields);
            if ($GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false) {
                $selectFields[] = $GLOBALS['TCA'][$table]['ctrl']['languageField'];
            }
            if ($GLOBALS['TCA'][$table]['ctrl']['type'] ?? false) {
                $selectFields[] = $GLOBALS['TCA'][$table]['ctrl']['type'];
            }

            // Pages are matched by their own uid, all other records by the page they live on
            $result = $queryBuilder->select(...$selectFields)
                ->from($table)
                ->where(
                    $queryBuilder->expr()->in(
                        ($table === 'pages' ? 'uid' : 'pid'),
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    )
                )
                ->execute();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetch()) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
        }
        $this->checkLinks($results, $checkOptions);
    }

    /**
     * Runs every registered link type checker on the links collected for its
     * type and stores the links that failed the check.
     *
     * @param array $links Collected links, grouped by link type key (as built by analyzeRecord())
     * @param array $checkOptions Link type keys to check; if not empty, only types with a truthy entry are checked
     */
    protected function checkLinks(array $links, array $checkOptions)
    {
        foreach ($this->hookObjectsArr as $key => $hookObj) {
            if (!is_array($links[$key] ?? null) || (!empty($checkOptions) && empty($checkOptions[$key]))) {
                continue;
            }

            //  Check them
            foreach ($links[$key] as $entryValue) {
                $table = $entryValue['table'];
                $row = $entryValue['row'];

                $record = [];
                $record['headline'] = BackendUtility::getRecordTitle($table, $row);
                $record['record_pid'] = $row['pid'];
                $record['record_uid'] = $entryValue['uid'];
                $record['table_name'] = $table;
                $record['link_type'] = $key;
                // Only analyzeTypoLinks() sets a link title; other links have none
                $record['link_title'] = $entryValue['link_title'] ?? null;
                $record['field'] = $entryValue['field'];
                $record['last_check'] = time();

                $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
                if ($typeField && ($row[$typeField] ?? false)) {
                    $record['element_type'] = $row[$typeField];
                }

                $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
                if ($languageField && isset($row[$languageField])) {
                    $record['language'] = $row[$languageField];
                } else {
                    $record['language'] = -1;
                }

                $this->recordReference = $entryValue['substr']['recordRef'];
                if (!empty($entryValue['pageAndAnchor'] ?? '')) {
                    // Page with anchor, e.g. 18#1580
                    $url = $entryValue['pageAndAnchor'];
                } else {
                    $url = $entryValue['substr']['tokenValue'];
                }
                $record['url'] = $url;

                // Counters start out unset for every table
                $this->linkCounts[$table] = ($this->linkCounts[$table] ?? 0) + 1;
                $checkUrl = $hookObj->checkLink($url, $entryValue, $this);

                if (!$checkUrl) {
                    // Broken link found
                    $this->brokenLinkRepository->addBrokenLink($record, false, $hookObj->getErrorParams());
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                } elseif (GeneralUtility::_GP('showalllinks')) {
                    // Valid link, but the request asks for all links to be stored
                    $this->brokenLinkRepository->addBrokenLink($record, true);
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                }
            }
        }
    }

    /**
     * Recheck for broken links for one field in table for record.
     *
     * If the record no longer exists, its stored broken links are removed.
     *
     * @param array $checkOptions
     * @param string $recordUid uid of record to check
     * @param string $table
     * @param string $field
     * @param int $timestamp - only recheck if timestamp changed
     * @param bool $considerHidden
     */
    public function recheckLinks(
        array $checkOptions,
        string $recordUid,
        string $table,
        string $field,
        int $timestamp,
        bool $considerHidden = true
    ): void {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it
        if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
            return;
        }

        // The repository expects an integer uid
        $uid = (int)$recordUid;

        // get all links for $record / $table / $field combination
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable($table);
        if ($considerHidden) {
            $queryBuilder->getRestrictions()->removeByType(HiddenRestriction::class);
        }

        $row = $queryBuilder->select('uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label'], $field)
            ->from($table)
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($uid, Connection::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if (!$row) {
            // missing record: remove existing links
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            return;
        }
        // NOTE(review): no "timestamp" column is selected above, so unless the
        // label or $field happens to be named "timestamp" this only matches
        // for $timestamp === 0. Which column was meant here needs confirming.
        if ($timestamp === (int)($row['timestamp'] ?? 0)) {
            // timestamp has not changed: no need to recheck
            return;
        }
        $resultsLinks = [];
        $this->analyzeRecord($resultsLinks, $table, [$field], $row);
        if ($resultsLinks) {
            // remove existing broken links from table
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            // find all broken links for list of links
            $this->checkLinks($resultsLinks, $checkOptions);
        }
    }

    /**
     * Find all supported broken links for a specific record
     *
     * Dispatches a BeforeRecordIsAnalyzedEvent first; the record and results
     * returned by the event are used for the actual analysis.
     *
     * @param array $results Array of broken links
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyze
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        $event = new Event\BeforeRecordIsAnalyzedEvent($table, $record, $fields, $this, $results);
        $this->eventDispatcher->dispatch($event);
        $results = $event->getResults();
        $record = $event->getRecord();

        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            $conf = $GLOBALS['TCA'][$table]['columns'][$field]['config'] ?? [];
            $valueField = $record[$field] ?? '';

            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (empty($conf['softref']) || (string)$valueField === '') {
                continue;
            }

            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($conf['softref']);
            if ($softRefs === false) {
                continue;
            }

            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj */
                $softRefObj = BackendUtility::softRefParserObj($spKey);

                // If there is an object returned...
                if (!is_object($softRefObj)) {
                    continue;
                }
                $softRefParams = $spParams;
                if (!is_array($softRefParams)) {
                    // set subst such that findRef will return substitutes for urls, emails etc
                    $softRefParams = ['subst' => true];
                }

                // Do processing
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $softRefParams);
                if (empty($resultArray['elements'])) {
                    continue;
                }

                if ($spKey === 'typolink_tag') {
                    $this->analyzeTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyzeLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }

    /**
     * Returns the TSconfig that was passed to the init() method.
     *
     * This can be used by link checkers that get a reference of this
     * object passed to the checkLink() method.
     *
     * @return array
     */
    public function getTSConfig()
    {
        return $this->tsConfig;
    }

    /**
     * Find all supported broken links for a specific link list
     *
     * Each element with a non-empty "subst" part is stored in $results under
     * the link type reported by the link type checkers.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        $idRecord = $record['uid'];
        foreach ($resultArray['elements'] as $element) {
            $r = $element['subst'] ?? [];
            $type = '';
            if (empty($r)) {
                continue;
            }

            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($r, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $r['type'] = $type;
                }
            }

            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $r['tokenID'];
            // Update single keys only, so data already stored under this key survives
            $entry = $results[$type][$resultKey] ?? [];
            $entry['substr'] = $r;
            $entry['row'] = $record;
            $entry['table'] = $table;
            $entry['field'] = $field;
            $entry['uid'] = $idRecord;
            $results[$type][$resultKey] = $entry;
        }
    }

    /**
     * Find all supported broken links for a specific typoLink
     *
     * The parsed content is split into a/link tags. For every tag the soft
     * reference elements whose token occurs in that tag are looked up. Tags
     * without any matching element are skipped.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Array of broken links
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $linkTags = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
        $idRecord = $record['uid'];
        $type = '';
        $title = '';
        // Set once a page reference was seen; lets a following tt_content
        // reference be appended to it as an anchor
        $wasPage = false;
        $countLinkTags = count($linkTags);
        // Only the odd indexes are processed as link tags
        for ($i = 1; $i < $countLinkTags; $i += 2) {
            // Reset per tag: otherwise a tag without matching element would
            // re-register the previous link and wipe its pageAndAnchor value
            $currentR = [];
            $referencedRecordType = '';
            foreach ($resultArray['elements'] as $element) {
                $type = '';
                $r = $element['subst'] ?? [];
                if (empty($r['tokenID']) || substr_count($linkTags[$i], $r['tokenID']) === 0) {
                    continue;
                }

                // Type of referenced record
                if (strpos($r['recordRef'], 'pages') !== false) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = true;
                } elseif (strpos($r['recordRef'], 'tt_content') !== false && $wasPage === true) {
                    $referencedRecordType = $referencedRecordType . '#c' . $r['tokenValue'];
                    $wasPage = false;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }

            if (empty($currentR)) {
                // No reference belongs to this tag, nothing to validate
                continue;
            }

            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($currentR, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $currentR['type'] = $type;
                }
            }

            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $currentR['tokenID'];
            // Update single keys only, so data already stored under this key survives
            $entry = $results[$type][$resultKey] ?? [];
            $entry['substr'] = $currentR;
            $entry['row'] = $record;
            $entry['table'] = $table;
            $entry['field'] = $field;
            $entry['uid'] = $idRecord;
            $entry['link_title'] = $title;
            $entry['pageAndAnchor'] = $referencedRecordType;
            $results[$type][$resultKey] = $entry;
        }
    }

    /**
     * Fill a marker array with the number of links found in a list of pages
     *
     * The numbers are fetched from the BrokenLinkRepository for the page uids
     * and search fields passed to init().
     *
     * @return array array with the number of links found
     */
    public function getLinkCounts()
    {
        return $this->brokenLinkRepository->getNumberOfBrokenLinksForRecordsOnPages($this->pids, $this->searchFields);
    }

    /**
     * Calls TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList.
     * Although this duplicates the function TYPO3\CMS\Backend\FrontendBackendUserAuthentication::extGetTreeList
     * this is necessary to create the object that is used recursively by the original function.
     *
     * Generates a list of page uids from $id. List does not include $id itself.
     * The only pages excluded from the list are deleted pages.
     *
     * @param int $id Start page id
     * @param int $depth Depth to traverse down the page tree.
     * @param int $begin is an optional integer that determines at which level to start. use "0" from outside usage
     * @param string $permsClause Perms clause
     * @param bool $considerHidden Whether to consider hidden pages or not
     * @return string Returns the list with a comma in the end (if any pages selected!)
     */
    public function extGetTreeList($id, $depth, $begin, $permsClause, $considerHidden = false)
    {
        $depth = (int)$depth;
        $begin = (int)$begin;
        $id = (int)$id;
        $theList = '';
        if ($depth === 0) {
            return $theList;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $result = $queryBuilder
            ->select('uid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'pid',
                    $queryBuilder->createNamedParameter($id, \PDO::PARAM_INT)
                ),
                QueryHelper::stripLogicalOperatorPrefix($permsClause)
            )
            ->execute();

        while ($row = $result->fetch()) {
            // A page is listed once the start level is reached and it is visible (or hidden pages count)
            if ($begin <= 0 && ($row['hidden'] == 0 || $considerHidden)) {
                $theList .= $row['uid'] . ',';
            }
            // Do not descend into a hidden page that extends its hidden state to subpages
            if ($depth > 1 && (!($row['hidden'] == 1 && $row['extendToSubpages'] == 1) || $considerHidden)) {
                $theList .= $this->extGetTreeList(
                    $row['uid'],
                    $depth - 1,
                    $begin - 1,
                    $permsClause,
                    $considerHidden
                );
            }
        }
        return $theList;
    }

    /**
     * Check if rootline contains a hidden page
     *
     * A page counts as hiding its rootline when it is hidden and has
     * "extendToSubpages" set. The check starts with the given page and walks
     * up through the parent pages. A page with pid 0 ends the walk with FALSE.
     *
     * @param array $pageInfo Array with uid, pid, title, hidden, extendToSubpages from pages table
     * @return bool TRUE if rootline contains a hidden page, FALSE if not
     */
    public function getRootLineIsHidden(array $pageInfo)
    {
        // Database rows may deliver the pid as string, so compare the integer value
        $pid = (int)($pageInfo['pid'] ?? 0);
        if (isset($pageInfo['pid']) && $pid === 0) {
            return false;
        }

        if (($pageInfo['extendToSubpages'] ?? 0) == 1 && ($pageInfo['hidden'] ?? 0) == 1) {
            return true;
        }

        if ($pid === 0) {
            // No parent page known, nothing further to look up
            return false;
        }

        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)->getQueryBuilderForTable('pages');
        $queryBuilder->getRestrictions()->removeAll();

        // "pid" has to be selected as well, otherwise the walk ends after the direct parent
        $row = $queryBuilder
            ->select('uid', 'pid', 'title', 'hidden', 'extendToSubpages')
            ->from('pages')
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($pid, \PDO::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if ($row !== false) {
            return $this->getRootLineIsHidden($row);
        }
        return false;
    }

    /**
     * Returns the language service stored in $GLOBALS['LANG'].
     *
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        return $GLOBALS['LANG'];
    }
}
588