Completed
Push — master ( 5e5725...eff4fd )
by
unknown
40:58 queued 27:03
created

LinkAnalyzer::getRootLineIsHidden()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 29
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 18
c 0
b 0
f 0
nc 4
nop 1
dl 0
loc 29
rs 9.3554
1
<?php
2
3
/*
4
 * This file is part of the TYPO3 CMS project.
5
 *
6
 * It is free software; you can redistribute it and/or modify it under
7
 * the terms of the GNU General Public License, either version 2
8
 * of the License, or any later version.
9
 *
10
 * For the full copyright and license information, please read the
11
 * LICENSE.txt file that was distributed with this source code.
12
 *
13
 * The TYPO3 project - inspiring people to share!
14
 */
15
16
namespace TYPO3\CMS\Linkvalidator;
17
18
use Psr\EventDispatcher\EventDispatcherInterface;
19
use TYPO3\CMS\Backend\Utility\BackendUtility;
20
use TYPO3\CMS\Core\Database\Connection;
21
use TYPO3\CMS\Core\Database\ConnectionPool;
22
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
23
use TYPO3\CMS\Core\Database\Query\Restriction\HiddenRestriction;
24
use TYPO3\CMS\Core\Html\HtmlParser;
25
use TYPO3\CMS\Core\Localization\LanguageService;
26
use TYPO3\CMS\Core\Utility\GeneralUtility;
27
use TYPO3\CMS\Linkvalidator\Event\BeforeRecordIsAnalyzedEvent;
28
use TYPO3\CMS\Linkvalidator\Linktype\AbstractLinktype;
29
use TYPO3\CMS\Linkvalidator\Repository\BrokenLinkRepository;
30
31
/**
32
 * This class provides Processing plugin implementation
33
 * @internal
34
 */
35
class LinkAnalyzer
36
{
37
38
    /**
     * Tables and, per table, the fields that are searched for links.
     *
     * Set by init() and iterated as table => fields by getLinkStatistics().
     *
     * @var array<string, array<int, string>>
     */
43
    protected $searchFields = [];
44
45
    /**
     * List of page uids (rootline downwards)
     *
     * Set by init(): an array is stored as given, a comma separated string
     * is split into integers.
     *
     * @var array
     */
50
    protected $pids = [];
51
52
    /**
     * Number of links handed to a link type checker, counted per table name.
     *
     * Incremented by checkLinks() for every link it checks.
     *
     * @var array
     */
57
    protected $linkCounts = [];
58
59
    /**
     * Number of links reported to the broken link repository, counted per table name.
     *
     * Incremented by checkLinks() for every broken link, and also for valid
     * links while the request parameter "showalllinks" is set.
     *
     * @var array
     */
64
    protected $brokenLinkCounts = [];
65
66
    /**
     * Link type checkers, keyed by link type key.
     *
     * Filled by init() from $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'].
     *
     * @var Linktype\AbstractLinktype[]
     */
71
    protected $hookObjectsArr = [];
72
73
    /**
     * The currently active TSconfig. Will be passed to the init function.
     *
     * @var array
     */
78
    protected $tsConfig = [];
79
80
    /**
     * Dispatcher used by analyzeRecord() to emit BeforeRecordIsAnalyzedEvent.
     *
     * @var EventDispatcherInterface
     */
83
    protected $eventDispatcher;
84
85
    /**
     * Repository through which broken link entries are added and removed.
     *
     * @var BrokenLinkRepository
     */
88
    protected $brokenLinkRepository;
89
90
    /**
     * Keeps the injected collaborators and loads the language labels of the linkvalidator module.
     *
     * @param EventDispatcherInterface $eventDispatcher Dispatcher used while records are analyzed
     * @param BrokenLinkRepository $brokenLinkRepository Repository used to add and remove broken link entries
     */
    public function __construct(EventDispatcherInterface $eventDispatcher, BrokenLinkRepository $brokenLinkRepository)
    {
        $this->brokenLinkRepository = $brokenLinkRepository;
        $this->eventDispatcher = $eventDispatcher;

        $languageService = $this->getLanguageService();
        $languageService->includeLLFile('EXT:linkvalidator/Resources/Private/Language/Module/locallang.xlf');
    }
96
97
    /**
     * Remember the search configuration and create one checker object per registered link type.
     *
     * @param array $searchFields List of fields in which to search for links
     * @param string|array $pidList Page uids in which to search for links, as comma separated string or as array
     * @param array $tsConfig The currently active TSconfig.
     */
    public function init(array $searchFields, $pidList, $tsConfig)
    {
        $this->searchFields = $searchFields;
        if (is_array($pidList)) {
            $this->pids = $pidList;
        } else {
            $this->pids = GeneralUtility::intExplode(',', $pidList, true);
        }
        $this->tsConfig = $tsConfig;

        // Each class registered for "checkLinks" becomes the checker of its link type key
        $registeredLinktypes = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['linkvalidator']['checkLinks'] ?? [];
        foreach ($registeredLinktypes as $linktypeKey => $linktypeClass) {
            $this->hookObjectsArr[$linktypeKey] = GeneralUtility::makeInstance($linktypeClass);
            $linktypeOptions = $tsConfig['linktypesConfig.'][$linktypeKey . '.'] ?? [];
            // Always called, even with empty options: setAdditionalConfig might use global configuration
            $this->hookObjectsArr[$linktypeKey]->setAdditionalConfig($linktypeOptions);
        }
    }
118
119
    /**
     * Find all supported broken links and store them in tx_linkvalidator_link
     *
     * The broken link repository is first asked to remove the entries of the
     * configured pages for the given link types. Afterwards every configured
     * table is read, each row is passed to analyzeRecord() and the collected
     * links are handed to checkLinks(). Returns without doing anything if
     * either $checkOptions or the list of page uids is empty.
     *
     * @param array $checkOptions List of hook object to activate, keyed by link type
     * @param bool $considerHidden Defines whether to look into hidden fields
     */
    public function getLinkStatistics($checkOptions = [], $considerHidden = false)
    {
        if (empty($checkOptions) || empty($this->pids)) {
            return;
        }

        $this->brokenLinkRepository->removeAllBrokenLinksOfRecordsOnPageIds(
            $this->pids,
            array_keys($checkOptions)
        );

        $results = [];
        // Traverse all configured tables
        foreach ($this->searchFields as $table => $fields) {
            // If table is not configured, assume the extension is not installed
            // and therefore no need to check it. The "?? null" keeps this very
            // case from raising an "undefined array key" warning.
            if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
                continue;
            }
            $tableControl = $GLOBALS['TCA'][$table]['ctrl'] ?? [];

            $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
                ->getQueryBuilderForTable($table);

            if ($considerHidden) {
                // Drop all default restrictions, but keep excluding deleted records
                $queryBuilder->getRestrictions()
                    ->removeAll()
                    ->add(GeneralUtility::makeInstance(DeletedRestriction::class));
            }

            // Re-init selectFields for table
            $selectFields = array_merge(['uid', 'pid', $tableControl['label']], $fields);
            if ($tableControl['languageField'] ?? false) {
                $selectFields[] = $tableControl['languageField'];
            }
            // The type field is only selected if it is configured as a column of this table
            $typeField = $tableControl['type'] ?? false;
            if ($typeField && isset($GLOBALS['TCA'][$table]['columns'][$typeField])) {
                $selectFields[] = $typeField;
            }

            // Pages are matched by their own uid, all other records by the page they are stored on
            $result = $queryBuilder->select(...$selectFields)
                ->from($table)
                ->where(
                    $queryBuilder->expr()->in(
                        ($table === 'pages' ? 'uid' : 'pid'),
                        $queryBuilder->createNamedParameter($this->pids, Connection::PARAM_INT_ARRAY)
                    )
                )
                ->execute();

            // @todo #64091: only select rows that have content in at least one of the relevant fields (via OR)
            while ($row = $result->fetch()) {
                $this->analyzeRecord($results, $table, $fields, $row);
            }
        }
        $this->checkLinks($results, $checkOptions);
    }
181
182
    /**
     * Run the registered link type checkers over the collected links and report
     * the outcome to the broken link repository.
     *
     * A link whose checker returns a falsy result is added as broken link together
     * with the checker's error parameters. While the request parameter
     * "showalllinks" is set, links that passed the check are added as well.
     *
     * @param array $links Collected links, grouped by link type key (as filled by analyzeRecord())
     * @param array $checkOptions Link types to check, keyed by link type; if empty, all types are checked
     */
    protected function checkLinks(array $links, array $checkOptions)
    {
        foreach ($this->hookObjectsArr as $key => $hookObj) {
            // Skip link types without collected links and link types that are not enabled.
            // Missing keys count as "not set" instead of raising undefined key warnings.
            if (!is_array($links[$key] ?? null) || (!empty($checkOptions) && empty($checkOptions[$key]))) {
                continue;
            }

            //  Check them
            foreach ($links[$key] as $entryValue) {
                $table = $entryValue['table'];
                $record = [];
                $record['headline'] = BackendUtility::getRecordTitle($table, $entryValue['row']);
                $record['record_pid'] = $entryValue['row']['pid'];
                $record['record_uid'] = $entryValue['uid'];
                $record['table_name'] = $table;
                $record['link_type'] = $key;
                // Only typolink entries carry a title, see analyzeTypoLinks()
                $record['link_title'] = $entryValue['link_title'] ?? '';
                $record['field'] = $entryValue['field'];
                $record['last_check'] = time();
                $typeField = $GLOBALS['TCA'][$table]['ctrl']['type'] ?? false;
                if ($typeField && ($entryValue['row'][$typeField] ?? false)) {
                    $record['element_type'] = $entryValue['row'][$typeField];
                }
                $languageField = $GLOBALS['TCA'][$table]['ctrl']['languageField'] ?? false;
                if ($languageField && isset($entryValue['row'][$languageField])) {
                    $record['language'] = $entryValue['row'][$languageField];
                } else {
                    $record['language'] = -1;
                }
                if (!empty($entryValue['pageAndAnchor'] ?? '')) {
                    // Page with anchor, e.g. 18#1580
                    $url = $entryValue['pageAndAnchor'];
                } else {
                    $url = $entryValue['substr']['tokenValue'] ?? '';
                }
                $record['url'] = $url;
                // Counters start at zero for tables seen for the first time
                $this->linkCounts[$table] = ($this->linkCounts[$table] ?? 0) + 1;
                $checkUrl = $hookObj->checkLink($url, $entryValue, $this);

                // Broken link found
                if (!$checkUrl) {
                    $this->brokenLinkRepository->addBrokenLink($record, false, $hookObj->getErrorParams());
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                } elseif (GeneralUtility::_GP('showalllinks')) {
                    $this->brokenLinkRepository->addBrokenLink($record, true);
                    $this->brokenLinkCounts[$table] = ($this->brokenLinkCounts[$table] ?? 0) + 1;
                }
            }
        }
    }
232
233
    /**
     * Recheck for broken links for one field in table for record.
     *
     * If the record cannot be loaded, its broken link entries are removed.
     * If both the record's tstamp and $timestamp are non-zero and the record's
     * tstamp is lower than $timestamp, nothing is rechecked. Otherwise the field
     * is analyzed again; only if links were found, the existing broken link
     * entries of the record are removed and the found links are checked.
     *
     * @param array $checkOptions Link types to check, passed on to checkLinks()
     * @param string $recordUid uid of record to check
     * @param string $table
     * @param string $field
     * @param int $timestamp - only recheck if timestamp changed
     * @param bool $considerHidden
     */
    public function recheckLinks(
        array $checkOptions,
        string $recordUid,
        string $table,
        string $field,
        int $timestamp,
        bool $considerHidden = true
    ): void {
        // If table is not configured, assume the extension is not installed
        // and therefore no need to check it
        if (!is_array($GLOBALS['TCA'][$table] ?? null)) {
            return;
        }

        // The repository expects an integer uid, so convert the string once
        $uid = (int)$recordUid;

        // get all links for $record / $table / $field combination
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable($table);
        if ($considerHidden) {
            $queryBuilder->getRestrictions()->removeByType(HiddenRestriction::class);
        }

        $row = $queryBuilder->select('uid', 'pid', $GLOBALS['TCA'][$table]['ctrl']['label'], $field, 'tstamp')
            ->from($table)
            ->where(
                $queryBuilder->expr()->eq(
                    'uid',
                    $queryBuilder->createNamedParameter($uid, Connection::PARAM_INT)
                )
            )
            ->execute()
            ->fetch();

        if (!$row) {
            // missing record: remove existing links
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            return;
        }
        if (($row['tstamp'] ?? 0) && $timestamp && ((int)($row['tstamp']) < $timestamp)) {
            // timestamp has not changed: no need to recheck
            return;
        }
        $resultsLinks = [];
        $this->analyzeRecord($resultsLinks, $table, [$field], $row);
        if ($resultsLinks) {
            // remove existing broken links from table
            $this->brokenLinkRepository->removeBrokenLinksForRecord($table, $uid);
            // find all broken links for list of links
            $this->checkLinks($resultsLinks, $checkOptions);
        }
    }
293
294
    /**
     * Collect all links of a specific record
     *
     * Dispatches BeforeRecordIsAnalyzedEvent first and continues with the results
     * and the record returned by the event. Then every given field that has soft
     * references configured in TCA and a non-empty value is run through its soft
     * reference parsers, and the found elements are added to $results.
     *
     * @param array $results Collected links, grouped by link type; extended by reference
     * @param string $table Table name of the record
     * @param array $fields Array of fields to analyze
     * @param array $record Record to analyze
     */
    public function analyzeRecord(array &$results, $table, array $fields, array $record)
    {
        $event = new BeforeRecordIsAnalyzedEvent($table, $record, $fields, $this, $results);
        $this->eventDispatcher->dispatch($event);
        $results = $event->getResults();
        $record = $event->getRecord();

        // Put together content of all relevant fields
        $htmlParser = GeneralUtility::makeInstance(HtmlParser::class);
        $idRecord = $record['uid'];
        // Get all references
        foreach ($fields as $field) {
            // Fields without TCA configuration or without a value in the record are
            // treated as empty instead of raising undefined key warnings
            $conf = $GLOBALS['TCA'][$table]['columns'][$field]['config'] ?? [];
            $valueField = $record[$field] ?? '';

            // Check if a TCA configured field has soft references defined (see TYPO3 Core API document)
            if (empty($conf['softref']) || (string)$valueField === '') {
                continue;
            }

            // Explode the list of soft references/parameters
            $softRefs = BackendUtility::explodeSoftRefParserList($conf['softref']);
            if ($softRefs === false) {
                continue;
            }

            // Traverse soft references
            foreach ($softRefs as $spKey => $spParams) {
                /** @var \TYPO3\CMS\Core\Database\SoftReferenceIndex $softRefObj */
                $softRefObj = BackendUtility::softRefParserObj($spKey);

                // If there is an object returned...
                if (!is_object($softRefObj)) {
                    continue;
                }
                $softRefParams = $spParams;
                if (!is_array($softRefParams)) {
                    // set subst such that findRef will return substitutes for urls, emails etc
                    $softRefParams = ['subst' => true];
                }

                // Do processing
                $resultArray = $softRefObj->findRef($table, $field, $idRecord, $valueField, $spKey, $softRefParams);
                if (!is_array($resultArray) || !isset($resultArray['elements']) || !is_array($resultArray['elements'])) {
                    continue;
                }

                if ($spKey === 'typolink_tag') {
                    $this->analyzeTypoLinks($resultArray, $results, $htmlParser, $record, $field, $table);
                } else {
                    $this->analyzeLinks($resultArray, $results, $record, $field, $table);
                }
            }
        }
    }
357
358
    /**
     * Add the links of a parsed soft reference result to the collected links
     *
     * Every element that carries substitution data is offered to all registered
     * link type checkers to determine its type and is then stored in $results
     * below that type, keyed by "table:field:uid:tokenID".
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Collected links, grouped by link type; extended by reference
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeLinks(array $resultArray, array &$results, array $record, $field, $table)
    {
        $idRecord = $record['uid'];
        foreach ($resultArray['elements'] as $element) {
            // Elements without substitution data cannot be checked
            $r = $element['subst'] ?? [];
            if (empty($r)) {
                continue;
            }

            $type = '';
            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($r, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $r['type'] = $type;
                }
            }

            // Sub keys are written one by one, so other data already stored below this key is kept
            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . ($r['tokenID'] ?? '');
            $results[$type][$resultKey]['substr'] = $r;
            $results[$type][$resultKey]['row'] = $record;
            $results[$type][$resultKey]['table'] = $table;
            $results[$type][$resultKey]['field'] = $field;
            $results[$type][$resultKey]['uid'] = $idRecord;
        }
    }
393
394
    /**
     * Add the links of a parsed "typolink_tag" soft reference result to the collected links
     *
     * The parsed content is split into <a> / <link> blocks. For every block the
     * elements whose token occurs in that block are combined into one link: a
     * "pages" reference provides the page, a directly following "tt_content"
     * reference is appended as "#c<uid>" anchor, any other element is taken as is.
     * The link is stored in $results below its type, keyed by "table:field:uid:tokenID",
     * together with the tag's text as title.
     *
     * @param array $resultArray findRef parsed records
     * @param array $results Collected links, grouped by link type; extended by reference
     * @param HtmlParser $htmlParser Instance of html parser
     * @param array $record The current record
     * @param string $field The current field
     * @param string $table The current table
     */
    protected function analyzeTypoLinks(array $resultArray, array &$results, $htmlParser, array $record, $field, $table)
    {
        $linkTags = $htmlParser->splitIntoBlock('a,link', $resultArray['content']);
        $idRecord = $record['uid'];
        $countLinkTags = count($linkTags);
        // Only the odd indexes are visited; they are expected to hold the tag blocks
        for ($i = 1; $i < $countLinkTags; $i += 2) {
            // All per-link state starts fresh for each tag, so that a tag without
            // matching element cannot re-store or alter the link of a previous tag
            $currentR = [];
            $referencedRecordType = '';
            $title = '';
            $wasPage = false;
            foreach ($resultArray['elements'] as $element) {
                $r = $element['subst'] ?? [];
                if (empty($r['tokenID']) || substr_count($linkTags[$i], $r['tokenID']) === 0) {
                    continue;
                }

                // Type of referenced record; not every element carries a record reference
                $recordRef = (string)($r['recordRef'] ?? '');
                if (strpos($recordRef, 'pages') !== false) {
                    $currentR = $r;
                    // Contains number of the page
                    $referencedRecordType = $r['tokenValue'];
                    $wasPage = true;
                } elseif (strpos($recordRef, 'tt_content') !== false && $wasPage === true) {
                    $referencedRecordType = $referencedRecordType . '#c' . $r['tokenValue'];
                    $wasPage = false;
                } else {
                    $currentR = $r;
                }
                $title = strip_tags($linkTags[$i]);
            }

            // No element belongs to this tag: there is nothing to check
            if (empty($currentR)) {
                continue;
            }

            $type = '';
            /** @var AbstractLinktype $hookObj */
            foreach ($this->hookObjectsArr as $keyArr => $hookObj) {
                $type = $hookObj->fetchType($currentR, $type, $keyArr);
                // Store the type that was found
                // This prevents overriding by internal validator
                if (!empty($type)) {
                    $currentR['type'] = $type;
                }
            }

            $resultKey = $table . ':' . $field . ':' . $idRecord . ':' . $currentR['tokenID'];
            $results[$type][$resultKey]['substr'] = $currentR;
            $results[$type][$resultKey]['row'] = $record;
            $results[$type][$resultKey]['table'] = $table;
            $results[$type][$resultKey]['field'] = $field;
            $results[$type][$resultKey]['uid'] = $idRecord;
            $results[$type][$resultKey]['link_title'] = $title;
            $results[$type][$resultKey]['pageAndAnchor'] = $referencedRecordType;
        }
    }
453
454
    /**
     * Ask the broken link repository for the number of broken links of the
     * records on the configured pages, restricted to the configured search fields.
     *
     * @return array Whatever getNumberOfBrokenLinksForRecordsOnPages() returns for the current pages and fields
     */
    public function getLinkCounts()
    {
        $pageIds = $this->pids;
        $fieldsPerTable = $this->searchFields;

        return $this->brokenLinkRepository->getNumberOfBrokenLinksForRecordsOnPages($pageIds, $fieldsPerTable);
    }
463
464
    /**
     * Hands out the language service stored in $GLOBALS['LANG'].
     *
     * @return LanguageService
     */
    protected function getLanguageService()
    {
        $languageService = $GLOBALS['LANG'];

        return $languageService;
    }
471
}
472