Passed
Branch master (7baf30)
by Dispositif
02:38
created

RobotNoIndexValidator::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 0

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 0
dl 0
loc 6
rs 10
c 1
b 0
f 0
cc 1
nc 1
nop 3
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\ExternLink\Validators;
11
12
use App\Domain\ValidatorInterface;
13
use Psr\Log\LoggerInterface;
14
use Psr\Log\NullLogger;
15
16
/**
17
 * Detect if robots noindex.
18
 * https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag?hl=fr
19
 */
20
class RobotNoIndexValidator implements ValidatorInterface
{
    /**
     * Domains for which a robots "noindex" directive is ignored.
     *
     * @var string[]
     */
    public $noindexWhitelist = ['test.com']; // move to config

    /**
     * @param array           $pageData Page data; this class reads $pageData['meta']['robots']
     *                                  and $pageData['meta']['prettyDomainName'].
     * @param string          $url      URL of the page, used only in log messages.
     * @param LoggerInterface $log      Logger receiving the notice/warning messages.
     */
    public function __construct(
        private readonly array           $pageData,
        private readonly string          $url,
        private readonly LoggerInterface $log = new NullLogger()
    )
    {
    }

    /**
     * Tell whether the page asks robots not to index it.
     *
     * "NOINDEX" => true
     *
     * @return bool true when the robots meta requests noindex and the page domain
     *              is not whitelisted (or is unknown); false otherwise.
     */
    public function validate(): bool
    {
        if (!$this->hasNoIndexDirective($this->pageData['meta']['robots'] ?? null)) {
            return false;
        }

        $this->log->notice('robots NOINDEX : ' . $this->url);

        $prettyDomain = $this->pageData['meta']['prettyDomainName'] ?? null;
        if (empty($prettyDomain)) {
            // Without a domain name the whitelist cannot be consulted: keep the NOINDEX verdict.
            $this->log->warning('No prettyDomainName for ' . $this->url);

            return true;
        }

        return !$this->isNoIndexDomainWhitelisted($prettyDomain);
    }

    /**
     * Tell whether the given domain is listed in $noindexWhitelist.
     *
     * @param string|null $prettyDomain Domain name; null is treated as an empty string.
     *
     * @return bool true when the domain is whitelisted (a notice is logged in that case).
     */
    protected function isNoIndexDomainWhitelisted(?string $prettyDomain): bool
    {
        // Strict comparison: avoids loose matches between numeric-looking strings.
        if (in_array($prettyDomain ?? '', $this->noindexWhitelist, true)) {
            $this->log->notice('ROBOT_NOINDEX_WHITELIST ' . $prettyDomain);

            return true;
        }

        return false;
    }

    /**
     * Detect a "noindex" rule, or the standalone "none" rule, in a robots meta value.
     *
     * "noindex" is searched as a case-insensitive substring. "none" is only accepted as a
     * whole comma-separated rule, so that valued rules such as "max-image-preview:none"
     * are not mistaken for a noindex request.
     *
     * @param mixed $robots Raw robots value. Arrays are joined with commas.
     *                      NOTE(review): assumes an array would hold one entry per
     *                      robots meta tag — confirm against the page parser.
     */
    private function hasNoIndexDirective(mixed $robots): bool
    {
        if (is_array($robots)) {
            $robots = implode(',', array_filter($robots, is_scalar(...)));
        } elseif (!is_scalar($robots) && !($robots instanceof \Stringable)) {
            // null or a value that cannot be converted to a string: nothing to inspect.
            return false;
        }

        $content = strtolower(trim((string) $robots));
        if ($content === '') {
            return false;
        }

        if (str_contains($content, 'noindex')) {
            return true;
        }

        $rules = array_map(trim(...), explode(',', $content));

        return in_array('none', $rules, true);
    }
}