Passed
Push — master ( 2f16d6...f55b85 )
by Dispositif
02:32
created

RobotsRulesTrait::isRobotNoIndex()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 16
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
eloc 9
c 1
b 0
f 0
nc 2
nop 2
dl 0
loc 16
rs 9.9666
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\ExternLink;
11
12
/**
 * Helpers deciding whether a fetched page must be skipped because its robots
 * meta tag forbids indexing.
 *
 * The trait does not declare `$this->log`: the using class is expected to
 * provide an object exposing a `notice(string)` method.
 */
trait RobotsRulesTrait
{
    /**
     * Domains that are never reported as "noindex", even when their pages
     * carry a noindex/none robots directive.
     *
     * @var string[]
     */
    public $noindexWhitelist = ['legifrance.gouv.fr'];

    /**
     * Detect whether the page's robots meta tag forbids indexing.
     * https://developers.google.com/search/docs/crawling-indexing/robots-meta-tag?hl=fr
     *
     * The robots value is split into individual directives and compared
     * token by token, so that a directive value such as
     * "max-image-preview:none" is not mistaken for the "none" directive.
     *
     * @param array  $pageData page data; reads $pageData['meta']['robots'] and
     *                         $pageData['meta']['prettyDomainName'] (both optional)
     * @param string $url      page URL, only used in the log message
     *
     * @return bool true when the page declares "noindex" or "none" and its
     *              domain is not whitelisted; false otherwise
     */
    protected function isRobotNoIndex(array $pageData, string $url): bool
    {
        $robots = $pageData['meta']['robots'] ?? null;

        // Missing, empty or non-string values carry no usable directive.
        if (!is_string($robots) || $robots === '') {
            return false;
        }

        // Glue "name: value" pairs together before tokenizing, so the value of
        // a "name:value" directive never ends up as a standalone token.
        $normalized = preg_replace('/\s*:\s*/', ':', strtolower($robots)) ?? '';
        $directives = preg_split('/[\s,;]+/', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        if (!in_array('noindex', $directives, true) && !in_array('none', $directives, true)) {
            return false;
        }

        $this->log->notice('robots NOINDEX : ' . $url);

        // The domain key may be absent: pass null instead of triggering an
        // "Undefined array key" warning.
        return !$this->isNoIndexDomainWhitelisted($pageData['meta']['prettyDomainName'] ?? null);
    }

    /**
     * Tell whether the given domain is listed in $noindexWhitelist.
     *
     * A notice is logged when the domain matches.
     *
     * @param string|null $prettyDomain domain name to look up; null is treated
     *                                  as an empty string
     *
     * @return bool true when the domain is whitelisted
     */
    protected function isNoIndexDomainWhitelisted(?string $prettyDomain): bool
    {
        if (!in_array($prettyDomain ?? '', $this->noindexWhitelist, true)) {
            return false;
        }

        $this->log->notice('ROBOT_NOINDEX_WHITELIST ' . $prettyDomain);

        return true;
    }
}