Passed
Push — master ( 92084b...91cbe9 )
by Dev
13:57 queued 12:21
created

Indexable   A

Complexity

Total Complexity 17

Size/Duplication

Total Lines 89
Duplicated Lines 0 %

Test Coverage

Coverage 84.85%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 17
eloc 40
c 1
b 0
f 0
dl 0
loc 89
ccs 28
cts 33
cp 0.8485
rs 10

5 Methods

Rating   Name   Duplication   Size   Complexity  
A metaAllows() 0 6 2
A __construct() 0 4 1
A headersAllow() 0 5 1
A robotsTxtAllows() 0 6 2
B isIndexable() 0 40 11
1
<?php
2
3
namespace PiedWeb\UrlHarvester;
4
5
use Spatie\Robots\RobotsHeaders;
6
7
/**
 * Decides whether a harvested page may be indexed by a given crawler.
 *
 * The verdict is reported as one of the integer constants declared below:
 * INDEXABLE when every check passes, otherwise the NOT_INDEXABLE_* constant
 * matching the first check that failed.
 */
class Indexable
{
    // https://stackoverflow.com/questions/1880148/how-to-get-name-of-the-constant
    const INDEXABLE = 0;
    const NOT_INDEXABLE_ROBOTS = 1;
    const NOT_INDEXABLE_HEADER = 2;
    const NOT_INDEXABLE_META = 3;
    const NOT_INDEXABLE_CANONICAL = 4;
    const NOT_INDEXABLE_4XX = 5;
    const NOT_INDEXABLE_5XX = 6;
    const NOT_INDEXABLE_NETWORK_ERROR = 7;
    const NOT_INDEXABLE_3XX = 8;
    const NOT_INDEXABLE_NOT_HTML = 9;

    /** @var Harvest */
    protected $harvest;

    /** @var string User-agent name the indexability checks are evaluated for. */
    protected $isIndexableFor;

    /**
     * @param Harvest $harvest        Harvested page to inspect.
     * @param string  $isIndexableFor User-agent name used by the robots.txt, header and meta checks.
     */
    public function __construct(Harvest $harvest, string $isIndexableFor = 'googlebot')
    {
        $this->harvest = $harvest;
        $this->isIndexableFor = $isIndexableFor;
    }

    /**
     * Checks the harvested URL against the harvest's robots.txt.
     *
     * @return bool|mixed True when the harvest exposes an empty-string robots.txt;
     *                    otherwise whatever the robots.txt object's allows() returns
     *                    (presumably a bool -- confirm against the robots.txt library).
     */
    public function robotsTxtAllows()
    {
        $url = $this->harvest->getResponse()->getUrl();
        $robotsTxt = $this->harvest->getRobotsTxt();

        // An empty string stands for "no robots.txt rules": nothing is disallowed.
        if ('' === $robotsTxt) {
            return true;
        }

        return $robotsTxt->allows($url, $this->isIndexableFor);
    }

    /**
     * Checks that neither the crawler-specific meta nor the generic "robots" meta
     * contains "noindex" (case-insensitive).
     *
     * @return bool False as soon as one of the two meta values contains "noindex".
     */
    public function metaAllows()
    {
        $meta = $this->harvest->getMeta($this->isIndexableFor);
        $generic = $this->harvest->getMeta('robots');

        return false === stripos($meta, 'noindex') && false === stripos($generic, 'noindex');
    }

    /**
     * Checks the response headers for an indexing restriction aimed at the crawler.
     *
     * @return mixed Result of RobotsHeaders::mayIndex() (presumably a bool -- confirm).
     */
    public function headersAllow()
    {
        // NOTE(review): the header string is split on PHP_EOL; confirm that
        // getHeaders(false) uses that same line separator on every platform.
        $headers = explode(PHP_EOL, $this->harvest->getResponse()->getHeaders(false));

        return RobotsHeaders::create($headers)->mayIndex($this->isIndexableFor);
    }

    /**
     * Runs every indexability check and returns the first failure found.
     *
     * Checks run in this order: robots.txt, response headers, meta tags, canonical,
     * then the HTTP status code (4XX, 5XX, 3XX). NOT_INDEXABLE_NETWORK_ERROR and
     * NOT_INDEXABLE_NOT_HTML are never returned by this method.
     *
     * @param Harvest $harvest        Harvested page to inspect.
     * @param string  $isIndexableFor User-agent name the checks are evaluated for.
     *
     * @return int One of the class constants (INDEXABLE or a NOT_INDEXABLE_* reason).
     */
    public static function isIndexable(Harvest $harvest, string $isIndexableFor = 'googlebot'): int
    {
        $self = new self($harvest, $isIndexableFor);

        // robots
        if (!$self->robotsTxtAllows()) {
            return self::NOT_INDEXABLE_ROBOTS;
        }

        if (!$self->headersAllow()) {
            return self::NOT_INDEXABLE_HEADER;
        }

        if (!$self->metaAllows()) {
            return self::NOT_INDEXABLE_META;
        }

        // canonical
        if (!$harvest->isCanonicalCorrect()) {
            return self::NOT_INDEXABLE_CANONICAL;
        }

        $statusCode = $harvest->getResponse()->getStatusCode();

        // status 4XX (previously reported as NOT_INDEXABLE_5XX by mistake)
        if ($statusCode < 500 && $statusCode > 399) {
            return self::NOT_INDEXABLE_4XX;
        }

        // status 5XX
        if ($statusCode < 600 && $statusCode > 499) {
            return self::NOT_INDEXABLE_5XX;
        }

        // status 3XX
        if ($statusCode < 400 && $statusCode > 299) {
            return self::NOT_INDEXABLE_3XX;
        }

        return self::INDEXABLE;
    }
}
98