Completed
Push — master ( acb1e6...d7f53c )
by Dev
08:59 queued 05:50
created

Indexable::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 2
dl 0
loc 4
ccs 3
cts 3
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace PiedWeb\UrlHarvester;
4
5
use Spatie\Robots\RobotsHeaders;
6
7
/**
 * Decides whether a harvested URL may be indexed by a given crawler and, when
 * it may not, reports the first reason found.
 *
 * All checks read from the Harvest handed to the constructor. The class
 * constants are the result codes returned by {@see Indexable::isIndexable()}.
 */
class Indexable
{
    // https://stackoverflow.com/questions/1880148/how-to-get-name-of-the-constant
    const INDEXABLE = 0;
    const NOT_INDEXABLE_ROBOTS = 1;
    const NOT_INDEXABLE_HEADER = 2;
    const NOT_INDEXABLE_META = 3;
    const NOT_INDEXABLE_CANONICAL = 4;
    const NOT_INDEXABLE_4XX = 5;
    const NOT_INDEXABLE_5XX = 6;
    const NOT_INDEXABLE_NETWORK_ERROR = 7;
    const NOT_INDEXABLE_3XX = 8;
    const NOT_INDEXABLE_NOT_HTML = 9;

    /** @var Harvest Harvest whose response, robots.txt, meta tags and canonical are inspected. */
    protected $harvest;

    /** @var string Crawler name the checks are evaluated for (e.g. 'googlebot'). */
    protected $isIndexableFor;

    /**
     * @param Harvest $harvest        Harvest to evaluate.
     * @param string  $isIndexableFor Crawler name passed to the robots.txt, meta and header checks.
     */
    public function __construct(Harvest $harvest, string $isIndexableFor = 'googlebot')
    {
        $this->harvest = $harvest;
        $this->isIndexableFor = $isIndexableFor;
    }

    /**
     * Tells whether robots.txt lets the configured crawler fetch the harvested URL.
     *
     * @return bool|mixed True when the harvest exposes an empty-string robots.txt;
     *                    otherwise whatever the robots.txt object's allows() returns.
     */
    public function robotsTxtAllows()
    {
        $url = $this->harvest->getResponse()->getUrl();
        $robotsTxt = $this->harvest->getRobotsTxt();

        // An empty string means there is no robots.txt rule set to consult.
        if ('' === $robotsTxt) {
            return true;
        }

        return $robotsTxt->allows($url, $this->isIndexableFor);
    }

    /**
     * Tells whether the page's meta tags leave indexing allowed.
     *
     * Both the crawler-specific meta (named after the configured crawler) and
     * the generic 'robots' meta are checked, case-insensitively, for 'noindex'.
     *
     * @return bool False as soon as either meta value contains 'noindex'.
     */
    public function metaAllows()
    {
        $meta = $this->harvest->getMeta($this->isIndexableFor);
        $generic = $this->harvest->getMeta('robots');

        return false === stripos($meta, 'noindex') && false === stripos($generic, 'noindex');
    }

    /**
     * Tells whether the response headers leave indexing allowed for the configured crawler.
     *
     * @return bool|mixed Result of RobotsHeaders::mayIndex() for the configured crawler.
     */
    public function headersAllow()
    {
        // NOTE(review): getHeaders(false) is presumed to return the raw header
        // block as a single string - confirm against the response class.
        $headers = explode(PHP_EOL, $this->harvest->getResponse()->getHeaders(false));

        return RobotsHeaders::create($headers)->mayIndex($this->isIndexableFor);
    }

    /**
     * Runs every indexability check in order and returns the first failure.
     *
     * Order: robots.txt, response headers, meta tags, canonical, then the HTTP
     * status class (4XX, 5XX, 3XX).
     *
     * @param Harvest $harvest        Harvest to evaluate.
     * @param string  $isIndexableFor Crawler name the checks are evaluated for.
     *
     * @return int self::INDEXABLE when nothing blocks indexing, otherwise one
     *             of the self::NOT_INDEXABLE_* codes
     */
    public static function isIndexable(Harvest $harvest, string $isIndexableFor = 'googlebot')
    {
        $self = new self($harvest, $isIndexableFor);

        // robots.txt
        if (!$self->robotsTxtAllows()) {
            return self::NOT_INDEXABLE_ROBOTS;
        }

        // response headers
        if (!$self->headersAllow()) {
            return self::NOT_INDEXABLE_HEADER;
        }

        // meta tags
        if (!$self->metaAllows()) {
            return self::NOT_INDEXABLE_META;
        }

        // canonical
        if (!$harvest->isCanonicalCorrect()) {
            return self::NOT_INDEXABLE_CANONICAL;
        }

        $statusCode = $harvest->getResponse()->getStatusCode();

        // status 4XX (client errors get their own code, not the 5XX one)
        if ($statusCode > 399 && $statusCode < 500) {
            return self::NOT_INDEXABLE_4XX;
        }

        // status 5XX
        if ($statusCode > 499 && $statusCode < 600) {
            return self::NOT_INDEXABLE_5XX;
        }

        // status 3XX
        if ($statusCode > 299 && $statusCode < 400) {
            return self::NOT_INDEXABLE_3XX;
        }

        return self::INDEXABLE;
    }
}
100