Passed
Push — master ( a8de00...373459 )
by Dev
04:36
created

Indexable::indexable()   B

Complexity

Conditions 11
Paths 8

Size

Total Lines 40
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 11

Importance

Changes 0
Metric Value
eloc 17
c 0
b 0
f 0
dl 0
loc 40
ccs 18
cts 18
cp 1
rs 7.3166
cc 11
nc 8
nop 2
crap 11

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

A commonly applied refactoring for long methods is Extract Method.

1
<?php
2
3
namespace PiedWeb\UrlHarvester;
4
5
//use Spatie\Robots\RobotsHeaders;
6
7
/**
 * Decides whether a harvested page may be indexed by a given crawler.
 *
 * The verdict is one of the integer constants below: INDEXABLE, or a
 * NOT_INDEXABLE_* code naming the first check that failed.
 */
class Indexable
{
    // https://stackoverflow.com/questions/1880148/how-to-get-name-of-the-constant
    const INDEXABLE = 0;
    const NOT_INDEXABLE_ROBOTS = 1;
    const NOT_INDEXABLE_HEADER = 2;
    const NOT_INDEXABLE_META = 3;
    const NOT_INDEXABLE_CANONICAL = 4;
    const NOT_INDEXABLE_4XX = 5;
    const NOT_INDEXABLE_5XX = 6;
    // The two codes below are declared here but never returned by indexable().
    const NOT_INDEXABLE_NETWORK_ERROR = 7;
    const NOT_INDEXABLE_3XX = 8;
    const NOT_INDEXABLE_NOT_HTML = 9;

    /** @var Harvest Harvested page the checks are run against. */
    protected $harvest;

    /** @var string User-agent name the indexability is evaluated for. */
    protected $isIndexableFor;

    /**
     * @param Harvest $harvest        Harvested page to inspect
     * @param string  $isIndexableFor Crawler user-agent to evaluate the rules for
     */
    public function __construct(Harvest $harvest, string $isIndexableFor = 'googlebot')
    {
        $this->harvest = $harvest;
        $this->isIndexableFor = $isIndexableFor;
    }

    /**
     * Checks the harvested robots.txt for the response URL.
     *
     * An empty-string robots.txt is treated as "everything allowed".
     *
     * @return bool presumably a bool, as returned by the robots.txt object's allows()
     */
    public function robotsTxtAllows()
    {
        $url = $this->harvest->getResponse()->getUrl();
        $robotsTxt = $this->harvest->getRobotsTxt();

        if ('' === $robotsTxt) {
            return true;
        }

        return $robotsTxt->allows($url, $this->isIndexableFor);
    }

    /**
     * Checks the crawler-specific meta tag and the generic "robots" meta tag.
     *
     * @return bool false as soon as either tag contains "noindex" (case-insensitive)
     */
    public function metaAllows()
    {
        // NOTE(review): getMeta() presumably returns null when the tag is absent;
        // casting keeps stripos() from receiving null (deprecated since PHP 8.1).
        $botMeta = (string) $this->harvest->getMeta($this->isIndexableFor);
        $genericMeta = (string) $this->harvest->getMeta('robots');

        return false === stripos($botMeta, 'noindex')
            && false === stripos($genericMeta, 'noindex');
    }

    /**
     * Checks the response headers through RobotsHeaders::mayIndex().
     *
     * @return bool presumably a bool, as returned by RobotsHeaders::mayIndex()
     */
    public function headersAllow()
    {
        $rawHeaders = $this->harvest->getResponse()->getHeaders(false);

        // Header line endings are defined by the protocol (normally CRLF), not by
        // the host platform: splitting on PHP_EOL leaves a stray "\r" on every line
        // on Linux and fails to split LF-only input on Windows.
        $headers = preg_split('/\r\n|\r|\n/', $rawHeaders);
        if (false === $headers) {
            $headers = [];
        }

        return RobotsHeaders::create($headers)->mayIndex($this->isIndexableFor);
    }

    /**
     * Runs every indexability check and reports the first one that fails.
     *
     * Order of evaluation: robots.txt, response headers, meta tags, canonical,
     * then the HTTP status class (4XX, 5XX, 3XX).
     *
     * @param Harvest $harvest        Harvested page to inspect
     * @param string  $isIndexableFor Crawler user-agent to evaluate the rules for
     *
     * @return int self::INDEXABLE or one of the self::NOT_INDEXABLE_* codes
     */
    public static function indexable(Harvest $harvest, string $isIndexableFor = 'googlebot'): int
    {
        $self = new self($harvest, $isIndexableFor);

        // robots
        if (!$self->robotsTxtAllows()) {
            return self::NOT_INDEXABLE_ROBOTS;
        }

        if (!$self->headersAllow()) {
            return self::NOT_INDEXABLE_HEADER;
        }

        if (!$self->metaAllows()) {
            return self::NOT_INDEXABLE_META;
        }

        // canonical
        if (!$harvest->isCanonicalCorrect()) {
            return self::NOT_INDEXABLE_CANONICAL;
        }

        $statusCode = $harvest->getResponse()->getStatusCode();

        // status 4XX
        if ($statusCode > 399 && $statusCode < 500) {
            return self::NOT_INDEXABLE_4XX;
        }

        // status 5XX
        if ($statusCode > 499 && $statusCode < 600) {
            return self::NOT_INDEXABLE_5XX;
        }

        // status 3XX
        if ($statusCode > 299 && $statusCode < 400) {
            return self::NOT_INDEXABLE_3XX;
        }

        return self::INDEXABLE;
    }
}
98