RobotsDisallowAllRule::validate()   B
last analyzed

Complexity

Conditions 7
Paths 10

Size

Total Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 30
rs 8.5066
c 0
b 0
f 0
cc 7
nc 10
nop 1
1
<?php
2
3
namespace whm\Smoke\Rules\Seo;
4
5
use phm\HttpWebdriverClient\Http\Response\UriAwareResponse;
6
use Psr\Http\Message\ResponseInterface;
7
use whm\Smoke\Http\ClientAware;
8
use whm\Smoke\Http\Response;
9
use whm\Smoke\Rules\Rule;
10
use whm\Smoke\Rules\ValidationFailedException;
11
12
/**
13
 * This rule checks if robots.txt has no entry "Disallow:/".
14
 */
15
class RobotsDisallowAllRule implements Rule
{
    /**
     * Intentionally empty: this rule has nothing to set up.
     */
    public function init()
    {
    }

    /**
     * Downloads /robots.txt from the host of the given response and fails when
     * it contains "User-agent: *" directly followed by "Disallow: /".
     *
     * The check is skipped silently when the response is not a UriAwareResponse,
     * when the URI has no host, or when robots.txt cannot be downloaded.
     *
     * @param ResponseInterface $response
     *
     * @throws ValidationFailedException if robots.txt disallows everything
     */
    public function validate(ResponseInterface $response)
    {
        if (!$response instanceof UriAwareResponse) {
            return;
        }

        $uri = $response->getUri();
        $host = (string) $uri->getHost();

        if ($host === '') {
            return;
        }

        // Only scheme and host are used, so robots.txt is always requested
        // from the default port of the scheme.
        $robotsUrl = $uri->getScheme() . '://' . $host . '/robots.txt';

        try {
            // Best effort: warnings are suppressed because a missing robots.txt
            // is not an error for this rule.
            $content = @file_get_contents($robotsUrl);
        } catch (\Exception $e) {
            // Only reachable when an error handler converts the warning into
            // an exception; file_get_contents() itself returns false instead.
            return;
        }

        if ($content === false) {
            return;
        }

        if ($this->containsDisallowAll($this->normalizeContent($content))) {
            throw new ValidationFailedException('The robots.txt contains disallow all (Disallow: /)');
        }
    }

    /**
     * Tells whether normalized robots.txt content holds the pair
     * "user-agent:*" / "disallow:/" with nothing appended to the path.
     *
     * @param string $normalizedContent output of normalizeContent()
     *
     * @return bool
     */
    private function containsDisallowAll($normalizedContent)
    {
        $needle = 'user-agent:* disallow:/';

        // Line breaks were collapsed to single spaces, so a trailing space
        // means the directive ended there (as opposed to "disallow:/path").
        if (strpos($normalizedContent, $needle . ' ') !== false) {
            return true;
        }

        // The pair may also be the very last directive of the file.
        return substr($normalizedContent, -strlen($needle)) === $needle;
    }

    /**
     * Lower-cases the content, removes spaces and tabs inside lines and joins
     * all lines with a single space.
     *
     * @param string $content raw robots.txt content
     *
     * @return string
     */
    private function normalizeContent($content)
    {
        $normalizedContent = strtolower($content);
        $normalizedContent = str_replace([' ', "\t"], '', $normalizedContent);

        // Whatever whitespace is left (line breaks, mainly) becomes one space.
        return trim(preg_replace('/\s+/', ' ', $normalizedContent));
    }
}
67