Completed
Branch master (1f9106), created by Nils at 02:44

RobotsDisallowAllRule (Rating: A)

Complexity
    Total Complexity: 5

Size/Duplication
    Total Lines: 26
    Duplicated Lines: 0 %

Coupling/Cohesion
    Components: 0
    Dependencies: 3

Importance
    Changes: 6
    Bugs: 2
    Features: 2
Metric                                 Value
wmc   (weighted method count)              5
c     (changes)                            6
b     (bugs)                               2
f     (features)                           2
lcom  (lack of cohesion of methods)        0
cbo   (coupling between objects)           3
dl    (duplicated lines)                   0
loc   (lines of code)                     26
rs                                        10

1 Method

Rating  Name        Duplication  Size  Complexity
B       validate()  0            23    5
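
validate() reduces to one check: fetch robots.txt and fail if its normalized content contains a blanket Disallow directive. A minimal standalone sketch of that check follows; the sample robots.txt content is invented for illustration.

<?php

// Minimal sketch of the check validate() performs, in isolation.
// The sample robots.txt content is invented for illustration.
$content = "User-agent: *\nDisallow: /\n";

// Strip spaces so "Disallow: /" and "Disallow:/" are treated alike.
$normalizedContent = str_replace(' ', '', $content);

if (strpos($normalizedContent, "Disallow:/\n") !== false) {
    echo "robots.txt disallows all crawlers\n"; // this sample triggers the rule
} else {
    echo "robots.txt looks fine\n";
}

A file containing only path-specific rules such as "Disallow: /admin" passes, because "Disallow:/admin" never matches the bare directive followed by a newline. The full source of the rule: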
<?php

namespace whm\Smoke\Rules\Seo;

use whm\Smoke\Http\Response;
use whm\Smoke\Rules\Rule;
use whm\Smoke\Rules\ValidationFailedException;

/**
 * This rule checks that robots.txt contains no "Disallow: /" entry, which
 * would block all crawlers from the entire site.
 */
class RobotsDisallowAllRule implements Rule
{
    public function validate(Response $response)
    {
        $url = $response->getUri()->getScheme() . '://' . $response->getUri()->getHost();

        // "scheme://host" contains exactly two slashes; a trailing slash
        // would add a third.
        if (substr_count($url, '/') === 2) {
            $filename = $url . '/robots.txt';
        } elseif (substr_count($url, '/') === 3) {
            $filename = $url . 'robots.txt';
        } else {
            return;
        }

        // get_headers() returns false on failure, so guard before indexing.
        $headers = @get_headers($filename);

        if ($headers !== false && strpos($headers[0], '200') !== false) {
            $content = file_get_contents($filename);

            // Strip spaces and carriage returns and ensure a trailing
            // newline, so the directive is found regardless of formatting
            // and line-ending convention.
            $normalizedContent = str_replace([' ', "\r"], '', $content) . "\n";

            if (strpos($normalizedContent, "Disallow:/\n") !== false) {
                throw new ValidationFailedException('The robots.txt contains disallow all (Disallow: /)');
            }
        }
    }
}
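
The availability probe at the top of validate() relies on get_headers(), which returns false when the request fails, hence the guard before indexing into the result. Here is a standalone sketch of that probe; example.com is a placeholder host.

<?php

// Standalone sketch of the availability probe; example.com is a placeholder.
$filename = 'http://example.com/robots.txt';

// get_headers() issues an HTTP request and returns the response headers as
// an array, or false on failure; "@" suppresses the warning in that case.
$headers = @get_headers($filename);

if ($headers !== false && strpos($headers[0], '200') !== false) {
    echo file_get_contents($filename);
} else {
    echo "robots.txt not reachable\n";
}

Since get_headers() follows redirects but $headers[0] is the status line of the first response, a robots.txt served behind a redirect is treated as unreachable by this check.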