1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace whm\Smoke\Rules\Seo; |
4
|
|
|
|
5
|
|
|
use Ivory\HttpAdapter\HttpAdapterInterface; |
6
|
|
|
use whm\Smoke\Http\ClientAware; |
7
|
|
|
use whm\Smoke\Http\Response; |
8
|
|
|
use whm\Smoke\Rules\Rule; |
9
|
|
|
use whm\Smoke\Rules\ValidationFailedException; |
10
|
|
|
|
11
|
|
|
/**
 * This rule checks that the robots.txt of the host does not contain the
 * entry "Disallow: /" directly below "User-agent: *".
 */
class RobotsDisallowAllRule implements Rule, ClientAware
{
    /**
     * HTTP client used to download the robots.txt file.
     *
     * @var HttpAdapterInterface
     */
    private $client;

    /**
     * Downloads "<scheme>://<host>/robots.txt" for the host of the given
     * response and fails if it contains a disallow-all entry.
     *
     * If the robots.txt cannot be fetched the rule is skipped silently.
     *
     * @param Response $response response whose URI determines the host to check
     *
     * @throws ValidationFailedException if "User-agent: *" is directly followed by "Disallow: /"
     */
    public function validate(Response $response)
    {
        $url = $response->getUri()->getScheme() . '://' . $response->getUri()->getHost();

        // Two slashes: "scheme://host"; three slashes: the URL already ends in "/".
        if (substr_count($url, '/') === 2) {
            $robotsUrl = $url . '/robots.txt';
        } elseif (substr_count($url, '/') === 3) {
            $robotsUrl = $url . 'robots.txt';
        } else {
            return;
        }

        try {
            $robotsResponse = $this->client->get($robotsUrl);
        } catch (\Exception $e) {
            // Best effort: an unreachable robots.txt is not treated as a failure of this rule.
            return;
        }

        $normalizedContent = $this->normalizeContent((string) $robotsResponse->getBody());

        if ($this->containsDisallowAll($normalizedContent)) {
            throw new ValidationFailedException('The robots.txt contains disallow all (Disallow: /)');
        }
    }

    /**
     * Tells whether normalized robots.txt content contains "user-agent:*"
     * immediately followed by the exact directive "disallow:/".
     *
     * normalizeContent() turns every line break into a single space, so a
     * directive ends either at the next space or at the end of the content.
     * Requiring that boundary keeps entries such as "disallow:/admin" from
     * being reported, while still finding the entry anywhere in the file.
     *
     * @param string $normalizedContent content as returned by normalizeContent()
     *
     * @return bool
     */
    private function containsDisallowAll($normalizedContent)
    {
        return preg_match('#user-agent:\* disallow:/(?: |\z)#', $normalizedContent) === 1;
    }

    /**
     * Lower-cases the content, removes spaces and tabs inside lines and
     * collapses each remaining whitespace run (the line breaks) into a single
     * space, so that every directive becomes one space-separated token.
     *
     * @param string $content raw robots.txt body
     *
     * @return string
     */
    private function normalizeContent($content)
    {
        $normalizedContent = strtolower($content);

        // Drop horizontal whitespace first so "Disallow: /" and "Disallow:\t/" both become "disallow:/".
        $normalizedContent = preg_replace('/[ \t]+/', '', $normalizedContent);

        $normalizedContent = trim(preg_replace('/\s+/', ' ', $normalizedContent));

        return $normalizedContent;
    }

    /**
     * Injects the HTTP client used to fetch the robots.txt file.
     *
     * @param HttpAdapterInterface $client
     */
    public function setClient(HttpAdapterInterface $client)
    {
        $this->client = $client;
    }
}
67
|
|
|
|