|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace whm\Smoke\Rules\Xml\Sitemap; |
|
4
|
|
|
|
|
5
|
|
|
use whm\Smoke\Http\Response; |
|
6
|
|
|
use whm\Smoke\Rules\Rule; |
|
7
|
|
|
use whm\Smoke\Rules\ValidationFailedException; |
|
8
|
|
|
|
|
9
|
|
|
/**
 * This rule checks if a sitemap.xml file is valid.
 *
 * Only responses whose content type is exactly "text/xml" are inspected.
 * A body containing "<urlset" is validated against the bundled XSD. For a
 * body containing "<sitemapindex" every referenced sitemap is fetched and
 * validated in turn; the index document itself is not schema-validated.
 */
class ValidRule implements Rule
{
    const SCHEMA = 'sitemap0_9.xsd';

    /**
     * Returns the path of the XSD file located in the same directory as this class.
     *
     * @return string
     */
    private function getSchema()
    {
        return __DIR__ . '/' . self::SCHEMA;
    }

    /**
     * Turns a libxml error (or the absence of one) into a single-line message.
     *
     * @param \LibXMLError|false $error value returned by libxml_get_last_error()
     *
     * @return string
     */
    private function describeError($error)
    {
        // libxml_get_last_error() returns false when nothing was recorded;
        // dereferencing ->message on it would raise a PHP error of its own.
        if (!$error) {
            return 'unknown error';
        }

        return str_replace("\n", '', $error->message);
    }

    /**
     * Checks that the given XML is well formed and validates against the sitemap schema.
     *
     * @param string $body raw XML of a single sitemap (urlset) document
     *
     * @throws ValidationFailedException if the document is empty, not well formed
     *                                   or does not validate against the schema
     */
    private function validateBody($body)
    {
        // An empty (or failed-download) body can never be a sitemap, and
        // DOMDocument::loadXML() throws a ValueError for '' on PHP 8.
        if (!is_string($body) || trim($body) === '') {
            throw new ValidationFailedException(
                'The given sitemap file is not well formed (last error: document is empty).');
        }

        // Collect libxml errors internally instead of silencing them with "@".
        $previousSetting = libxml_use_internal_errors(true);
        libxml_clear_errors();

        $dom = new \DOMDocument();
        $dom->loadXML($body);
        $parseError = libxml_get_last_error();

        $valid = false;
        $schemaError = false;
        if (!$parseError) {
            // "@" is kept here on purpose: besides the libxml errors, schemaValidate()
            // raises a plain PHP warning ("Invalid Schema") when the XSD cannot be
            // parsed, and that warning is not captured by libxml_use_internal_errors().
            $valid = @$dom->schemaValidate($this->getSchema());
            $schemaError = libxml_get_last_error();
        }

        // Restore the global libxml state before any exception leaves this method.
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if ($parseError) {
            throw new ValidationFailedException(
                'The given sitemap file is not well formed (last error: ' . $this->describeError($parseError) . ').');
        }

        if (!$valid) {
            throw new ValidationFailedException(
                'The given sitemap file did not validate vs. sitemap.xsd (last error: ' . $this->describeError($schemaError) . ').');
        }
    }

    /**
     * Extracts the <loc> value of every <sitemap> entry of a sitemap index document.
     *
     * Entries without a (non-empty) <loc> are skipped.
     *
     * @param string $body raw XML of the sitemap index
     *
     * @return array list of location strings, possibly empty
     *
     * @throws ValidationFailedException if the index cannot be parsed as XML
     */
    private function getLocations($body)
    {
        $locations = array();

        $previousSetting = libxml_use_internal_errors(true);
        $xml = simplexml_load_string($body);
        $parseError = libxml_get_last_error();
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if ($xml === false) {
            throw new ValidationFailedException(
                'The given sitemap index file is not well formed (last error: ' . $this->describeError($parseError) . ').');
        }

        // Iterating the element collection handles zero, one and many <sitemap>
        // children alike, so no special case for a single entry is required.
        foreach ($xml->sitemap as $sitemap) {
            $location = trim((string) $sitemap->loc);
            if ($location !== '') {
                $locations[] = $location;
            }
        }

        return $locations;
    }

    /**
     * Validates the sitemap (or all sitemaps referenced by a sitemap index) in the response.
     *
     * @param Response $response response to inspect; ignored unless its content type is "text/xml"
     *
     * @throws ValidationFailedException if a sitemap is invalid or a referenced sitemap cannot be loaded
     */
    public function validate(Response $response)
    {
        if (strtolower((string) $response->getContentType()) !== 'text/xml') {
            return;
        }

        $body = $response->getBody();

        // sitemapindex or urlset
        if (preg_match('/<sitemapindex/', $body)) {
            foreach ($this->getLocations($body) as $sitemapUrl) {
                // NOTE(review): $sitemapUrl comes from the remote document and is passed
                // to file_get_contents() unfiltered, so non-HTTP schemes and local paths
                // are reachable; consider restricting it to http/https.
                $singleSitemapXml = file_get_contents($sitemapUrl);
                if ($singleSitemapXml === false) {
                    throw new ValidationFailedException(
                        'The sitemap "' . $sitemapUrl . '" referenced by the sitemap index could not be loaded.');
                }

                $this->validateBody($singleSitemapXml);
            }
        } elseif (preg_match('/<urlset/', $body)) {
            $this->validateBody($body);
        }
    }
}
|
79
|
|
|
|
If you suppress an error, we recommend checking for the error condition explicitly: