1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace whm\Smoke\Rules\Xml\Sitemap; |
4
|
|
|
|
5
|
|
|
use whm\Smoke\Http\Response; |
6
|
|
|
use whm\Smoke\Rules\Rule; |
7
|
|
|
use whm\Smoke\Rules\ValidationFailedException; |
8
|
|
|
|
9
|
|
|
/** |
10
|
|
|
* This rule checks if a sitemap.xml file is valid. |
11
|
|
|
*/ |
12
|
|
|
class ValidRule implements Rule
{
    const SCHEMA = 'sitemap0_9.xsd';

    /**
     * Returns the absolute path of the XSD file that sits next to this class.
     *
     * @return string
     */
    private function getSchema()
    {
        return __DIR__ . '/' . self::SCHEMA;
    }

    /**
     * Turns a libxml error into a single-line text for exception messages.
     *
     * @param \LibXMLError|false $error value returned by libxml_get_last_error()
     *
     * @return string
     */
    private function describeLibxmlError($error)
    {
        // libxml_get_last_error() returns false when nothing was recorded;
        // never dereference it blindly.
        if (!$error) {
            return 'unknown error';
        }

        return str_replace("\n", '', $error->message);
    }

    /**
     * Checks that the given XML is well formed and valid against the
     * sitemap schema returned by getSchema().
     *
     * @param string $body XML source of a single sitemap
     *
     * @throws ValidationFailedException if the body is empty, not well formed,
     *                                   or does not validate against the schema
     */
    private function validateBody($body)
    {
        // DOMDocument::loadXML() rejects empty input before libxml is even
        // involved (a warning on older PHP versions, a ValueError on PHP 8),
        // so this case is reported explicitly.
        if (!is_string($body) || $body === '') {
            throw new ValidationFailedException(
                'The given sitemap file is not well formed (last error: empty document).'
            );
        }

        // Collect libxml errors internally instead of silencing warnings with "@".
        $previousSetting = libxml_use_internal_errors(true);
        libxml_clear_errors();

        $failure = null;
        $schema = $this->getSchema();
        $dom = new \DOMDocument();

        $loaded = $dom->loadXML($body);
        $loadError = libxml_get_last_error();

        if (!$loaded || $loadError) {
            $failure = 'The given sitemap file is not well formed (last error: '
                . $this->describeLibxmlError($loadError) . ').';
        } elseif (!is_readable($schema)) {
            // Checked explicitly so schemaValidate() is never called with a
            // schema file it cannot open.
            $failure = 'The given sitemap file did not validate vs. sitemap.xsd (last error: '
                . 'schema file is not readable).';
        } elseif (!$dom->schemaValidate($schema)) {
            $failure = 'The given sitemap file did not validate vs. sitemap.xsd (last error: '
                . $this->describeLibxmlError(libxml_get_last_error()) . ').';
        }

        // Restore the libxml state before leaving, whether or not validation failed.
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if ($failure !== null) {
            throw new ValidationFailedException($failure);
        }
    }

    /**
     * Extracts the <loc> value of every <sitemap> entry of a sitemap index.
     *
     * Surrounding whitespace is trimmed; entries without a (non-empty) <loc>
     * are skipped.
     *
     * @param string $body XML source of a sitemap index
     *
     * @return array list of sitemap URLs (strings)
     *
     * @throws ValidationFailedException if the index cannot be parsed as XML
     */
    private function getLocations($body)
    {
        $previousSetting = libxml_use_internal_errors(true);
        libxml_clear_errors();

        $xml = simplexml_load_string($body);
        $parseError = libxml_get_last_error();

        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if ($xml === false) {
            throw new ValidationFailedException(
                'The given sitemap index file is not well formed (last error: '
                . $this->describeLibxmlError($parseError) . ').'
            );
        }

        $locations = array();

        // Iterating the SimpleXML children handles one entry and many entries
        // alike; an index without any <sitemap> entry yields an empty list.
        foreach ($xml->sitemap as $sitemap) {
            $location = trim((string) $sitemap->loc);
            if ($location !== '') {
                $locations[] = $location;
            }
        }

        return $locations;
    }

    /**
     * Validates a sitemap response.
     *
     * Only responses whose content type is exactly "text/xml" (compared case
     * insensitively) are inspected. For a sitemap index every referenced
     * sitemap is downloaded and validated; a plain urlset is validated directly.
     *
     * @param Response $response
     *
     * @throws ValidationFailedException if a sitemap is invalid or a referenced
     *                                   sitemap cannot be fetched
     */
    public function validate(Response $response)
    {
        if (strtolower($response->getContentType()) !== 'text/xml') {
            return;
        }

        $body = $response->getBody();

        // sitemapindex or urlset
        if (preg_match('/<sitemapindex/', $body)) {
            foreach ($this->getLocations($body) as $sitemapUrl) {
                // NOTE(review): the URL comes straight from the response body
                // and is fetched without any scheme or host restriction
                // (SSRF / local file access risk) - confirm whether it should
                // be limited to http(s).
                $singleSitemapXml = file_get_contents($sitemapUrl);

                if ($singleSitemapXml === false) {
                    throw new ValidationFailedException(
                        'The sitemap "' . $sitemapUrl . '" referenced by the sitemap index could not be fetched.'
                    );
                }

                $this->validateBody($singleSitemapXml);
            }
        } elseif (preg_match('/<urlset/', $body)) {
            $this->validateBody($body);
        }
    }
}
79
|
|
|
|
If you suppress an error, we recommend checking for the error condition explicitly: