<?php

namespace whm\Smoke\Rules\Xml\Sitemap;

use whm\Smoke\Http\Response;
use whm\Smoke\Rules\StandardRule;
use whm\Smoke\Rules\ValidationFailedException;

/**
 * This rule checks if a sitemap.xml file is valid.
 *
 * A response body containing "<sitemapindex" is validated against the sitemap
 * index schema; a body containing "<urlset" is validated against the strict
 * or the non-strict sitemap schema, depending on the mode given to init().
 * Bodies containing neither marker are not validated by this rule.
 */
class ValidRule extends StandardRule
{
    // File names of the XSD schemas; getSchema() resolves them relative to
    // the directory of this file.
    const SCHEMA = 'schema.xsd';
    const NON_STRICT_SCHEMA = 'nonStrictSchema.xsd';
    const INDEX = 'siteindex.xsd';

    /**
     * Whether url sets are validated against the strict schema.
     *
     * Initialised to true so the rule behaves like init() called with its
     * default argument even if init() has not been called.
     *
     * @var bool
     */
    private $strictMode = true;

    // NOTE(review): presumably read by StandardRule to decide which responses
    // this rule is applied to - confirm against the parent class.
    protected $contentTypes = array('text/xml', 'application/xml');

    /**
     * Configures the rule.
     *
     * @param bool $strictMode true to validate url sets against SCHEMA,
     *                         false to use NON_STRICT_SCHEMA
     */
    public function init($strictMode = true)
    {
        $this->strictMode = $strictMode;
    }

    /**
     * Returns the absolute path of the schema file to validate against.
     *
     * @param bool $isIndex true for a sitemap index, false for a url set
     *
     * @return string
     */
    private function getSchema($isIndex)
    {
        if ($isIndex) {
            return __DIR__ . '/' . self::INDEX;
        }

        if ($this->strictMode) {
            return __DIR__ . '/' . self::SCHEMA;
        }

        return __DIR__ . '/' . self::NON_STRICT_SCHEMA;
    }

    /**
     * Parses the body and validates it against the matching schema.
     *
     * @param string $body     the XML document to validate
     * @param string $filename name used in the error message
     * @param bool   $isIndex  true for a sitemap index, false for a url set
     *
     * @throws ValidationFailedException if the body is not well-formed XML or
     *                                   does not validate against the schema
     */
    private function validateBody($body, $filename, $isIndex = true)
    {
        // Collect libxml errors internally instead of silencing them with "@",
        // and start from an empty buffer so a stale error from an earlier
        // libxml call can never end up in the message below.
        $previousSetting = libxml_use_internal_errors(true);
        libxml_clear_errors();

        $dom = new \DOMDocument();

        // Only run the schema validation on a document that actually parsed;
        // otherwise the reported error would be a follow-up error instead of
        // the parse error itself.
        $valid = $dom->loadXML($body) && $dom->schemaValidate($this->getSchema($isIndex));

        $lastError = libxml_get_last_error();

        // Restore the global libxml state before possibly throwing.
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if (!$valid) {
            // libxml_get_last_error() returns false when nothing was recorded.
            $reason = $lastError instanceof \LibXMLError
                ? str_replace("\n", '', $lastError->message)
                : 'unknown error';

            throw new ValidationFailedException(
                'The given sitemap file (' . $filename . ') did not validate against the sitemap schema (last error: ' . $reason . ').');
        }
    }

    /**
     * Extracts the <loc> value of every <sitemap> child of the given document.
     *
     * @param string $body XML of a sitemap index
     *
     * @return array list of location strings; empty if the body cannot be
     *               parsed or contains no <sitemap> element with a <loc>
     */
    private function getLocations($body)
    {
        $locations = array();

        $xml = simplexml_load_string($body);
        if ($xml === false) {
            return $locations;
        }

        // Iterating the child elements handles zero, one and many <sitemap>
        // entries alike and skips entries that have no <loc>.
        foreach ($xml->sitemap as $sitemap) {
            if (isset($sitemap->loc)) {
                $locations[] = (string) $sitemap->loc;
            }
        }

        return $locations;
    }

    /**
     * Validates the response body if it is a sitemap index or a url set.
     *
     * @param Response $response the response to check
     *
     * @throws ValidationFailedException if the sitemap is invalid
     */
    protected function doValidation(Response $response)
    {
        $body = (string) $response->getBody();

        // sitemapindex or urlset
        if (strpos($body, '<sitemapindex') !== false) {
            $isIndex = true;
        } elseif (strpos($body, '<urlset') !== false) {
            $isIndex = false;
        } else {
            // Neither a sitemap index nor a url set: nothing to validate.
            return;
        }

        $this->validateBody($body, (string) $response->getUri(), $isIndex);
    }
}
89
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.