1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Eclipxe\XmlSchemaValidator; |
6
|
|
|
|
7
|
|
|
use DOMAttr; |
8
|
|
|
use DOMDocument; |
9
|
|
|
use DOMXPath; |
10
|
|
|
use Eclipxe\XmlSchemaValidator\Exceptions\SchemaLocationPartsNotEvenException; |
11
|
|
|
use Eclipxe\XmlSchemaValidator\Exceptions\ValidationFailException; |
12
|
|
|
use Eclipxe\XmlSchemaValidator\Exceptions\XmlContentIsEmptyException; |
13
|
|
|
use Eclipxe\XmlSchemaValidator\Exceptions\XmlContentIsInvalidException; |
14
|
|
|
use Eclipxe\XmlSchemaValidator\Exceptions\XmlSchemaValidatorException; |
15
|
|
|
use Eclipxe\XmlSchemaValidator\Internal\LibXmlException; |
16
|
|
|
|
17
|
|
|
/**
 * XML schema validator for documents that reference one or more external schemas.
 *
 * It is needed because some XML can contain more than one external schema and the DOM library
 * fails to load it. The validator collects every "schemaLocation" attribute of the document into
 * a Schemas collection, asks the collection for a single importer XSD and validates the document
 * against that XSD.
 */
class SchemaValidator
{
    /** @var DOMDocument Document that is inspected and validated */
    private $document;

    /** @var string Message of the failure captured by the last validate() call, empty when none */
    private $lastError = '';

    /**
     * SchemaValidator constructor.
     *
     * @param DOMDocument $document the already loaded document to validate
     */
    public function __construct(DOMDocument $document)
    {
        $this->document = $document;
    }

    /**
     * Create a SchemaValidator instance based on an XML string
     *
     * @param string $contents XML source to load
     * @return self
     * @throws XmlContentIsEmptyException when the xml contents is an empty string
     * @throws XmlContentIsInvalidException when the xml contents cannot be loaded
     */
    public static function createFromString(string $contents): self
    {
        // do not allow empty string
        if ('' === $contents) {
            throw XmlContentIsEmptyException::create();
        }

        // create and load contents; a LibXmlException coming out of the wrapped callable
        // is translated into the package specific "invalid content" exception
        try {
            /** @var DOMDocument $document */
            $document = LibXmlException::useInternalErrors(
                function () use ($contents): DOMDocument {
                    $document = new DOMDocument();
                    $document->loadXML($contents);
                    return $document;
                }
            );
        } catch (LibXmlException $exception) {
            throw XmlContentIsInvalidException::create($exception);
        }

        return new self($document);
    }

    /**
     * Validate the content by:
     * - Create the Schemas collection from the document
     * - Validate using validateWithSchemas
     * - Populate the error property
     *
     * Any XmlSchemaValidatorException raised on the way is captured: its message is stored
     * (see getLastError) and the method returns false instead of throwing.
     *
     * @return bool true when no validation exception was raised, false otherwise
     * @see validateWithSchemas
     */
    public function validate(): bool
    {
        // forget the failure of any previous run
        $this->lastError = '';
        try {
            // create the schemas collection
            $schemas = $this->buildSchemas();
            // validate the document using the schema collection
            $this->validateWithSchemas($schemas);
        } catch (XmlSchemaValidatorException $ex) {
            $this->lastError = $ex->getMessage();
            return false;
        }
        return true;
    }

    /**
     * Retrieve the last error message captured on the last validate operation
     *
     * @return string empty string when the last validate() call did not fail
     */
    public function getLastError(): string
    {
        return $this->lastError;
    }

    /**
     * Validate against a list of schemas (if any)
     *
     * @param Schemas $schemas collection used to build the importer XSD
     * @return void
     *
     * @throws ValidationFailException when schema validation fails
     */
    public function validateWithSchemas(Schemas $schemas): void
    {
        // early exit, do not validate if schemas collection is empty
        if (0 === $schemas->count()) {
            return;
        }

        // build the unique importing schema
        $xsd = $schemas->getImporterXsd();

        // validate and trap LibXmlException; the boolean result of schemaValidateSource is not
        // inspected here, failures are reported through the LibXmlException caught below
        try {
            LibXmlException::useInternalErrors(function () use ($xsd): void {
                $this->document->schemaValidateSource($xsd);
            });
        } catch (LibXmlException $exception) {
            throw ValidationFailException::create($exception);
        }
    }

    /**
     * Retrieve a list of namespaces based on the schemaLocation attributes
     *
     * @return Schemas empty when the XMLSchema-instance namespace has no prefix in the document
     * @throws SchemaLocationPartsNotEvenException when the schemaLocation attribute does not have even parts
     */
    public function buildSchemas(): Schemas
    {
        $schemas = new Schemas();
        $xpath = new DOMXPath($this->document);

        // get the http://www.w3.org/2001/XMLSchema-instance namespace (it could not be 'xsi')
        $xsi = strval($this->document->lookupPrefix('http://www.w3.org/2001/XMLSchema-instance'));
        if ('' === $xsi) { // the namespace is not registered, no need to continue
            return $schemas;
        }

        // get all the xsi:schemaLocation attributes in the document;
        // DOMXPath::query() may return false, in that case there is nothing to iterate
        /** @var iterable<DOMAttr> $schemasList */
        $schemasList = $xpath->query("//@$xsi:schemaLocation") ?: [];

        // process every schemaLocation and import them into schemas
        foreach ($schemasList as $schemaAttribute) {
            $schemaValue = $schemaAttribute->nodeValue;
            if (null !== $schemaValue) {
                $schemas->import($this->buildSchemasFromSchemaLocationValue($schemaValue));
            }
        }

        return $schemas;
    }

    /**
     * Create a schemas collection from the content of a schema location
     *
     * The content is a whitespace separated list where every two consecutive parts are passed
     * together to Schemas::create().
     *
     * @param string $content value of a schemaLocation attribute
     * @return Schemas
     * @throws SchemaLocationPartsNotEvenException when the schemaLocation attribute does not have even parts
     */
    public function buildSchemasFromSchemaLocationValue(string $content): Schemas
    {
        // get parts without inner spaces; PREG_SPLIT_NO_EMPTY removes only empty strings,
        // so a part like "0" is preserved (array_filter without callback would discard it)
        $parts = preg_split('/\s+/', $content, -1, PREG_SPLIT_NO_EMPTY) ?: [];
        $partsCount = count($parts);

        // check that the list count is an even number
        if (0 !== $partsCount % 2) {
            throw SchemaLocationPartsNotEvenException::create($parts);
        }

        // insert the uris pairs into the schemas
        $schemas = new Schemas();
        for ($k = 0; $k < $partsCount; $k += 2) {
            $schemas->create($parts[$k], $parts[$k + 1]);
        }
        return $schemas;
    }
}
191
|
|
|
|