This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
1 | <?php |
||
2 | |||
3 | namespace CfdiUtils\Cleaner; |
||
4 | |||
5 | use CfdiUtils\Cfdi; |
||
6 | use CfdiUtils\Cleaner\BeforeLoad\BeforeLoadCleanerInterface; |
||
7 | use CfdiUtils\Cleaner\Cleaners\SchemaLocationsXsdUrlsFixer; |
||
8 | use CfdiUtils\Utils\SchemaLocations; |
||
9 | use CfdiUtils\Utils\Xml; |
||
10 | use DOMAttr; |
||
11 | use DOMDocument; |
||
12 | use DOMNode; |
||
13 | use DOMNodeList; |
||
14 | use DOMXPath; |
||
15 | use LogicException; |
||
16 | use Throwable; |
||
17 | |||
/**
 * Class to clean CFDI and avoid bad common practices.
 *
 * Strictly speaking, a CFDI must comply with all XML rules, including that any other
 * XML element must be isolated in its own namespace and follow its own XSD rules.
 *
 * The common practice (allowed by SAT) is that the CFDI is created and signed, and
 * some nodes are attached after signing; some of them do not follow the XML standard.
 *
 * This is why it is better to clear Comprobante/Addenda and remove unused namespaces.
 */
class Cleaner
{
    /** @var DOMDocument|null Document being cleaned, null until load() succeeds */
    protected $dom;

    /** @var BeforeLoadCleanerInterface Cleaner applied to the raw string before it is parsed */
    private $beforeLoadCleaner;

    /**
     * @param string $content XML content; when it is an empty string no document is loaded
     * @param BeforeLoadCleanerInterface|null $beforeLoadCleaner defaults to BeforeLoad\BeforeLoadCleaner
     */
    public function __construct(string $content, BeforeLoadCleanerInterface $beforeLoadCleaner = null)
    {
        $this->beforeLoadCleaner = $beforeLoadCleaner ?? new BeforeLoad\BeforeLoadCleaner();
        if ('' !== $content) {
            $this->load($content);
        }
    }

    /**
     * Clean the given content and return the resulting XML.
     * If an error occurs, an exception is thrown.
     *
     * @param string $content
     * @return string
     */
    public static function staticClean(string $content): string
    {
        $cleaner = new self($content);
        $cleaner->clean();
        return $cleaner->retrieveXml();
    }

    /**
     * Check if the CFDI version is compatible with this class.
     *
     * The comparison is strict: a loose comparison treats numeric strings as numbers,
     * so values like '3.20' or '4' would be wrongly accepted as '3.2' or '4.0'.
     *
     * @param string $version
     * @return bool
     */
    public static function isVersionAllowed(string $version): bool
    {
        return in_array($version, ['3.2', '3.3', '4.0'], true);
    }

    /**
     * Check if a given namespace is allowed (must not be removed from CFDI).
     * A namespace is allowed when it starts with one of the known URI prefixes.
     *
     * @param string $namespace
     * @return bool
     */
    public static function isNameSpaceAllowed(string $namespace): bool
    {
        $allowedPrefixes = ['http://www.w3.org/', 'http://www.sat.gob.mx/'];
        foreach ($allowedPrefixes as $allowedPrefix) {
            if (0 === strncmp($namespace, $allowedPrefix, strlen($allowedPrefix))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Apply all cleaning procedures, in this order: remove Addenda, drop incomplete
     * schema locations, remove nodes and schema locations of not allowed namespaces,
     * remove not allowed namespace declarations, collapse Complemento nodes and
     * fix known XSD locations.
     *
     * @return void
     */
    public function clean()
    {
        $this->removeAddenda();
        $this->removeIncompleteSchemaLocations();
        $this->removeNonSatNSNodes();
        $this->removeNonSatNSschemaLocations();
        $this->removeUnusedNamespaces();
        $this->collapseComprobanteComplemento();
        $this->fixKnownSchemaLocationsXsdUrls();
    }

    /**
     * Load the string content as a CFDI.
     * This is exposed to reuse the current object instead of creating a new instance.
     *
     * The content is first passed through the before-load cleaner and then parsed with
     * Cfdi::newFromString; anything thrown by those steps is re-thrown as a
     * CleanerException carrying the same message, code and previous exception.
     *
     * @param string $content
     *
     * @throws CleanerException when the content cannot be cleaned or loaded as a CFDI
     * @throws CleanerException when the version is not compatible
     *
     * @return void
     */
    public function load(string $content)
    {
        try {
            $content = $this->beforeLoadCleaner->clean($content);
            $cfdi = Cfdi::newFromString($content);
        } catch (Throwable $exception) {
            throw new CleanerException($exception->getMessage(), $exception->getCode(), $exception->getPrevious());
        }
        $version = $cfdi->getVersion();
        if (! static::isVersionAllowed($version)) {
            throw new CleanerException("The CFDI version '$version' is not allowed");
        }
        $this->dom = $cfdi->getDocument();
    }

    /**
     * Get the XML content of the CFDI.
     *
     * @throws LogicException when no document has been loaded
     * @return string
     */
    public function retrieveXml(): string
    {
        return $this->dom()->saveXML();
    }

    /**
     * Get a clone of the XML DOM Document of the CFDI.
     *
     * @throws LogicException when no document has been loaded
     * @return DOMDocument
     */
    public function retrieveDocument(): DOMDocument
    {
        return clone $this->dom();
    }

    /**
     * Procedure to remove the Comprobante/Addenda node.
     *
     * @return void
     */
    public function removeAddenda()
    {
        $addendas = $this->xpathQuery('/cfdi:Comprobante/cfdi:Addenda');
        foreach ($addendas as $addenda) {
            $addenda->parentNode->removeChild($addenda);
        }
    }

    /**
     * Procedure to drop schemaLocations where the second part does not end with '.xsd'.
     *
     * @return void
     */
    public function removeIncompleteSchemaLocations()
    {
        foreach ($this->obtainXsiSchemaLocations() as $attribute) {
            $attribute->nodeValue = $this->removeIncompleteSchemaLocationPrivate($attribute->nodeValue);
        }
    }

    /**
     * @param string $source
     * @return string
     * @deprecated 2.12.0: This function is internal and visibility should be private, use SchemaLocations
     * @internal
     */
    public function removeIncompleteSchemaLocation(string $source): string
    {
        trigger_error('This method is deprecated, should not be used from outside this class', E_USER_DEPRECATED);
        return $this->removeIncompleteSchemaLocationPrivate($source);
    }

    /**
     * Parse a schemaLocation value and remove every namespace reported without location.
     *
     * @param string $source
     * @return string the schemaLocation value rebuilt from the remaining pairs
     */
    private function removeIncompleteSchemaLocationPrivate(string $source): string
    {
        $schemaLocations = SchemaLocations::fromStingStrictXsd($source);
        foreach ($schemaLocations->getNamespacesWithoutLocation() as $namespace) {
            $schemaLocations->remove($namespace);
        }
        return $schemaLocations->asString();
    }

    /**
     * Procedure to drop schemaLocations that are not allowed.
     * If the schemaLocation ends up empty then the attribute is removed.
     *
     * @throws CleanerException when a schemaLocation has a namespace without location
     * @return void
     */
    public function removeNonSatNSschemaLocations()
    {
        foreach ($this->obtainXsiSchemaLocations() as $attribute) {
            $this->removeNonSatNSschemaLocation($attribute);
        }
    }

    /**
     * Remove the not allowed namespaces from a single schemaLocation attribute.
     *
     * @param DOMAttr $schemaLocation
     * @throws CleanerException when the value has a namespace without location
     * @return void
     */
    private function removeNonSatNSschemaLocation(DOMAttr $schemaLocation)
    {
        $source = $schemaLocation->nodeValue;
        // load locations
        $schemaLocations = SchemaLocations::fromString($source, true);
        if ($schemaLocations->hasAnyNamespaceWithoutLocation()) {
            throw new CleanerException(
                sprintf("The schemaLocation value '%s' must have even number of URIs", $source)
            );
        }
        // filter
        foreach ($schemaLocations as $namespace => $location) {
            if (! static::isNameSpaceAllowed($namespace)) {
                $schemaLocations->remove($namespace);
            }
        }
        // apply
        $modified = $schemaLocations->asString();
        if ($schemaLocations->isEmpty()) { // nothing left: remove the attribute
            $schemaLocation->ownerElement->removeAttributeNode($schemaLocation);
        } elseif ($source !== $modified) { // write back only when the value changed
            $schemaLocation->nodeValue = $modified;
        }
    }

    /**
     * Procedure to remove all nodes that are not from an allowed namespace.
     *
     * @return void
     */
    public function removeNonSatNSNodes()
    {
        foreach ($this->obtainNamespaces() as $namespace) {
            if (! static::isNameSpaceAllowed($namespace)) {
                $this->removeNonSatNSNode($namespace);
            }
        }
    }

    /**
     * Procedure to remove all nodes from a specific namespace.
     *
     * @param string $namespace
     * @return void
     */
    private function removeNonSatNSNode(string $namespace)
    {
        // getElementsByTagNameNS returns a live list: removing nodes while iterating it
        // shifts the remaining items and skips elements, so iterate a static snapshot
        $elements = iterator_to_array($this->dom()->getElementsByTagNameNS($namespace, '*'), false);
        foreach ($elements as $element) {
            if (null !== $element->parentNode) {
                $element->parentNode->removeChild($element);
            }
        }
    }

    /**
     * Procedure to remove not allowed xmlns definitions from the root element.
     *
     * @return void
     */
    public function removeUnusedNamespaces()
    {
        $dom = $this->dom();
        // first collect prefix => namespace of every not allowed namespace
        $notAllowed = [];
        foreach ($this->obtainNamespaces() as $namespace) {
            if (! $namespace || static::isNameSpaceAllowed($namespace)) {
                continue;
            }
            $prefix = $dom->lookupPrefix($namespace);
            $notAllowed[$prefix] = $namespace;
        }
        // then remove the collected declarations from the document element
        $documentElement = Xml::documentElement($dom);
        foreach ($notAllowed as $prefix => $namespace) {
            $documentElement->removeAttributeNS($namespace, $prefix);
        }
    }

    /**
     * Procedure to collapse Complemento elements from Comprobante.
     * Collapse takes the children of every additional Complemento, moves them
     * into the first Complemento found and removes the additional Complemento.
     *
     * @return void
     */
    public function collapseComprobanteComplemento()
    {
        $comprobante = Xml::documentElement($this->dom());
        $complementos = $this->xpathQuery('./cfdi:Complemento', $comprobante);
        if ($complementos->length < 2) {
            return; // nothing to do, there are less than 2 complemento
        }
        $first = null;
        /** @var DOMNode $extra */
        foreach ($complementos as $extra) {
            if (null === $first) { // the first one is the target, keep it in place
                $first = $extra;
                continue;
            }
            $comprobante->removeChild($extra); // remove extra child from parent
            while ($extra->childNodes->length > 0) { // move extra child contents into first child
                /** @var DOMNode $child */
                $child = $extra->childNodes->item(0);
                $extra->removeChild($child);
                $first->appendChild($child);
            }
        }
    }

    /**
     * Procedure to fix XSD known location paths, using the fixer created by
     * SchemaLocationsXsdUrlsFixer::createWithKnownSatUrls().
     *
     * @return void
     */
    public function fixKnownSchemaLocationsXsdUrls()
    {
        $schemasFixer = SchemaLocationsXsdUrlsFixer::createWithKnownSatUrls();
        foreach ($this->obtainXsiSchemaLocations() as $attribute) {
            $schemasFixer->fixSchemaLocationAttribute($attribute);
        }
    }

    /**
     * Find every schemaLocation attribute of the XMLSchema-instance namespace.
     * Returns an empty list when the document has no prefix for that namespace.
     *
     * @return DOMNodeList<DOMAttr>
     */
    private function obtainXsiSchemaLocations(): DOMNodeList
    {
        // Do not assume that prefix for http://www.w3.org/2001/XMLSchema-instance is "xsi"
        $xsi = $this->dom()->lookupPrefix('http://www.w3.org/2001/XMLSchema-instance');
        if (! $xsi) {
            return new DOMNodeList();
        }
        // xpathQuery always returns a DOMNodeList, there is no false value to handle here
        return $this->xpathQuery("//@$xsi:schemaLocation");
    }

    /**
     * Obtain the distinct namespace URIs declared anywhere in the document.
     *
     * @return string[]
     */
    private function obtainNamespaces(): array
    {
        $namespaceNodes = iterator_to_array($this->xpathQuery('//namespace::*'));
        $uris = array_column($namespaceNodes, 'nodeValue');
        return array_unique($uris);
    }

    /**
     * Helper function to perform a XPath query using an element (or root element).
     * A failed query is reported as an empty list instead of false.
     *
     * @param string $query
     * @param DOMNode|null $element context node, the document element is used when null
     * @return DOMNodeList
     */
    private function xpathQuery(string $query, DOMNode $element = null): DOMNodeList
    {
        if (null === $element) {
            $document = $this->dom();
            $element = Xml::documentElement($document);
        } else {
            $document = Xml::ownerDocument($element);
        }
        $nodelist = (new DOMXPath($document))->query($query, $element);
        if (false === $nodelist) {
            return new DOMNodeList();
        }
        return $nodelist;
    }

    /**
     * Access the loaded document.
     *
     * @throws LogicException when no document has been loaded
     * @return DOMDocument
     */
    private function dom(): DOMDocument
    {
        if (null === $this->dom) {
            throw new LogicException('No document has been loaded');
        }
        return $this->dom;
    }
}
||
381 |