Passed
Push — master ( e458aa...d84cb7 )
by Carlos C
02:30 queued 11s
created

Cleaner::fixKnownSchemaLocationsXsdUrls()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
cc 2
eloc 4
nc 2
nop 0
dl 0
loc 6
ccs 5
cts 5
cp 1
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace CfdiUtils\Cleaner;
4
5
use CfdiUtils\Cfdi;
6
use CfdiUtils\Cleaner\BeforeLoad\BeforeLoadCleanerInterface;
7
use CfdiUtils\Cleaner\Cleaners\SchemaLocationsXsdUrlsFixer;
8
use CfdiUtils\Utils\SchemaLocations;
9
use CfdiUtils\Utils\Xml;
10
use DOMAttr;
11
use DOMDocument;
12
use DOMNode;
13
use DOMNodeList;
14
use DOMXPath;
15
16
/**
17
 * Class to clean CFDI and avoid bad common practices.
18
 *
19
 * Strictly speaking, a CFDI must comply with all XML rules, including that any other
20
 * XML element must be isolated in its own namespace and follow its own XSD rules.
21
 *
22
 * The common practice (allowed by SAT) is that the CFDI is created, signed and
23
 * some nodes are attached after signing; some of them do not follow the XML standard.
24
 *
25
 * This is why it's better to clear Comprobante/Addenda and remove unused namespaces
26
 */
27
class Cleaner
28
{
29
    /** @var DOMDocument|null */
30
    protected $dom;
31
32
    /** @var BeforeLoadCleanerInterface */
33
    private $beforeLoadCleaner;
34
35 14
    /**
     * Create a cleaner and, when content is given, load it immediately.
     *
     * @param string $content XML content; an empty string means that nothing is loaded
     * @param BeforeLoadCleanerInterface|null $beforeLoadCleaner cleaner applied to the content before
     *        it is parsed; when null a BeforeLoad\BeforeLoadCleaner instance is created
     *
     * @throws CleanerException when the non-empty content cannot be loaded (see load())
     */
    public function __construct(string $content, BeforeLoadCleanerInterface $beforeLoadCleaner = null)
    {
        if (null === $beforeLoadCleaner) {
            $beforeLoadCleaner = new BeforeLoad\BeforeLoadCleaner();
        }
        $this->beforeLoadCleaner = $beforeLoadCleaner;

        if ('' === $content) {
            return; // empty content: leave the object without a document
        }
        $this->load($content);
    }
42
43
    /**
     * Clean the given content in a single call and return the resulting XML.
     * A new instance is created with the content, clean() is executed and the XML is retrieved.
     * Exceptions raised along the way are not caught here.
     *
     * @param string $content
     * @return string
     */
    public static function staticClean($content): string
    {
        $instance = new self($content);
        $instance->clean();
        $xml = $instance->retrieveXml();
        return $xml;
    }
56
57
    /**
     * Check if the CFDI version is compatible with this class
     *
     * The comparison is strict: only the exact strings '3.2' and '3.3' are accepted.
     * A loose comparison would also accept numeric-equal strings such as '3.20' or '3.30'.
     *
     * @param string $version
     * @return bool
     */
    public static function isVersionAllowed(string $version): bool
    {
        return in_array($version, ['3.2', '3.3'], true);
    }
67
68
    /**
     * Tell if a namespace is allowed, meaning that it must not be removed from the CFDI.
     * A namespace is allowed when it starts with 'http://www.w3.org/' or 'http://www.sat.gob.mx/'.
     *
     * @param string $namespace
     * @return bool
     */
    public static function isNameSpaceAllowed(string $namespace): bool
    {
        $w3Prefix = 'http://www.w3.org/';
        if (0 === strncmp($namespace, $w3Prefix, strlen($w3Prefix))) {
            return true;
        }

        $satPrefix = 'http://www.sat.gob.mx/';
        return 0 === strncmp($namespace, $satPrefix, strlen($satPrefix));
    }
81
82
    /**
     * Run every cleaning procedure of this class over the loaded document.
     * The procedures are executed in a fixed order, one after the other.
     *
     * @return void
     */
    public function clean()
    {
        // drop the Comprobante/Addenda node
        $this->removeAddenda();

        // drop schemaLocation pairs whose location part is not an xsd
        $this->removeIncompleteSchemaLocations();

        // drop elements and schemaLocation entries of namespaces that are not allowed
        $this->removeNonSatNSNodes();
        $this->removeNonSatNSschemaLocations();

        // drop xmlns definitions of namespaces that are not allowed
        $this->removeUnusedNamespaces();

        // merge multiple Comprobante/Complemento elements into the first one
        $this->collapseComprobanteComplemento();

        // fix known xsd locations
        $this->fixKnownSchemaLocationsXsdUrls();
    }
97
98
    /**
     * Load the string content as a CFDI
     * This is exposed to reuse the current object instead of creating a new instance
     *
     * The content is first processed by the "before load" cleaner and then parsed as a CFDI.
     * Any error raised in those steps is rethrown as a CleanerException with the same message
     * and code, and the caught error is chained as the previous exception.
     *
     * @param string $content
     *
     * @throws CleanerException when the content is not valid xml
     * @throws CleanerException when the document does not use the namespace http://www.sat.gob.mx/cfd/3
     * @throws CleanerException when cannot find a Comprobante version (or Version) attribute
     * @throws CleanerException when the version is not compatible
     *
     * @return void
     */
    public function load(string $content)
    {
        try {
            $content = $this->beforeLoadCleaner->clean($content);
            $cfdi = Cfdi::newFromString($content);
        } catch (\Throwable $exception) {
            // chain the caught exception itself (not its previous) so the immediate cause is not lost
            throw new CleanerException($exception->getMessage(), $exception->getCode(), $exception);
        }

        $version = $cfdi->getVersion();
        // isVersionAllowed is static, call it statically (late static binding keeps overrides working)
        if (! static::isVersionAllowed($version)) {
            throw new CleanerException("The CFDI version '$version' is not allowed");
        }

        // only store the document once every check has passed
        $this->dom = $cfdi->getDocument();
    }
125
126
    /**
     * Export the loaded CFDI document as an XML string
     *
     * @throws \LogicException when no document has been loaded
     *
     * @return string
     */
    public function retrieveXml(): string
    {
        $document = $this->dom();
        return $document->saveXML();
    }
135
136
    /**
     * Obtain a copy of the loaded CFDI as a DOM Document.
     * The returned object is a clone, not the internal document.
     *
     * @throws \LogicException when no document has been loaded
     *
     * @return DOMDocument
     */
    public function retrieveDocument(): DOMDocument
    {
        $copy = clone $this->dom();
        return $copy;
    }
145
146
    /**
     * Remove every node that matches /cfdi:Comprobante/cfdi:Addenda
     *
     * @return void
     */
    public function removeAddenda()
    {
        foreach ($this->xpathQuery('/cfdi:Comprobante/cfdi:Addenda') as $node) {
            $parent = $node->parentNode;
            $parent->removeChild($node);
        }
    }
159
160
    /**
     * Procedure to drop schemaLocations where the second part does not end with '.xsd'
     * Every xsi:schemaLocation attribute found in the document gets its value rewritten.
     *
     * @return void
     */
    public function removeIncompleteSchemaLocations()
    {
        $attributes = $this->obtainXsiSchemaLocations();
        foreach ($attributes as $attribute) {
            $current = $attribute->nodeValue;
            $attribute->nodeValue = $this->removeIncompleteSchemaLocationPrivate($current);
        }
    }
171
172
    /**
     * Public entry point kept for backward compatibility.
     * It raises an E_USER_DEPRECATED notice and delegates to the private implementation.
     *
     * @param string $source
     * @return string
     * @deprecated 2.12.0: This function is internal and visibility should be private, use SchemaLocations
     * @internal
     */
    public function removeIncompleteSchemaLocation(string $source): string
    {
        trigger_error('This method is deprecated, should not be used from outside this class', E_USER_DEPRECATED);
        $result = $this->removeIncompleteSchemaLocationPrivate($source);
        return $result;
    }
183
184 1
    /**
     * Parse a schemaLocation value with SchemaLocations::fromStingStrictXsd, remove every
     * namespace reported as not having a location and return the remaining value as string.
     *
     * @param string $source
     * @return string
     */
    private function removeIncompleteSchemaLocationPrivate(string $source): string
    {
        $locations = SchemaLocations::fromStingStrictXsd($source);
        $incomplete = $locations->getNamespacesWithoutLocation();
        foreach ($incomplete as $incompleteNamespace) {
            $locations->remove($incompleteNamespace);
        }
        return $locations->asString();
    }
192
193
    /**
     * Remove from every xsi:schemaLocation attribute the namespaces that are not allowed.
     * When an attribute ends up without locations the attribute itself is removed.
     *
     * @throws CleanerException when a schemaLocation value has a namespace without location
     *
     * @return void
     */
    public function removeNonSatNSschemaLocations()
    {
        foreach ($this->obtainXsiSchemaLocations() as $attribute) {
            $this->removeNonSatNSschemaLocation($attribute);
        }
    }
206
207 3
    /**
     * Filter a single schemaLocation attribute, keeping only allowed namespaces.
     *
     * @param DOMAttr $schemaLocation
     *
     * @throws CleanerException when the value has a namespace without location
     *
     * @return void
     */
    private function removeNonSatNSschemaLocation(DOMAttr $schemaLocation)
    {
        $original = $schemaLocation->nodeValue;

        // parse the attribute value and refuse unpaired entries
        $locations = SchemaLocations::fromString($original, true);
        if ($locations->hasAnyNamespaceWithoutLocation()) {
            $message = sprintf("The schemaLocation value '%s' must have even number of URIs", $original);
            throw new CleanerException($message);
        }

        // discard the namespaces that are not allowed
        foreach ($locations as $namespace => $unusedLocation) {
            if ($this->isNameSpaceAllowed($namespace)) {
                continue;
            }
            $locations->remove($namespace);
        }

        $filtered = $locations->asString();

        // nothing left: the attribute is removed from its owner element
        if ($locations->isEmpty()) {
            $schemaLocation->ownerElement->removeAttributeNode($schemaLocation);
            return;
        }

        // only write the value when it has changed
        if ($original !== $filtered) {
            $schemaLocation->nodeValue = $filtered;
        }
    }
231
232
    /**
     * Remove the elements of every namespace found in the document that is not allowed
     *
     * @return void
     */
    public function removeNonSatNSNodes()
    {
        foreach ($this->obtainNamespaces() as $candidate) {
            if ($this->isNameSpaceAllowed($candidate)) {
                continue;
            }
            $this->removeNonSatNSNode($candidate);
        }
    }
246
247
    /**
     * Procedure to remove all nodes from a specific namespace
     *
     * The list returned by getElementsByTagNameNS is live: removing nodes while iterating it
     * makes the iteration skip elements. The list is copied to an array first so every
     * matching element is visited and removed.
     *
     * @param string $namespace
     * @return void
     */
    private function removeNonSatNSNode(string $namespace)
    {
        $elements = iterator_to_array($this->dom()->getElementsByTagNameNS($namespace, '*'), false);
        foreach ($elements as $element) {
            $element->parentNode->removeChild($element);
        }
    }
259
260
    /**
     * Remove from the root element the xmlns definitions of namespaces that are not allowed.
     *
     * It works in two phases: first it collects the prefix of each namespace to remove,
     * then it removes each collected definition from the document element.
     *
     * @return void
     */
    public function removeUnusedNamespaces()
    {
        $document = $this->dom();

        // phase 1: collect prefix => namespace of every non-empty namespace that is not allowed
        $toRemove = [];
        foreach ($this->obtainNamespaces() as $uri) {
            if ($uri && ! $this->isNameSpaceAllowed($uri)) {
                $toRemove[$document->lookupPrefix($uri)] = $uri;
            }
        }

        // phase 2: remove the collected definitions from the root element
        $root = Xml::documentElement($document);
        foreach ($toRemove as $prefix => $uri) {
            $root->removeAttributeNS($uri, $prefix);
        }
    }
282
283
    /**
     * Procedure to collapse Complemento elements from Comprobante
     * Collapsing takes the children of every additional Complemento and puts them
     * on the first Complemento found; the additional Complemento elements are removed.
     *
     * @return void
     */
    public function collapseComprobanteComplemento()
    {
        $comprobante = Xml::documentElement($this->dom());
        $complementos = $this->xpathQuery('./cfdi:Complemento', $comprobante);
        $total = $complementos->length;
        if ($total < 2) {
            return; // zero or one Complemento: nothing to collapse
        }

        /** @var DOMNode $target */
        $target = $complementos->item(0);
        for ($index = 1; $index < $total; $index++) {
            /** @var DOMNode $extra */
            $extra = $complementos->item($index);
            $comprobante->removeChild($extra); // detach the additional Complemento
            // move its children, in order, to the end of the first Complemento
            while (null !== ($child = $extra->firstChild)) {
                $extra->removeChild($child);
                $target->appendChild($child);
            }
        }
    }
312
313
    /**
     * Apply SchemaLocationsXsdUrlsFixer (created with the known SAT urls)
     * to every xsi:schemaLocation attribute of the document.
     *
     * @return void
     */
    public function fixKnownSchemaLocationsXsdUrls()
    {
        $attributes = $this->obtainXsiSchemaLocations();
        $fixer = SchemaLocationsXsdUrlsFixer::createWithKnownSatUrls();
        foreach ($attributes as $attribute) {
            $fixer->fixSchemaLocationAttribute($attribute);
        }
    }
326
327
    /**
     * Find every schemaLocation attribute that belongs to the XMLSchema-instance namespace.
     * The prefix is looked up on the document instead of assuming that it is "xsi".
     * If the prefix cannot be found an empty list is returned.
     *
     * @return DOMNodeList|DOMAttr[]
     */
    private function obtainXsiSchemaLocations(): DOMNodeList
    {
        $prefix = $this->dom()->lookupPrefix('http://www.w3.org/2001/XMLSchema-instance');
        return ($prefix)
            ? $this->xpathQuery('//@' . $prefix . ':schemaLocation')
            : new DOMNodeList();
    }
337
338
    /**
     * List the distinct values of all namespace nodes found by the XPath query //namespace::*
     *
     * @return string[]
     */
    private function obtainNamespaces(): array
    {
        $namespaceNodes = iterator_to_array($this->xpathQuery('//namespace::*'));
        $uris = array_column($namespaceNodes, 'nodeValue');
        return array_unique($uris);
    }
343
344
    /**
     * Run an XPath query relative to an element.
     * When no element is given the query runs against the root element of the loaded document.
     * If the query fails (returns false) an empty list is returned instead.
     *
     * @param string $query
     * @param DOMNode|null $element
     * @return DOMNodeList
     */
    private function xpathQuery(string $query, DOMNode $element = null): DOMNodeList
    {
        if (null !== $element) {
            $document = Xml::ownerDocument($element);
        } else {
            $document = $this->dom();
            $element = Xml::documentElement($document);
        }

        $xpath = new DOMXPath($document);
        /** @var DOMNodeList|false $found phpstan does not know that query can return false */
        $found = $xpath->query($query, $element);
        return (false === $found) ? new DOMNodeList() : $found;
    }
366
367 7
    /**
     * Access the loaded document, failing when there is none.
     *
     * @throws \LogicException when no document has been loaded
     *
     * @return DOMDocument
     */
    private function dom(): DOMDocument
    {
        if (null !== $this->dom) {
            return $this->dom;
        }
        throw new \LogicException('No document has been loaded');
    }
374
}
375