Completed
Push — master ( cc6c1a...7726ef )
by Carlos C
04:19 queued 12s
created

Cleaner::removeIncompleteSchemaLocationPrivate()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 4
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 7
ccs 5
cts 5
cp 1
crap 2
rs 10
1
<?php
2
namespace CfdiUtils\Cleaner;
3
4
use CfdiUtils\Cfdi;
5
use CfdiUtils\Utils\SchemaLocations;
6
use CfdiUtils\Utils\Xml;
7
use DOMAttr;
8
use DOMDocument;
9
use DOMNode;
10
use DOMNodeList;
11
use DOMXPath;
12
13
/**
14
 * Class to clean CFDI and avoid bad common practices.
15
 *
16
 * Strictly speaking, a CFDI must comply with all XML rules, including that any other
17
 * XML element must be isolated in its own namespace and follow their own XSD rules.
18
 *
19
 * The common practice (allowed by SAT) is that the CFDI is created, signed and
20
 * some nodes are attached after signing, and some of them do not follow the XML standard.
21
 *
22
 * This is why it's better to clear Comprobante/Addenda and remove unused namespaces
23
 */
24
class Cleaner
25
{
26
    /** @var DOMDocument|null */
27
    protected $dom;
28
29 13
    /**
     * Create the cleaner and, unless $content is the empty string, load it right away.
     *
     * @param string $content XML content to load, or '' to create the object without a document
     */
    public function __construct(string $content)
    {
        if ('' === $content) {
            return; // nothing to load, a later call to load() is expected
        }
        $this->load($content);
    }
35
36
    /**
     * Shortcut that builds a cleaner for the given content, runs clean() and
     * returns the resulting XML.
     * Exceptions raised while loading or cleaning are not caught here.
     *
     * @param string $content
     * @return string
     */
    public static function staticClean($content): string
    {
        $instance = new self($content);
        $instance->clean();

        return $instance->retrieveXml();
    }
49
50
    /**
     * Check if the CFDI version is compatible with this class
     *
     * Only the exact strings '3.2' and '3.3' are accepted. The comparison is strict,
     * so numeric look-alikes such as '3.30' or ' 3.3' are rejected.
     *
     * @param string $version
     * @return bool
     */
    public static function isVersionAllowed(string $version): bool
    {
        // strict mode avoids PHP's numeric-string juggling ('3.30' == '3.3' is true)
        return in_array($version, ['3.2', '3.3'], true);
    }
60
61
    /**
     * Tell whether a namespace must be kept in the CFDI.
     *
     * A namespace is allowed when it begins with 'http://www.w3.org/' or with
     * 'http://www.sat.gob.mx/'.
     *
     * @param string $namespace
     * @return bool
     */
    public static function isNameSpaceAllowed(string $namespace): bool
    {
        foreach (['http://www.w3.org/', 'http://www.sat.gob.mx/'] as $allowedPrefix) {
            if (0 === strncmp($namespace, $allowedPrefix, strlen($allowedPrefix))) {
                return true;
            }
        }

        return false;
    }
74
75
    /**
     * Run every removal procedure of this class over the loaded document.
     *
     * The steps run in a fixed order: Addenda, incomplete schemaLocations,
     * nodes of non allowed namespaces, schemaLocations of non allowed namespaces,
     * unused namespace declarations and, finally, the Complemento collapse.
     *
     * @return void
     */
    public function clean()
    {
        // drop content first ...
        $this->removeAddenda();
        $this->removeIncompleteSchemaLocations();
        $this->removeNonSatNSNodes();
        // ... then the schemaLocation entries and xmlns declarations
        $this->removeNonSatNSschemaLocations();
        $this->removeUnusedNamespaces();
        // ... and finally merge repeated Complemento elements
        $this->collapseComprobanteComplemento();
    }
89
90
    /**
     * Parse the content as a CFDI and keep its DOM document as the working document.
     * It is public so the same object can be reused instead of creating a new instance.
     *
     * Any failure raised by Cfdi::newFromString() is re-thrown as a CleanerException
     * that copies its message and code; the new exception is chained to the *previous*
     * of the caught one, not to the caught exception itself.
     *
     * @param string $content
     *
     * @throws CleanerException when Cfdi::newFromString() fails to create the CFDI
     * @throws CleanerException when the CFDI version is not allowed by isVersionAllowed()
     *
     * @return void
     */
    public function load(string $content)
    {
        try {
            $cfdi = Cfdi::newFromString($content);
        } catch (\Throwable $failure) {
            throw new CleanerException(
                $failure->getMessage(),
                $failure->getCode(),
                $failure->getPrevious()
            );
        }

        $version = $cfdi->getVersion();
        if (! static::isVersionAllowed($version)) {
            throw new CleanerException("The CFDI version '$version' is not allowed");
        }

        // the document is stored only after the version has been accepted
        $this->dom = $cfdi->getDocument();
    }
116
117
    /**
     * Serialize the loaded document to XML.
     *
     * @throws \LogicException when no document has been loaded
     * @return string
     */
    public function retrieveXml(): string
    {
        $document = $this->dom();

        return $document->saveXML();
    }
126
127
    /**
     * Return a copy (clone) of the loaded DOM document, the internal one is not exposed.
     *
     * @throws \LogicException when no document has been loaded
     * @return DOMDocument
     */
    public function retrieveDocument(): DOMDocument
    {
        $copy = clone $this->dom();

        return $copy;
    }
136
137
    /**
     * Remove every node matched by /cfdi:Comprobante/cfdi:Addenda from its parent.
     *
     * @return void
     */
    public function removeAddenda()
    {
        foreach ($this->xpathQuery('/cfdi:Comprobante/cfdi:Addenda') as $node) {
            $node->parentNode->removeChild($node);
        }
    }
150
151
    /**
     * Rewrite every xsi:schemaLocation attribute of the document, dropping the
     * namespaces that SchemaLocations reports as having no location
     * (presumably those whose location does not end with '.xsd').
     *
     * @return void
     */
    public function removeIncompleteSchemaLocations()
    {
        foreach ($this->obtainXsiSchemaLocations() as $attribute) {
            $current = $attribute->nodeValue;
            $attribute->nodeValue = $this->removeIncompleteSchemaLocationPrivate($current);
        }
    }
162
163
    /**
     * Deprecated public entry point: raises an E_USER_DEPRECATED notice and then
     * delegates to the private implementation.
     *
     * @param string $source
     * @return string
     * @deprecated 2.12.0: This function is internal and visibility should be private, use SchemaLocations
     * @internal
     */
    public function removeIncompleteSchemaLocation(string $source): string
    {
        trigger_error('This method is deprecated, should not be used from outside this class', E_USER_DEPRECATED);
        $cleaned = $this->removeIncompleteSchemaLocationPrivate($source);

        return $cleaned;
    }
174
175 1
    /**
     * Parse a schemaLocation value, remove every namespace reported without
     * location and return the value serialized again.
     *
     * @param string $source
     * @return string
     */
    private function removeIncompleteSchemaLocationPrivate(string $source): string
    {
        $locations = SchemaLocations::fromStingStrictXsd($source);
        $incomplete = $locations->getNamespacesWithoutLocation();
        foreach ($incomplete as $orphan) {
            $locations->remove($orphan);
        }

        return $locations->asString();
    }
183
184
    /**
     * Filter every xsi:schemaLocation attribute so it only keeps allowed namespaces.
     * An attribute that ends up empty is removed from its element.
     *
     * @throws CleanerException when a schemaLocation has a namespace without location
     * @return void
     */
    public function removeNonSatNSschemaLocations()
    {
        foreach ($this->obtainXsiSchemaLocations() as $attribute) {
            $this->removeNonSatNSschemaLocation($attribute);
        }
    }
197
198 3
    /**
     * Filter a single schemaLocation attribute, keeping only the allowed namespaces.
     *
     * When nothing is left the attribute is removed from its owner element,
     * otherwise its value is rewritten only if it actually changed.
     *
     * @param DOMAttr $schemaLocation
     * @throws CleanerException when any namespace has no location
     * @return void
     */
    private function removeNonSatNSschemaLocation(DOMAttr $schemaLocation)
    {
        $original = $schemaLocation->nodeValue;

        // parse the attribute value
        $pairs = SchemaLocations::fromString($original, true);
        if ($pairs->hasAnyNamespaceWithoutLocation()) {
            $message = sprintf("The schemaLocation value '%s' must have even number of URIs", $original);
            throw new CleanerException($message);
        }

        // discard the entries of namespaces that are not allowed
        foreach ($pairs as $uri => $location) {
            if ($this->isNameSpaceAllowed($uri)) {
                continue;
            }
            $pairs->remove($uri);
        }

        // write the result back
        $updated = $pairs->asString();
        if ($pairs->isEmpty()) {
            // nothing left: the attribute itself goes away
            $schemaLocation->ownerElement->removeAttributeNode($schemaLocation);
            return;
        }
        if ($original !== $updated) {
            $schemaLocation->nodeValue = $updated;
        }
    }
222
223
    /**
     * Remove the elements of every namespace found in the document that is not allowed.
     *
     * @return void
     */
    public function removeNonSatNSNodes()
    {
        foreach ($this->obtainNamespaces() as $uri) {
            if ($this->isNameSpaceAllowed($uri)) {
                continue;
            }
            $this->removeNonSatNSNode($uri);
        }
    }
237
238
    /**
     * Procedure to remove all elements that belong to a specific namespace
     *
     * @param string $namespace
     * @return void
     */
    private function removeNonSatNSNode(string $namespace)
    {
        // getElementsByTagNameNS() returns a live list: removing a node while iterating it
        // shifts the remaining items and the following element would be skipped.
        // Take a snapshot first so every matching element is visited.
        $elements = iterator_to_array($this->dom()->getElementsByTagNameNS($namespace, '*'), false);
        foreach ($elements as $element) {
            $parent = $element->parentNode;
            if (null === $parent) {
                continue; // already detached, nothing to remove it from
            }
            $parent->removeChild($element);
        }
    }
250
251
    /**
     * Remove from the root element the xmlns declarations of namespaces that are not allowed.
     *
     * Works in two phases: first the prefix of every non allowed namespace is looked up
     * (indexed by prefix), then each declaration is removed from the document element.
     *
     * @return void
     */
    public function removeUnusedNamespaces()
    {
        $document = $this->dom();

        // phase 1: collect prefix => namespace of what must be removed
        $removable = [];
        foreach ($this->obtainNamespaces() as $uri) {
            if ($uri && ! $this->isNameSpaceAllowed($uri)) {
                $removable[$document->lookupPrefix($uri)] = $uri;
            }
        }

        // phase 2: remove the declarations from the root element
        $root = Xml::documentElement($document);
        foreach ($removable as $prefix => $uri) {
            $root->removeAttributeNS($uri, $prefix);
        }
    }
273
274
    /**
     * Merge all the cfdi:Complemento children of the root element into the first one.
     *
     * Every Complemento after the first is removed from the root and its child nodes
     * are moved, in order, to the end of the first Complemento.
     *
     * @return void
     */
    public function collapseComprobanteComplemento()
    {
        $root = Xml::documentElement($this->dom());
        $found = $this->xpathQuery('./cfdi:Complemento', $root);
        $total = $found->length;
        if ($total < 2) {
            return; // zero or one Complemento: there is nothing to collapse
        }

        /** @var DOMNode $keeper */
        $keeper = $found->item(0);
        for ($index = 1; $index < $total; $index++) {
            /** @var DOMNode $surplus */
            $surplus = $found->item($index);
            $root->removeChild($surplus); // detach the extra Complemento
            while ($surplus->hasChildNodes()) { // move its contents into the first one
                /** @var DOMNode $moved */
                $moved = $surplus->firstChild;
                $surplus->removeChild($moved);
                $keeper->appendChild($moved);
            }
        }
    }
303
304
    /**
     * Find every schemaLocation attribute of the XMLSchema-instance namespace.
     * The prefix is looked up in the document instead of assuming it is "xsi";
     * when there is no prefix an empty list is returned.
     *
     * @return DOMNodeList|DOMAttr[]
     */
    private function obtainXsiSchemaLocations(): DOMNodeList
    {
        $prefix = $this->dom()->lookupPrefix('http://www.w3.org/2001/XMLSchema-instance');

        return ($prefix)
            ? $this->xpathQuery('//@' . $prefix . ':schemaLocation')
            : new DOMNodeList();
    }
314
315
    /**
     * List the distinct values of all namespace nodes found by the XPath '//namespace::*'.
     *
     * @return string[]
     */
    private function obtainNamespaces(): array
    {
        $namespaceNodes = iterator_to_array($this->xpathQuery('//namespace::*'));
        $values = array_column($namespaceNodes, 'nodeValue');

        return array_unique($values);
    }
320
321
    /**
     * Run a XPath query relative to the given element, or to the root element of the
     * loaded document when no element is given.
     * If the query fails (DOMXPath::query returns false) an empty list is returned.
     *
     * @param string $query
     * @param DOMNode|null $element
     * @return DOMNodeList
     */
    private function xpathQuery(string $query, DOMNode $element = null): DOMNodeList
    {
        if (null !== $element) {
            $document = Xml::ownerDocument($element);
        } else {
            $document = $this->dom();
            $element = Xml::documentElement($document);
        }

        $xpath = new DOMXPath($document);
        /** @var DOMNodeList|false $result phpstan does not know that query can return false */
        $result = $xpath->query($query, $element);

        return (false === $result) ? new DOMNodeList() : $result;
    }
343
344 6
    /**
     * Access the loaded document.
     *
     * @throws \LogicException when no document has been loaded
     * @return DOMDocument
     */
    private function dom(): DOMDocument
    {
        if (null !== $this->dom) {
            return $this->dom;
        }

        throw new \LogicException('No document has been loaded');
    }
351
}
352