Passed
Branch feature/php8.3 (4d3b0a)
by Tim
17:15
created

DOMDocumentFactory::normalizeDocument()   C

Complexity

Conditions 12
Paths 85

Size

Total Lines 45
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 12
eloc 21
nc 85
nop 1
dl 0
loc 45
rs 6.9666
c 0
b 0
f 0

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\XML;
6
7
use DOMDocument;
8
use DOMElement;
9
use SimpleSAML\XML\Assert\Assert;
10
use SimpleSAML\XML\Exception\IOException;
11
use SimpleSAML\XML\Exception\RuntimeException;
12
use SimpleSAML\XML\Exception\UnparseableXMLException;
13
use SimpleSAML\XPath\XPath;
14
15
use function file_get_contents;
16
use function func_num_args;
17
use function libxml_clear_errors;
18
use function libxml_set_external_entity_loader;
19
use function libxml_use_internal_errors;
20
use function sprintf;
21
use function strpos;
22
23
/**
24
 * @package simplesamlphp/xml-common
25
 */
26
final class DOMDocumentFactory
27
{
28
    /**
29
     * @var non-negative-int
30
     * TODO: Add LIBXML_NO_XXE to the defaults when PHP 8.4.0 + libxml 2.13.0 become generally available
31
     */
32
    public const int DEFAULT_OPTIONS = \LIBXML_COMPACT | \LIBXML_NONET | \LIBXML_NSCLEAN;
0 ignored issues
show
Bug introduced by
A parse error occurred: Syntax error, unexpected T_STRING, expecting '=' on line 32 at column 21
Loading history...
33
34
35
    /**
     * Parse an XML string into a DOMDocument.
     *
     * The libxml external entity loader is reset to null before anything else happens.
     * Input containing a DOCTYPE is refused twice: once with a textual check before parsing,
     * and once more by inspecting the top-level nodes of the parsed document.
     *
     * @param string $xml The XML to parse; asserted not to be whitespace-only
     * @param non-negative-int $options Bitmask of LIBXML_* parser options. When this argument is
     *   omitted and the LIBXML_NO_XXE constant is defined, that flag is added to the defaults.
     *
     * @throws \SimpleSAML\XML\Exception\UnparseableXMLException When DOMDocument::loadXML() fails
     */
    public static function fromString(
        string $xml,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        libxml_set_external_entity_loader(null);

        Assert::notWhitespaceOnly($xml);
        Assert::notRegex(
            $xml,
            '/<(\s*)!(\s*)DOCTYPE/',
            'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
            RuntimeException::class,
        );

        // Only harden the defaults; an explicitly supplied bitmask is passed through untouched
        $parserFlags = $options;
        if (func_num_args() === 1 && defined('LIBXML_NO_XXE')) {
            $parserFlags |= \LIBXML_NO_XXE;
        }

        // Capture parser errors internally while loading, then restore the caller's setting
        $previousErrorMode = libxml_use_internal_errors(true);
        libxml_clear_errors();

        $document = self::create();
        $parsed = $document->loadXML($xml, $parserFlags);

        libxml_use_internal_errors($previousErrorMode);

        if ($parsed) {
            libxml_clear_errors();

            // Second line of defence: no top-level node may be a document type node
            foreach ($document->childNodes as $topLevelNode) {
                Assert::false(
                    $topLevelNode->nodeType === \XML_DOCUMENT_TYPE_NODE,
                    'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
                    RuntimeException::class,
                );
            }

            return $document;
        }

        $failure = libxml_get_last_error();
        libxml_clear_errors();

        throw new UnparseableXMLException($failure);
    }
84
85
86
    /**
     * Read a file and parse its contents into a DOMDocument via fromString().
     *
     * @param string $file Path handed to file_get_contents()
     * @param non-negative-int $options Bitmask of LIBXML_* parser options; only forwarded to
     *   fromString() when the caller actually supplied it
     *
     * @throws \SimpleSAML\XML\Exception\IOException When the file cannot be read
     */
    public static function fromFile(
        string $file,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        // Start from a clean slate so error_get_last() reflects this read only
        error_clear_last();
        $contents = @file_get_contents($file);

        if ($contents === false) {
            $lastError = error_get_last();
            $error = $lastError['message'] ?? "Check that the file exists and can be read.";

            throw new IOException("File '$file' was not loaded;  $error");
        }

        Assert::notWhitespaceOnly(
            $contents,
            sprintf('File "%s" does not have content', $file),
            RuntimeException::class,
        );

        // fromString() inspects its own argument count, so keep "omitted" distinct from "explicit"
        if (func_num_args() < 2) {
            return self::fromString($contents);
        }

        return self::fromString($contents, $options);
    }
106
107
108
    /**
     * Create a new, empty DOMDocument.
     *
     * @param string $version XML version passed to the DOMDocument constructor
     * @param string $encoding Encoding passed to the DOMDocument constructor
     */
    public static function create(string $version = '1.0', string $encoding = 'UTF-8'): DOMDocument
    {
        $document = new DOMDocument($version, $encoding);

        return $document;
    }
116
117
118
    /**
     * Move the namespace declarations found anywhere in the document onto its root element.
     *
     * Declarations are collected by attribute name, so when the same name occurs more than
     * once the last one visited wins. The modified document is serialized and re-parsed
     * through fromString() before being returned.
     *
     * @param \DOMDocument $doc
     *
     * @return \DOMDocument The given document itself when it has no root element or no
     *   declarations to move; otherwise a freshly parsed document
     */
    public static function normalizeDocument(DOMDocument $doc): DOMDocument
    {
        $root = $doc->documentElement;
        if ($root === null) {
            // No root element means there is nothing to normalize (and nowhere to put declarations)
            return $doc;
        }

        $xpath = XPath::getXPath($doc);

        $xmlnsAttributes = self::collectNamespaceDeclarations($xpath->query('//namespace::*'));
        if ($xmlnsAttributes === []) {
            // Nothing to move; hand the document back unchanged
            return $doc;
        }

        self::removeNamespaceDeclarations($xpath->query('//*[namespace::*]'));

        // Re-declare everything that was collected on the root element
        foreach ($xmlnsAttributes as $name => $value) {
            $root->setAttribute($name, $value);
        }

        return self::fromString($doc->saveXML());
    }


    /**
     * Map namespace nodes to "attribute name => value" pairs, skipping the `xmlns:xml` entry.
     *
     * @param iterable<\DOMNode> $namespaceNodes
     *
     * @return array<string, string|null>
     */
    private static function collectNamespaceDeclarations(iterable $namespaceNodes): array
    {
        $declarations = [];
        foreach ($namespaceNodes as $namespaceNode) {
            if ($namespaceNode->nodeName !== 'xmlns:xml') {
                $declarations[$namespaceNode->nodeName] = $namespaceNode->nodeValue;
            }
        }

        return $declarations;
    }


    /**
     * Remove every attribute whose name starts with `xmlns` from the given elements.
     *
     * @param iterable<\DOMNode> $elements Nodes that are not DOMElement instances are ignored
     */
    private static function removeNamespaceDeclarations(iterable $elements): void
    {
        foreach ($elements as $element) {
            if (!($element instanceof DOMElement)) {
                continue;
            }

            // Gather the names first, presumably so the attribute map is not mutated mid-iteration
            $attributesToRemove = [];
            foreach ($element->attributes as $attr) {
                if (strpos($attr->nodeName, 'xmlns') === 0) {
                    $attributesToRemove[] = $attr->nodeName;
                }
            }

            foreach ($attributesToRemove as $attrName) {
                $element->removeAttribute($attrName);
            }
        }
    }
167
168
169
    /**
     * Find the namespace URI declared for a prefix anywhere in the element's owner document.
     *
     * The search covers every namespace node in the document, not just those in scope for
     * $elt. When a local name is declared more than once, the last node visited wins.
     *
     * @param \DOMElement $elt Element whose owner document is searched
     * @param string $prefix Local name of the namespace node to look for
     *
     * @return string|null The value of the matching namespace node, or null when there is none
     */
    public static function lookupNamespaceURI(DOMElement $elt, string $prefix): ?string
    {
        $namespaceNodes = XPath::getXPath($elt->ownerDocument)->query('//namespace::*');

        // Index the namespace nodes by local name
        $uriByPrefix = [];
        foreach ($namespaceNodes as $namespaceNode) {
            $uriByPrefix[$namespaceNode->localName] = $namespaceNode->nodeValue;
        }

        return $uriByPrefix[$prefix] ?? null;
    }
190
}
191