Passed
Push — master ( 542827...7d7dc3 )
by Tim
01:52
created

DOMDocumentFactory::fromString()   B

Complexity

Conditions 6
Paths 12

Size

Total Lines 50
Code Lines 27

Duplication

Lines 0
Ratio 0 %

Importance

Changes 11
Bugs 0 Features 1
Metric Value
eloc 27
c 11
b 0
f 1
dl 0
loc 50
rs 8.8657
cc 6
nc 12
nop 3
1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\XML;
6
7
use DOMDocument;
8
use Exception;
9
use LibXMLError;
10
use SimpleSAML\Assert\Assert;
11
use SimpleSAML\XML\Exception\IOException;
12
use SimpleSAML\XML\Exception\RuntimeException;
13
use SimpleSAML\XML\Exception\SchemaViolationException;
14
use SimpleSAML\XML\Exception\UnparseableXMLException;
15
use XMLReader;
16
17
use function array_unique;
18
use function file_get_contents;
19
use function func_num_args;
20
use function implode;
21
use function libxml_clear_errors;
22
use function libxml_get_last_error;
23
use function libxml_set_external_entity_loader;
24
use function libxml_use_internal_errors;
25
use function sprintf;
26
use function trim;
27
28
/**
 * Static factory that builds DOMDocument instances from XML strings or files,
 * rejecting DOCTYPE declarations and optionally validating against an XML schema.
 *
 * @package simplesamlphp/xml-common
 */
final class DOMDocumentFactory
{
    /**
     * Default libxml option bitmask used when the caller does not pass any options.
     *
     * @var non-negative-int
     * TODO: Add LIBXML_NO_XXE to the defaults when PHP 8.4.0 + libxml 2.13.0 become generally available
     */
    public const DEFAULT_OPTIONS = LIBXML_COMPACT | LIBXML_NONET | LIBXML_NSCLEAN;


    /**
     * Parse an XML string into a DOMDocument.
     *
     * The input is rejected when it is whitespace-only or contains a DOCTYPE declaration.
     * When a schema file is given, the XML is validated against it before being loaded.
     *
     * @param string $xml The XML to parse
     * @param string|null $schemaFile Path of the schema to validate against, or null/empty to skip validation
     * @param non-negative-int $options Bitmask of libxml options; when omitted, the defaults are used and
     *   LIBXML_NO_XXE is added if the running PHP defines it
     *
     * @return \DOMDocument
     *
     * @throws \SimpleSAML\XML\Exception\RuntimeException when the XML contains a DOCTYPE
     * @throws \SimpleSAML\XML\Exception\SchemaViolationException when schema validation fails
     * @throws \SimpleSAML\XML\Exception\UnparseableXMLException when the XML cannot be loaded
     */
    public static function fromString(
        string $xml,
        ?string $schemaFile = null,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        libxml_set_external_entity_loader(null);
        Assert::notWhitespaceOnly($xml);
        Assert::notRegex(
            $xml,
            '/<(\s*)!(\s*)DOCTYPE/',
            'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
            RuntimeException::class,
        );

        // $options is the third parameter: it was left at its default whenever fewer than three
        // arguments were passed (this includes fromFile() forwarding only $xml and $schemaFile).
        if (func_num_args() < 3 && defined('LIBXML_NO_XXE')) {
            $options |= LIBXML_NO_XXE;
        }

        $internalErrors = libxml_use_internal_errors(true);
        libxml_clear_errors();

        try {
            // Perform optional schema validation
            if (!empty($schemaFile)) {
                self::schemaValidation($xml, $schemaFile, $options);
            }

            $domDocument = self::create();
            $loaded = $domDocument->loadXML($xml, $options);
        } finally {
            // Restore the caller's libxml error-handling mode, also when validation throws
            libxml_use_internal_errors($internalErrors);
        }

        if (!$loaded) {
            $error = libxml_get_last_error();
            libxml_clear_errors();

            throw new UnparseableXMLException($error);
        }

        libxml_clear_errors();

        // Second line of defence: reject any DOCTYPE node that made it into the parsed document
        foreach ($domDocument->childNodes as $child) {
            Assert::false(
                $child->nodeType === XML_DOCUMENT_TYPE_NODE,
                'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
                RuntimeException::class,
            );
        }

        return $domDocument;
    }


    /**
     * Read a file and parse its contents into a DOMDocument.
     *
     * @param string $file Path of the XML file to read
     * @param string|null $schemaFile Path of the schema to validate against, or null/empty to skip validation
     * @param non-negative-int $options Bitmask of libxml options; only forwarded to fromString()
     *   when explicitly passed by the caller
     *
     * @return \DOMDocument
     *
     * @throws \SimpleSAML\XML\Exception\IOException when the file cannot be read
     * @throws \SimpleSAML\XML\Exception\RuntimeException when the file has no content
     */
    public static function fromFile(
        string $file,
        ?string $schemaFile = null,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        // Warnings are suppressed on purpose; the failure reason is picked up via error_get_last()
        error_clear_last();
        $xml = @file_get_contents($file);
        if ($xml === false) {
            $e = error_get_last();
            $error = $e['message'] ?? "Check that the file exists and can be read.";

            throw new IOException("File '$file' was not loaded;  $error");
        }

        Assert::notWhitespaceOnly($xml, sprintf('File "%s" does not have content', $file), RuntimeException::class);

        // Keep "options not passed" distinguishable for fromString(), which inspects its argument count
        return (func_num_args() < 3)
            ? self::fromString($xml, $schemaFile)
            : self::fromString($xml, $schemaFile, $options);
    }


    /**
     * Create an empty DOMDocument.
     *
     * @param string $version XML version passed to the DOMDocument constructor
     * @param string $encoding Document encoding passed to the DOMDocument constructor
     * @return \DOMDocument
     */
    public static function create(string $version = '1.0', string $encoding = 'UTF-8'): DOMDocument
    {
        return new DOMDocument($version, $encoding);
    }


    /**
     * Validate an XML-string against a given schema.
     *
     * @param string $xml The XML to validate
     * @param string $schemaFile Path of the schema to validate against
     * @param int $options Bitmask of libxml options handed to the XMLReader
     *
     * @throws \SimpleSAML\XML\Exception\SchemaViolationException when the schema cannot be set
     *   or when validation fails.
     */
    public static function schemaValidation(
        string $xml,
        string $schemaFile,
        int $options = self::DEFAULT_OPTIONS,
    ): void {
        $xmlReader = XMLReader::XML($xml, null, $options);
        Assert::notFalse($xmlReader, SchemaViolationException::class);

        $internalErrors = libxml_use_internal_errors(true);

        try {
            try {
                $schemaLoaded = $xmlReader->setSchema($schemaFile);
            } catch (Exception) {
                $schemaLoaded = false;
            }

            // setSchema() may also signal failure by returning false; without this check the
            // document would be read without any schema and always be reported as valid.
            if (!$schemaLoaded) {
                $err = libxml_get_last_error();

                throw new SchemaViolationException(
                    $err instanceof LibXMLError
                        ? trim($err->message) . ' on line ' . $err->line
                        : sprintf('Unable to load XML schema "%s"', $schemaFile),
                );
            }

            $msgs = [];
            while ($xmlReader->read()) {
                if (!$xmlReader->isValid()) {
                    $err = libxml_get_last_error();
                    if ($err instanceof LibXMLError) {
                        $msgs[] = trim($err->message) . ' on line ' . $err->line;
                    }
                }
            }

            if ($msgs) {
                throw new SchemaViolationException(sprintf(
                    "XML schema validation errors:\n - %s",
                    implode("\n - ", array_unique($msgs)),
                ));
            }
        } finally {
            // Leave libxml's error-handling mode as the caller had it
            libxml_use_internal_errors($internalErrors);
        }
    }
}
183