Issues (21)

src/DOMDocumentFactory.php (3 issues)

1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\XML;
6
7
use DOMDocument;
8
use SimpleSAML\Assert\Assert;
9
use SimpleSAML\XML\Exception\{IOException, RuntimeException, UnparseableXMLException};
10
11
use function file_get_contents;
12
use function func_num_args;
13
use function libxml_clear_errors;
14
use function libxml_set_external_entity_loader;
15
use function libxml_use_internal_errors;
16
use function sprintf;
17
18
/**
 * Factory for DOMDocument instances, either empty or parsed from an XML string or file.
 *
 * Parsing rejects any document that carries a DOCTYPE declaration and disables the
 * external entity loader before handing the input to libxml.
 *
 * @package simplesamlphp/xml-common
 */
final class DOMDocumentFactory
{
    /**
     * libxml option bitmask applied when the caller does not pass $options.
     *
     * @var non-negative-int
     * TODO: Add LIBXML_NO_XXE to the defaults when PHP 8.4.0 + libxml 2.13.0 become generally available
     */
    public const DEFAULT_OPTIONS = \LIBXML_COMPACT | \LIBXML_NONET | \LIBXML_NSCLEAN;


    /**
     * Parse an XML string into a DOMDocument.
     *
     * When $options is omitted and the LIBXML_NO_XXE constant exists, that flag is added
     * to the defaults. An explicitly passed $options value is used untouched.
     *
     * @param string $xml The XML to parse; must not be empty or whitespace-only.
     * @param non-negative-int $options Bitmask of LIBXML_* parser options.
     *
     * @return \DOMDocument
     *
     * @throws \SimpleSAML\XML\Exception\RuntimeException Requested from the assertions when
     *   the input contains a DOCTYPE declaration or the parsed document has a DOCTYPE node.
     * @throws \SimpleSAML\XML\Exception\UnparseableXMLException When libxml fails to load the XML.
     */
    public static function fromString(
        string $xml,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        // Passing null resets libxml's external entity loader (process-wide setting).
        libxml_set_external_entity_loader(null);
        Assert::notWhitespaceOnly($xml);
        Assert::notRegex(
            $xml,
            '/<(\s*)!(\s*)DOCTYPE/',
            'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
            RuntimeException::class,
        );

        // LIBXML_NO_XXE is not defined on every PHP/libxml combination (see the TODO on
        // DEFAULT_OPTIONS), hence the defined() guard. func_num_args() distinguishes
        // "no $options passed" from "the default value was passed explicitly".
        if (func_num_args() === 1 && defined('LIBXML_NO_XXE')) {
            $options |= \LIBXML_NO_XXE;
        }

        // Collect libxml errors internally instead of emitting PHP warnings, starting from
        // an empty error buffer.
        $internalErrors = libxml_use_internal_errors(true);
        libxml_clear_errors();

        try {
            $domDocument = self::create();
            $loaded = $domDocument->loadXML($xml, $options);

            // Read the failure reason before the error buffer is cleared below.
            $error = $loaded ? null : libxml_get_last_error();
        } finally {
            // Always hand back libxml's error handling in the state the caller had it,
            // even when loadXML() itself throws (e.g. on an out-of-range $options value).
            libxml_clear_errors();
            libxml_use_internal_errors($internalErrors);
        }

        if (!$loaded) {
            // NOTE(review): libxml_get_last_error() may return false when libxml recorded
            // no error; confirm UnparseableXMLException accepts that value.
            throw new UnparseableXMLException($error);
        }

        // Second line of defence: reject a DOCTYPE that made it past the textual check.
        foreach ($domDocument->childNodes as $child) {
            Assert::false(
                $child->nodeType === \XML_DOCUMENT_TYPE_NODE,
                'Dangerous XML detected, DOCTYPE nodes are not allowed in the XML body',
                RuntimeException::class,
            );
        }

        return $domDocument;
    }


    /**
     * Read a file and parse its contents into a DOMDocument via fromString().
     *
     * @param string $file Path (or stream URL) handed to file_get_contents().
     * @param non-negative-int $options Bitmask of LIBXML_* parser options; when omitted,
     *   fromString() is called without options so it can apply its own defaults.
     *
     * @return \DOMDocument
     *
     * @throws \SimpleSAML\XML\Exception\IOException When the file cannot be read.
     * @throws \SimpleSAML\XML\Exception\RuntimeException Requested from the assertion when
     *   the file is empty or whitespace-only; also see fromString().
     * @throws \SimpleSAML\XML\Exception\UnparseableXMLException When libxml fails to load the XML.
     */
    public static function fromFile(
        string $file,
        int $options = self::DEFAULT_OPTIONS,
    ): DOMDocument {
        // Reset the last-error slot so error_get_last() below can only report this read.
        error_clear_last();
        $xml = @file_get_contents($file);
        if ($xml === false) {
            $e = error_get_last();
            $error = $e['message'] ?? "Check that the file exists and can be read.";

            throw new IOException("File '$file' was not loaded;  $error");
        }

        Assert::notWhitespaceOnly($xml, sprintf('File "%s" does not have content', $file), RuntimeException::class);

        // Forward $options only when the caller supplied it: fromString() inspects its own
        // argument count to decide whether to extend the defaults.
        if (func_num_args() < 2) {
            return self::fromString($xml);
        }

        return self::fromString($xml, $options);
    }


    /**
     * Create an empty DOMDocument.
     *
     * @param string $version XML version for the document declaration.
     * @param string $encoding Encoding for the document declaration.
     *
     * @return \DOMDocument
     */
    public static function create(string $version = '1.0', string $encoding = 'UTF-8'): DOMDocument
    {
        return new DOMDocument($version, $encoding);
    }
}
117