Passed
Push — master ( 64aed6...6f25d9 )
by Tim
12:28
created

XPathFilter::filterXPathAxis()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 37
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 6
nc 1
nop 2
dl 0
loc 37
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\XPath;
6
7
use SimpleSAML\XML\Assert\Assert;
8
use SimpleSAML\XPath\Constants as C;
9
use SimpleSAML\XPath\Exception\{
10
    AxisNotAllowedException,
11
    FunctionNotAllowedException,
12
    RuntimeException,
13
};
14
15
use function preg_match_all;
16
use function preg_replace;
17
18
/**
19
 * XPathFilter helper functions for the XML library.
20
 *
21
 * @package simplesamlphp/xml-common
22
 */
23
class XPathFilter
{
    /**
     * Remove the content from all single or double-quoted strings in $input, leaving only quotes.
     *
     * @param string $input
     * @return string $input with every quoted string reduced to an empty pair of its own quote character
     * @throws \SimpleSAML\XPath\Exception\RuntimeException when preg_replace reports an error
     */
    public static function removeStringContents(string $input): string
    {
        /**
         * This regex should not be vulnerable to a ReDOS, because it uses possessive quantifiers
         * that prevent backtracking.
         *
         * @see https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
         *
         * '/(["\'])(?:(?!\1).)*+\1/s'
         *  (["\'])  # Match a single or double quote and capture it in group 1
         *  (?:      # Start a non-capturing group
         *    (?!    # Negative lookahead
         *      \1   # Match the same quote as in group 1
         *    )      # End of negative lookahead
         *    .      # Match any character (that is not a quote, because of the negative lookahead)
         *  )*+      # Repeat the non-capturing group zero or more times, possessively
         *  \1       # Match the same quote as in group 1
         *
         * The 's' (DOTALL) modifier lets '.' match a newline as well. Without it a quoted string that
         * spans a line break does not match from its opening quote, so matching resumes at its closing
         * quote and pairs that with the opening quote of the next string; the expression text between
         * the two strings would then be removed instead of the string contents.
         */
        $res = preg_replace(
            '/(["\'])(?:(?!\\1).)*+\\1/s',
            "\\1\\1",   // Replace the content with two of the quotes that were matched
            $input,
        );

        if (null === $res) {
            throw new RuntimeException("Error in preg_replace");
        }

        return $res;
    }


    /**
     * Check if the $xpathExpression uses an XPath function that is not in the list of allowed functions
     *
     * Every name found directly before a '(' is asserted to be one of $allowedFunctions; the assertion
     * is handed FunctionNotAllowedException as the exception class to use on failure.
     *
     * NOTE(review): quoted strings are not stripped here, so text inside a string literal is scanned too;
     * presumably callers run removeStringContents() first - confirm against the callers.
     *
     * @param string $xpathExpression the expression to check. Should be a valid xpath expression
     * @param string[] $allowedFunctions array of string with a list of allowed function names
     * @throws \SimpleSAML\XPath\Exception\RuntimeException when preg_match_all reports an error
     */
    public static function filterXPathFunction(
        string $xpathExpression,
        array $allowedFunctions = C::DEFAULT_ALLOWED_FUNCTIONS,
    ): void {
        /**
         * Look for the function specifier '(' and look for a function name before it.
         * Ignoring whitespace before the '(' and the function name.
         * All functions must match a string on a list of allowed function names
         *
         * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
         * but cannot start or end with a hyphen. To match this, we start with matching one or more
         * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
         * and then match one or more lower-case alpha characters. This ensures that the function name
         * cannot start or end with a hyphen, but can contain one or more hyphens.
         * More than one consecutive hyphen does not match.
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking
         * and thus prevent a ReDOS.
         *
         * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
         * (           # Start a capturing group
         *   [a-z]++   # Match one or more lower-case alpha characters
         *   (?>       # Start an atomic group (no capturing)
         *     -       # Match a hyphen
         *     [a-z]++ # Match one or more lower-case alpha characters, possessively
         *   )*+       # Repeat the atomic group zero or more times,
         * )           # End of the capturing group
         * \s*+        # Match zero or more whitespace characters, possessively
         * \(          # Match an opening parenthesis
         */
        $functionNames = self::captureNames('/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/', $xpathExpression);

        // Check that all the function names we found are in the list of allowed function names
        Assert::allOneOf($functionNames, $allowedFunctions, "Invalid function: %s", FunctionNotAllowedException::class);
    }


    /**
     * Check if the $xpathExpression uses an XPath axis that is not in the list of allowed axes
     *
     * Every name found directly before a '::' is asserted to be one of $allowedAxes; the assertion
     * is handed AxisNotAllowedException as the exception class to use on failure.
     *
     * NOTE(review): quoted strings are not stripped here, so text inside a string literal is scanned too;
     * presumably callers run removeStringContents() first - confirm against the callers.
     *
     * @param string $xpathExpression the expression to check. Should be a valid xpath expression
     * @param string[] $allowedAxes array of string with a list of allowed axes names
     * @throws \SimpleSAML\XPath\Exception\RuntimeException when preg_match_all reports an error
     */
    public static function filterXPathAxis(string $xpathExpression, array $allowedAxes): void
    {
        /**
         * Look for the axis specifier '::' and look for an axis name before it.
         * Ignoring whitespace before the '::' and the axis name.
         * All axes must match a string on a list of allowed axis names
         *
         * We use the same rules for matching Axis names as we do for function names.
         * The only difference is that we match the '::' instead of the '('
         * so everything that was said about the regular expression for function names
         * applies here as well.
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking
         * and thus prevent a ReDOS.
         *
         * '/([a-z]++(?>-[a-z]++)*+)\s*+::/'
         * (           # Start a capturing group
         *   [a-z]++   # Match one or more lower-case alpha characters
         *   (?>       # Start an atomic group (no capturing)
         *     -       # Match a hyphen
         *     [a-z]++ # Match one or more lower-case alpha characters, possessively
         *   )*+       # Repeat the atomic group zero or more times,
         * )           # End of the capturing group
         * \s*+        # Match zero or more whitespace characters, possessively
         * ::          # Match the axis specifier
         */
        $axisNames = self::captureNames('/([a-z]++(?>-[a-z]++)*+)\\s*+::/', $xpathExpression);

        // Check that all the axes names we found are in the list of allowed axes names
        Assert::allInArray($axisNames, $allowedAxes, "Invalid axis: %s", AxisNotAllowedException::class);
    }


    /**
     * Run $pattern over $xpathExpression and return everything captured by its first capturing group.
     *
     * @param string $pattern a PCRE pattern with at least one capturing group
     * @param string $xpathExpression the expression to scan
     * @return string[] the group-1 captures, in order of appearance (empty when nothing matched)
     * @throws \SimpleSAML\XPath\Exception\RuntimeException when preg_match_all reports an error
     */
    private static function captureNames(string $pattern, string $xpathExpression): array
    {
        $matches = [];
        $res = preg_match_all($pattern, $xpathExpression, $matches);

        if (false === $res) {
            // Fail closed: an engine error must not be mistaken for "no names found",
            // because the callers would then let the expression through unchecked.
            throw new RuntimeException("Error in preg_match_all");
        }

        return $matches[1];
    }
}
163