XPathFilter   A
last analyzed

Complexity

Total Complexity 4

Size/Duplication

Total Lines 138
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 19
dl 0
loc 138
rs 10
c 0
b 0
f 0
wmc 4

3 Methods

Rating   Name   Duplication   Size   Complexity  
A removeStringContents() 0 31 2
A filterXPathAxis() 0 37 1
A filterXPathFunction() 0 41 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\XPath;
6
7
use SimpleSAML\XML\Assert\Assert;
8
use SimpleSAML\XPath\Constants as C;
9
use SimpleSAML\XPath\Exception\AxisNotAllowedException;
10
use SimpleSAML\XPath\Exception\FunctionNotAllowedException;
11
use SimpleSAML\XPath\Exception\RuntimeException;
12
13
use function preg_match_all;
14
use function preg_replace;
15
16
/**
 * XPathFilter helper functions for the XML library.
 *
 * The helpers are static and work on the textual form of an XPath expression:
 * one blanks out string literals, the other two collect function names / axis
 * names with a regular expression and assert that each one is on an allow-list.
 *
 * @package simplesamlphp/xml-common
 */
class XPathFilter
{
    /**
     * Remove the content from all single or double-quoted strings in $input, leaving only quotes.
     *
     * @param string $input
     * @return string $input with every quoted string replaced by an empty pair of the same quotes
     * @throws \SimpleSAML\XPath\Exception\RuntimeException when preg_replace() reports an error
     */
    public static function removeStringContents(string $input): string
    {
        /**
         * This regex should not be vulnerable to a ReDOS, because it uses possessive quantifiers
         * that prevent backtracking.
         *
         * @see https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
         *
         * '/(["\'])(?:(?!\1).)*+\1/'
         *  (["\'])  # Match a single or double quote and capture it in group 1
         *  (?:      # Start a non-capturing group
         *    (?!    # Negative lookahead
         *      \1   # Match the same quote as in group 1
         *    )      # End of negative lookahead
         *    .      # Match any character (that is not a quote, because of the negative lookahead)
         *  )*+      # Repeat the non-capturing group zero or more times, possessively
         *  \1       # Match the same quote as in group 1
         */
        $res = preg_replace(
            '/(["\'])(?:(?!\\1).)*+\\1/',
            "\\1\\1",   // Replace the content with two of the quotes that were matched
            $input,
        );

        // preg_replace() returns null when the PCRE engine reports an error
        if (null === $res) {
            throw new RuntimeException("Error in preg_replace");
        }

        return $res;
    }


    /**
     * Check if the $xpathExpression uses an XPath function that is not in the list of allowed functions
     *
     * @param string $xpathExpression the expression to check. Should be a valid xpath expression
     * @param string[] $allowedFunctions array of string with a list of allowed function names
     * @throws \SimpleSAML\XPath\Exception\RuntimeException when preg_match_all() reports an error
     * @throws \SimpleSAML\XPath\Exception\FunctionNotAllowedException
     *   exception class handed to the assertion for a function name that is not on the list
     */
    public static function filterXPathFunction(
        string $xpathExpression,
        array $allowedFunctions = C::DEFAULT_ALLOWED_FUNCTIONS,
    ): void {
        /**
         * Look for the function specifier '(' and look for a function name before it.
         * Ignoring whitespace before the '(' and the function name.
         * All functions must match a string on a list of allowed function names
         *
         * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
         * but cannot start or end with a hyphen. To match this, we start with matching one or more
         * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
         * and then match one or more lower-case alpha characters. This ensures that the function name
         * cannot start or end with a hyphen, but can contain one or more hyphens.
         * More than one consecutive hyphen does not match.
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking
         * and thus prevent a ReDOS.
         *
         * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
         * (           # Start a capturing group
         *   [a-z]++   # Match one or more lower-case alpha characters, possessively
         *   (?>       # Start an atomic group (no capturing)
         *     -       # Match a hyphen
         *     [a-z]++ # Match one or more lower-case alpha characters, possessively
         *   )*+       # Repeat the atomic group zero or more times, possessively
         * )           # End of the capturing group
         * \s*+        # Match zero or more whitespace characters, possessively
         * \(          # Match an opening parenthesis
         */
        $matches = [];
        $res = preg_match_all(
            '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/',
            $xpathExpression,
            $matches,
        );

        // A failed match must not be mistaken for "no functions found": refuse instead of letting it pass
        if (false === $res) {
            throw new RuntimeException("Error in preg_match_all");
        }

        // Check that all the function names we found are in the list of allowed function names
        Assert::allOneOf($matches[1], $allowedFunctions, "Invalid function: %s", FunctionNotAllowedException::class);
    }


    /**
     * Check if the $xpathExpression uses an XPath axis that is not in the list of allowed axes
     *
     * @param string $xpathExpression the expression to check. Should be a valid xpath expression
     * @param string[] $allowedAxes array of string with a list of allowed axes names
     * @throws \SimpleSAML\XPath\Exception\RuntimeException when preg_match_all() reports an error
     * @throws \SimpleSAML\XPath\Exception\AxisNotAllowedException
     *   exception class handed to the assertion for an axis name that is not on the list
     */
    public static function filterXPathAxis(string $xpathExpression, array $allowedAxes): void
    {
        /**
         * Look for the axis specifier '::' and look for an axis name before it.
         * Ignoring whitespace before the '::' and the axis name.
         * All axes must match a string on a list of allowed axis names
         *
         * We use the same rules for matching Axis names as we do for function names:
         * lower-case alpha (i.e. [a-z]) with optional single hyphens in between, never at the
         * start or the end. The only difference is that we match the '::' instead of the '('.
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking
         * and thus prevent a ReDOS.
         *
         * '/([a-z]++(?>-[a-z]++)*+)\s*+::/'
         * (           # Start a capturing group
         *   [a-z]++   # Match one or more lower-case alpha characters, possessively
         *   (?>       # Start an atomic group (no capturing)
         *     -       # Match a hyphen
         *     [a-z]++ # Match one or more lower-case alpha characters, possessively
         *   )*+       # Repeat the atomic group zero or more times, possessively
         * )           # End of the capturing group
         * \s*+        # Match zero or more whitespace characters, possessively
         * ::          # Match the axis specifier
         */
        $matches = [];
        $res = preg_match_all(
            '/([a-z]++(?>-[a-z]++)*+)\\s*+::/',
            $xpathExpression,
            $matches,
        );

        // A failed match must not be mistaken for "no axes found": refuse instead of letting it pass
        if (false === $res) {
            throw new RuntimeException("Error in preg_match_all");
        }

        // Check that all the axes names we found are in the list of allowed axes names
        Assert::allInArray($matches[1], $allowedAxes, "Invalid axis: %s", AxisNotAllowedException::class);
    }
}
161