Passed
Pull Request — master (#33)
by Tim
01:50
created

XPathFilterTrait   A

Complexity

Total Complexity 10

Size/Duplication

Total Lines 157
Duplicated Lines 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
wmc 10
eloc 33
c 2
b 0
f 0
dl 0
loc 157
rs 10

1 Method

Rating   Name   Duplication   Size   Complexity  
B allowedXPathFilter() 0 67 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\XML\Assert;
6
7
use Exception;
8
use InvalidArgumentException;
9
use SimpleSAML\Assert\Assert as BaseAssert;
10
use SimpleSAML\Assert\AssertionFailedException;
11
use SimpleSAML\XML\Constants as C;
12
13
use function in_array;
14
use function preg_match_all;
15
use function preg_replace;
16
use function sprintf;
17
18
/**
19
 * @package simplesamlphp/xml-common
20
 */
21
trait XPathFilterTrait
{
    /**
     * Remove the content from all single or double-quoted strings in the input, leaving only the quotes.
     * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
     *
     * The `s` (PCRE_DOTALL) modifier is required. Without it `.` does not match a line feed, so a string
     * literal that contains a newline would not be recognised as a string. Its closing quote would then be
     * paired with the opening quote of the NEXT literal, causing the real expression text between the two
     * literals to be stripped as if it were string content, hiding any function or axis names in it from
     * the allow-list checks.
     *
     * '/(["\'])(?:(?!\1).)*+\1/s'
     *  (["\'])  # Match a single or double quote and capture it in group 1
     *  (?:      # Start a non-capturing group
     *    (?!    # Negative lookahead
     *      \1   # Match the same quote as in group 1
     *    )      # End of negative lookahead
     *    .      # Match any character, including newlines (that is not a quote, because of the lookahead)
     *  )*+      # Repeat the non-capturing group zero or more times, possessively
     *  \1       # Match the same quote as in group 1
     *  s        # DOTALL modifier: let `.` match newlines as well
     */
    private static string $regex_xpfilter_remove_strings = '/(["\'])(?:(?!\1).)*+\1/s';

    /**
     * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
     * but cannot start or end with a hyphen. To match this, we start with matching one or more
     * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
     * and then match one or more lower-case alpha characters. This ensures that the function name
     * cannot start or end with a hyphen, but can contain one or more hyphens.
     * More than one consecutive hyphen does not match.
     *
     * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
     * (           # Start a capturing group
     *   [a-z]++   # Match one or more lower-case alpha characters, possessively
     *   (?>       # Start an atomic group (no capturing)
     *     -       # Match a hyphen
     *     [a-z]++ # Match one or more lower-case alpha characters, possessively
     *   )*+       # Repeat the atomic group zero or more times, possessively
     * )           # End of the capturing group
     * \s*+        # Match zero or more whitespace characters, possessively
     * \(          # Match an opening parenthesis
     */
    private static string $regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/';

    /**
     * We use the same rules for matching axis names as we do for function names.
     * The only difference is that we match the '::' instead of the '('
     * so everything that was said about the regular expression for function names
     * applies here as well.
     *
     * '/([a-z]++(?>-[a-z]++)*+)\s*+::/'
     * (           # Start a capturing group
     *   [a-z]++   # Match one or more lower-case alpha characters, possessively
     *   (?>       # Start an atomic group (no capturing)
     *     -       # Match a hyphen
     *     [a-z]++ # Match one or more lower-case alpha characters, possessively
     *   )*+       # Repeat the atomic group zero or more times, possessively
     * )           # End of the capturing group
     * \s*+        # Match zero or more whitespace characters, possessively
     * ::          # Match the axis specifier (a double colon)
     */
    private static string $regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+)\\s*+::/';


    /***********************************************************************************
     *  NOTE:  Custom assertions may be added below this line.                         *
     *         They SHOULD be marked as `private` to ensure the call is forced         *
     *          through __callStatic().                                                *
     *         Assertions marked `public` are called directly and will                 *
     *          not handle any custom exception passed to it.                          *
     ***********************************************************************************/

    /**
     * Check an XPath expression for allowed axes and functions.
     *
     * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing
     * a select subset of functions and axes.
     * The check uses a list of allowed functions and axes, and throws an exception when an unknown function
     * or axis is found in $value.
     *
     * Limitations:
     * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not
     *   evaluate all possible valid XPath expressions correctly and cause either false positives for valid
     *   expressions or false negatives for invalid expressions.
     * - The check may still allow expressions that are not safe, i.e. expressions that consist of only
     *   functions and axes that are deemed "safe", but that are still slow to evaluate. The time it takes to
     *   evaluate an XPath expression depends on the complexity of both the XPath expression and the XML document.
     *   This check, however, does not take the XML document into account, nor is it aware of the internals of the
     *   XPath processor that will evaluate the expression.
     * - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0.
     *
     * @param string $value The XPath expression to check
     * @param array<string> $allowed_axes Axis names that may appear before a '::'
     * @param array<string> $allowed_functions Function names that may appear before a '('
     * @param string $message Optional sprintf() template for the failure message; it receives the
     *   offending function or axis name as its single argument. When empty, a default message is used.
     *
     * @throws \SimpleSAML\Assert\AssertionFailedException When a function or axis is used that is not allowed.
     *   The preliminary type and length assertions are delegated to the base Assert class and
     *   presumably fail with the same exception type (not visible from this trait).
     * @throws \Exception When one of the PCRE calls reports an error
     */
    public static function allowedXPathFilter(
        string $value,
        array $allowed_axes = C::DEFAULT_ALLOWED_AXES,
        array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS,
        string $message = '',
    ): void {
        BaseAssert::allString($allowed_axes);
        BaseAssert::allString($allowed_functions);
        BaseAssert::maxLength(
            $value,
            C::XPATH_FILTER_MAX_LENGTH,
            // Report the limit that is actually enforced instead of a hard-coded number
            sprintf('XPath Filter exceeds the limit of %d characters.', C::XPATH_FILTER_MAX_LENGTH),
        );

        // Empty all string literals first, so that text inside quotes can never be mistaken for
        // a function or axis name by the checks below.
        $strippedValue = preg_replace(
            self::$regex_xpfilter_remove_strings,
            // Replace the content with two of the quotes that were matched
            "\\1\\1",
            $value,
        );

        if ($strippedValue === null) {
            throw new Exception("Error in preg_replace.");
        }

        /**
         * Check if $value uses an XPath function that is not in the list of allowed functions
         *
         * Look for the function specifier '(' and look for a function name before it.
         * Ignoring whitespace between the function name and the '('.
         * All functions must match a string on a list of allowed function names
         */
        $matches = [];
        $res = preg_match_all(self::$regex_xpfilter_functions, $strippedValue, $matches);
        if ($res === false) {
            throw new Exception("Error in preg_match_all.");
        }

        // Check that all the function names we found are in the list of allowed function names
        foreach ($matches[1] as $match) {
            // Strict comparison: both sides are strings, so no type juggling is wanted here
            if (!in_array($match, $allowed_functions, true)) {
                throw new AssertionFailedException(sprintf(
                    $message ?: '\'%s\' is not an allowed XPath function.',
                    $match,
                ));
            }
        }

        /**
         * Check if $value uses an XPath axis that is not in the list of allowed axes
         *
         * Look for the axis specifier '::' and look for an axis name before it.
         * Ignoring whitespace between the axis name and the '::'.
         * All axes must match a string on a list of allowed axis names
         */
        $matches = [];
        $res = preg_match_all(self::$regex_xpfilter_axes, $strippedValue, $matches);
        if ($res === false) {
            throw new Exception("Error in preg_match_all.");
        }

        // Check that all the axes names we found are in the list of allowed axes names
        foreach ($matches[1] as $match) {
            // Strict comparison: both sides are strings, so no type juggling is wanted here
            if (!in_array($match, $allowed_axes, true)) {
                throw new AssertionFailedException(sprintf(
                    $message ?: '\'%s\' is not an allowed XPath axis.',
                    $match,
                ));
            }
        }
    }
}
183