Passed
Pull Request — master (#33)
by Tim
03:08 queued 01:22
created

XPathFilterTrait   A

Complexity

Total Complexity 12

Size/Duplication

Total Lines 184
Duplicated Lines 0 %

Importance

Changes 2
Bugs 0 Features 0
Metric Value
wmc 12
eloc 35
c 2
b 0
f 0
dl 0
loc 184
rs 10

3 Methods

Rating   Name   Duplication   Size   Complexity  
A validAllowedXPathFilter() 0 27 2
A validAllowedXPathAxes() 0 24 5
A validAllowedXPathFunctions() 0 24 5
1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\XML\Assert;
6
7
use Exception;
8
use SimpleSAML\Assert\Assert as BaseAssert;
9
use SimpleSAML\Assert\AssertionFailedException;
10
use SimpleSAML\XML\Constants as C;
11
12
use function in_array;
13
use function preg_match_all;
14
use function preg_replace;
15
use function sprintf;
16
17
/**
 * Assertions that restrict an XPath filter expression to an allow-list of axes and functions.
 *
 * @package simplesamlphp/xml-common
 */
trait XPathFilterTrait
{
    /**
     * Remove the content from all single or double-quoted strings in $input, leaving only quotes.
     * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
     *
     * The `s` (DOTALL) modifier is required: XPath string literals may contain newlines, and without it
     * a literal holding a newline is not recognised as a string. The closing quote of that literal would
     * then be paired with the opening quote of the next literal, blanking out the real expression between
     * them and hiding it from the axis and function checks.
     *
     * '/(["\'])(?:(?!\1).)*+\1/s'
     *  (["\'])  # Match a single or double quote and capture it in group 1
     *  (?:      # Start a non-capturing group
     *    (?!    # Negative lookahead
     *      \1   # Match the same quote as in group 1
     *    )      # End of negative lookahead
     *    .      # Match any character, newlines included (that is not a quote, because of the lookahead)
     *  )*+      # Repeat the non-capturing group zero or more times, possessively
     *  \1       # Match the same quote as in group 1
     */
    private static string $regex_xpfilter_remove_strings = '/(["\'])(?:(?!\1).)*+\1/s';

    /**
     * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
     * but cannot start or end with a hyphen. To match this, we start with matching one or more
     * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
     * and then match one or more lower-case alpha characters. This ensures that the function name
     * cannot start or end with a hyphen, but can contain one or more hyphens.
     * More than one consecutive hyphen does not match.
     *
     * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
     * (           # Start a capturing group
     *   [a-z]++   # Match one or more lower-case alpha characters, possessively
     *   (?>       # Start an atomic group (no capturing)
     *     -       # Match a hyphen
     *     [a-z]++ # Match one or more lower-case alpha characters, possessively
     *   )*+       # Repeat the atomic group zero or more times, possessively
     * )           # End of the capturing group
     * \s*+        # Match zero or more whitespace characters, possessively
     * \(          # Match an opening parenthesis
     */
    private static string $regex_xpfilter_functions = '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/';

    /**
     * We use the same rules for matching Axis names as we do for function names.
     * The only difference is that we match the '::' instead of the '('
     * so everything that was said about the regular expression for function names
     * applies here as well.
     *
     * '/([a-z]++(?>-[a-z]++)*+)\s*+::/'
     * (           # Start a capturing group
     *   [a-z]++   # Match one or more lower-case alpha characters, possessively
     *   (?>       # Start an atomic group (no capturing)
     *     -       # Match a hyphen
     *     [a-z]++ # Match one or more lower-case alpha characters, possessively
     *   )*+       # Repeat the atomic group zero or more times, possessively
     * )           # End of the capturing group
     * \s*+        # Match zero or more whitespace characters, possessively
     * ::          # Match the axis specifier
     */
    private static string $regex_xpfilter_axes = '/([a-z]++(?>-[a-z]++)*+)\\s*+::/';


    /***********************************************************************************
     *  NOTE:  Custom assertions may be added below this line.                         *
     *         They SHOULD be marked as `private` to ensure the call is forced         *
     *          through __callStatic().                                                *
     *         Assertions marked `public` are called directly and will                 *
     *          not handle any custom exception passed to it.                          *
     ***********************************************************************************/

    /**
     * Check an XPath expression for allowed axes and functions
     * The goal is preventing DoS attacks by limiting the complexity of the XPath expression by only allowing
     * a select subset of functions and axes.
     * The check uses a list of allowed functions and axes, and throws an exception when an unknown function
     * or axis is found in $value.
     *
     * Limitations:
     * - The implementation is based on regular expressions, and does not employ an XPath 1.0 parser. It may not
     *   evaluate all possible valid XPath expressions correctly and cause either false positives for valid
     *   expressions or false negatives for invalid expressions.
     * - The check may still allow expressions that are not safe, i.e. expressions that consist of only
     *   functions and axes that are deemed "safe", but that are still slow to evaluate. The time it takes to
     *   evaluate an XPath expression depends on the complexity of both the XPath expression and the XML document.
     *   This check, however, does not take the XML document into account, nor is it aware of the internals of the
     *   XPath processor that will evaluate the expression.
     * - The check was written with the XPath 1.0 syntax in mind, but should work equally well for XPath 2.0 and 3.0.
     *
     * @param string $value The XPath expression to check
     * @param array<string> $allowed_axes Axis names that may appear in the expression
     * @param array<string> $allowed_functions Function names that may appear in the expression
     * @param string $message Optional sprintf() format (one %s placeholder for the offending name) that
     *   replaces the default message of the function and axis checks
     *
     * @throws \SimpleSAML\Assert\AssertionFailedException When a function or axis is not on its allow-list
     * @throws \Exception When the regular expression engine reports an error
     */
    public static function validAllowedXPathFilter(
        string $value,
        array $allowed_axes = C::DEFAULT_ALLOWED_AXES,
        array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS,
        string $message = '',
    ): void {
        BaseAssert::allString($allowed_axes);
        BaseAssert::allString($allowed_functions);
        BaseAssert::maxLength(
            $value,
            C::XPATH_FILTER_MAX_LENGTH,
            // Report the configured limit rather than a hard-coded number
            sprintf('XPath Filter exceeds the limit of %d characters.', C::XPATH_FILTER_MAX_LENGTH),
        );

        // Blank out string literals so that their content cannot be mistaken for a function or an axis
        $strippedValue = preg_replace(
            self::$regex_xpfilter_remove_strings,
            // Replace the content with two of the quotes that were matched
            "\\1\\1",
            $value,
        );

        if ($strippedValue === null) {
            throw new Exception("Error in preg_replace.");
        }

        self::validAllowedXPathFunctions($strippedValue, $allowed_functions, $message);
        self::validAllowedXPathAxes($strippedValue, $allowed_axes, $message);
    }


    /**
     * Check that every XPath function used in $value is in the list of allowed functions
     *
     * Look for the function specifier '(' and look for a function name before it.
     * Ignoring whitespace before the '(' and the function name.
     * All functions must match a string on a list of allowed function names.
     *
     * String literals are not stripped here; callers that pass a raw expression get the content of
     * string literals checked as well.
     *
     * @param string $value The XPath expression to check
     * @param array<string> $allowed_functions Function names that may appear in the expression
     * @param string $message Optional sprintf() format with one %s placeholder for the offending name
     *
     * @throws \SimpleSAML\Assert\AssertionFailedException When a function is not on the allow-list
     * @throws \Exception When the regular expression engine reports an error
     */
    public static function validAllowedXPathFunctions(
        string $value,
        array $allowed_functions = C::DEFAULT_ALLOWED_FUNCTIONS,
        string $message = '',
    ): void {
        $matches = [];
        $res = preg_match_all(self::$regex_xpfilter_functions, $value, $matches);
        if ($res === false) {
            throw new Exception("Error in preg_match_all.");
        }

        // Check that all the function names we found are in the list of allowed function names
        foreach ($matches[1] as $match) {
            // Strict comparison: a non-string entry such as `true` must never match every name
            if (!in_array($match, $allowed_functions, true)) {
                throw new AssertionFailedException(sprintf(
                    $message === '' ? '\'%s\' is not an allowed XPath function.' : $message,
                    $match,
                ));
            }
        }
    }


    /**
     * Check that every XPath axis used in $value is in the list of allowed axes
     *
     * Look for the axis specifier '::' and look for an axis name before it.
     * Ignoring whitespace before the '::' and the axis name.
     * All axes must match a string on a list of allowed axis names.
     *
     * String literals are not stripped here; callers that pass a raw expression get the content of
     * string literals checked as well.
     *
     * @param string $value The XPath expression to check
     * @param array<string> $allowed_axes Axis names that may appear in the expression
     * @param string $message Optional sprintf() format with one %s placeholder for the offending name
     *
     * @throws \SimpleSAML\Assert\AssertionFailedException When an axis is not on the allow-list
     * @throws \Exception When the regular expression engine reports an error
     */
    public static function validAllowedXPathAxes(
        string $value,
        array $allowed_axes = C::DEFAULT_ALLOWED_AXES,
        string $message = '',
    ): void {
        $matches = [];
        $res = preg_match_all(self::$regex_xpfilter_axes, $value, $matches);
        if ($res === false) {
            throw new Exception("Error in preg_match_all.");
        }

        // Check that all the axes names we found are in the list of allowed axes names
        foreach ($matches[1] as $match) {
            // Strict comparison: a non-string entry such as `true` must never match every name
            if (!in_array($match, $allowed_axes, true)) {
                throw new AssertionFailedException(sprintf(
                    $message === '' ? '\'%s\' is not an allowed XPath axis.' : $message,
                    $match,
                ));
            }
        }
    }
}
209