Completed
Push — master ( 6df7b6...2250c0 )
by Tim
24s queued 20s
created

XPathFilter::filterXPathFunction()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 41
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 8
c 1
b 0
f 0
dl 0
loc 41
rs 10
cc 3
nc 3
nop 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\XML\Utils;
6
7
use SimpleSAML\XML\Exception\RuntimeException;
8
9
use function in_array;
10
use function preg_match_all;
11
use function preg_replace;
12
13
/**
14
 * XPathFilter helper functions for the XML library.
15
 *
16
 * @package simplesamlphp/xml-common
17
 */
18
class XPathFilter
{
    /**
     * Remove the content from all single or double-quoted strings in $input, leaving only quotes.
     *
     * Example: `concat('a', "b")` becomes `concat('', "")`.
     *
     * @param string $input
     * @return string $input with the content of every quoted string removed
     * @throws \SimpleSAML\XML\Exception\RuntimeException when preg_replace reports an error
     */
    public static function removeStringContents(string $input): string
    {
        /**
         * This regex should not be vulnerable to a ReDOS, because it uses possessive quantifiers
         * that prevent backtracking.
         *
         * @see https://www.owasp.org/index.php/Regular_expression_Denial_of_Service_-_ReDoS
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking.
         *
         * '/(["\'])(?:(?!\1).)*+\1/s'
         *  (["\'])  # Match a single or double quote and capture it in group 1
         *  (?:      # Start a non-capturing group
         *    (?!    # Negative lookahead
         *      \1   # Match the same quote as in group 1
         *    )      # End of negative lookahead
         *    .      # Match any character (that is not a quote, because of the negative lookahead)
         *  )*+      # Repeat the non-capturing group zero or more times, possessively
         *  \1       # Match the same quote as in group 1
         *
         * The 's' (DOTALL) modifier lets '.' match newlines too. Without it a quoted string that
         * contains a line break is not stripped, and the quotes that follow it get paired up wrongly,
         * which could make text outside of a string literal look like string content.
         */
        $res = preg_replace(
            '/(["\'])(?:(?!\\1).)*+\\1/s',
            "\\1\\1",   // Replace the content with two of the quotes that were matched
            $input,
        );

        if (null === $res) {
            throw new RuntimeException("Error in preg_replace");
        }

        return $res;
    }


    /**
     * Check if the $xpathExpression uses an XPath function that is not in the list of allowed functions
     *
     * The whole of $xpathExpression is scanned as given; quoted string literals are not stripped here.
     *
     * @param string $xpathExpression the expression to check. Should be a valid xpath expression
     * @param string[] $allowedFunctions array of string with a list of allowed function names
     * @throws \SimpleSAML\XML\Exception\RuntimeException when a function name is found that is not in
     *   $allowedFunctions, or when the expression could not be scanned
     */
    public static function filterXPathFunction(string $xpathExpression, array $allowedFunctions): void
    {
        /**
         * Look for the function specifier '(' and look for a function name before it.
         * Ignoring whitespace before the '(' and the function name.
         * All functions must match a string on a list of allowed function names
         *
         * Function names are lower-case alpha (i.e. [a-z]) and can contain one or more hyphens,
         * but cannot start or end with a hyphen. To match this, we start with matching one or more
         * lower-case alpha characters, followed by zero or more atomic groups that start with a hyphen
         * and then match one or more lower-case alpha characters. This ensures that the function name
         * cannot start or end with a hyphen, but can contain one or more hyphens.
         * More than one consecutive hyphen does not match.
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking
         * and thus prevent a ReDOS.
         *
         * '/([a-z]++(?>-[a-z]++)*+)\s*+\(/'
         * (           # Start a capturing group
         *   [a-z]++   # Match one or more lower-case alpha characters
         *   (?>       # Start an atomic group (no capturing)
         *     -       # Match a hyphen
         *     [a-z]++ # Match one or more lower-case alpha characters, possessively
         *   )*+       # Repeat the atomic group zero or more times,
         * )           # End of the capturing group
         * \s*+        # Match zero or more whitespace characters, possessively
         * \(          # Match an opening parenthesis
         */
        $matches = [];
        $res = preg_match_all(
            '/([a-z]++(?>-[a-z]++)*+)\\s*+\\(/',
            $xpathExpression,
            $matches,
        );

        // Fail closed: if the scan itself failed we cannot tell which functions are used
        if (false === $res) {
            throw new RuntimeException("Error in preg_match_all");
        }

        // Check that all the function names we found are in the list of allowed function names.
        // Strict comparison, so a non-string entry (e.g. true) in the list cannot match every name.
        foreach ($matches[1] as $match) {
            if (!in_array($match, $allowedFunctions, true)) {
                throw new RuntimeException("Invalid function: '" . $match . "'");
            }
        }
    }


    /**
     * Check if the $xpathExpression uses an XPath axis that is not in the list of allowed axes
     *
     * The whole of $xpathExpression is scanned as given; quoted string literals are not stripped here.
     *
     * @param string $xpathExpression the expression to check. Should be a valid xpath expression
     * @param string[] $allowedAxes array of string with a list of allowed axes names
     * @throws \SimpleSAML\XML\Exception\RuntimeException when an axis name is found that is not in
     *   $allowedAxes, or when the expression could not be scanned
     */
    public static function filterXPathAxis(string $xpathExpression, array $allowedAxes): void
    {
        /**
         * Look for the axis specifier '::' and look for an axis name before it.
         * Ignoring whitespace before the '::' and the axis name.
         * All axes must match a string on a list of allowed axis names
         *
         * We use the same rules for matching Axis names as we do for function names.
         * The only difference is that we match the '::' instead of the '('
         * so everything that was said about the regular expression for function names
         * applies here as well.
         *
         * Use possessive quantifiers (i.e. *+ and ++ instead of * and + respectively) to prevent backtracking
         * and thus prevent a ReDOS.
         *
         * '/([a-z]++(?>-[a-z]++)*+)\s*+::/'
         * (           # Start a capturing group
         *   [a-z]++   # Match one or more lower-case alpha characters
         *   (?>       # Start an atomic group (no capturing)
         *     -       # Match a hyphen
         *     [a-z]++ # Match one or more lower-case alpha characters, possessively
         *   )*+       # Repeat the atomic group zero or more times,
         * )           # End of the capturing group
         * \s*+        # Match zero or more whitespace characters, possessively
         * ::          # Match the axis specifier (two colons)
         */
        $matches = [];
        $res = preg_match_all(
            '/([a-z]++(?>-[a-z]++)*+)\\s*+::/',
            $xpathExpression,
            $matches,
        );

        // Fail closed: if the scan itself failed we cannot tell which axes are used
        if (false === $res) {
            throw new RuntimeException("Error in preg_match_all");
        }

        // Check that all the axes names we found are in the list of allowed axes names.
        // Strict comparison, so a non-string entry (e.g. true) in the list cannot match every name.
        foreach ($matches[1] as $match) {
            if (!in_array($match, $allowedAxes, true)) {
                throw new RuntimeException("Invalid axis: '" . $match . "'");
            }
        }
    }
}
164