Parser::tokenize()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 14
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 10
dl 0
loc 14
c 0
b 0
f 0
ccs 2
cts 2
cp 1
rs 9.9332
nc 2
nop 2
cc 2
crap 2
1
<?php
2
declare(strict_types=1);
3
4
namespace Lead\Router;
5
6
use Lead\Router\Exception\ParserException;
7
8
/**
 * Parses route pattern.
 *
 * The parser can produce a tokens structure from route pattern using `Parser::tokenize()`.
 * A tokens structure root node is of the following form:
 *
 * ```php
 * $token = Parser::tokenize('/test/{param}');
 * ```
 *
 * The returned `$token` looks like the following:
 * ```
 * [
 *     'optional' => false,
 *     'greedy'   => '',
 *     'repeat'   => false,
 *     'pattern'  => '/test/{param}',
 *     'tokens'   => [
 *         '/test/',
 *         [
 *             'name'      => 'param',
 *             'pattern'   => '[^/]+'
 *         ]
 *     ]
 * ]
 * ```
 *
 * Then tokens structures can be compiled to get the regex representation with associated variable.
 *
 * ```php
 * $rule = Parser::compile($token);
 * ```
 *
 * `$rule` looks like the following:
 *
 * ```
 * [
 *     '/test/([^/]+)',
 *     ['param' => false]
 * ]
 * ```
 */
class Parser implements ParserInterface
{
    /**
     * Variable capturing block regex.
     *
     * Always used with the PCRE extended (`x`) modifier in this class, so the layout
     * whitespace below is not significant. Capture group 1 is the placeholder name,
     * optional capture group 2 is the custom sub-pattern following the colon, which
     * may itself contain balanced curly braces (handled through the `(?-1)` recursion).
     */
    const PLACEHOLDER_REGEX = <<<EOD
\{
    (
        [a-zA-Z][a-zA-Z0-9_]*
    )
    (?:
        :(
            [^{}]*
            (?:
                \{(?-1)\}[^{}]*
            )*
        )
    )?
\}
EOD;

    /**
     * Tokenizes a route pattern. Optional segments are identified by square brackets.
     *
     * @param  string $pattern   A route pattern
     * @param  string $delimiter The path delimiter.
     * @return array             The tokens structure root node.
     */
    public static function tokenize(string $pattern, string $delimiter = '/'): array
    {
        // Splits on every `[` located outside of a placeholder: `(*SKIP)(*F)` discards the
        // placeholders themselves, so brackets of a `{name:regex}` sub-pattern are ignored.
        $chunks = preg_split('~' . static::PLACEHOLDER_REGEX . '(*SKIP)(*F)|\[~x', $pattern);

        // `preg_split()` returns `false` on failure and `count(false)` is a TypeError on PHP 8,
        // so a failed split is handled as a pattern with no optional segments.
        if (is_array($chunks) && count($chunks) > 1) {
            $tokens = static::_tokenizePattern($pattern, $delimiter);
        } else {
            $tokens = static::_tokenizeSegment($pattern, $delimiter);
        }

        return [
            'optional' => false,
            'greedy'   => '',
            'repeat'   => false,
            'pattern'  => $pattern,
            'tokens'   => $tokens
        ];
    }

    /**
     * Tokenizes patterns.
     *
     * @param  string $pattern   A route pattern
     * @param  string $delimiter The path delimiter.
     * @param  mixed  $variable  Passed by reference, receives the name of the last placeholder
     *                           met while tokenizing (left untouched when there is none).
     * @return array             An array of tokens structure.
     */
    protected static function _tokenizePattern(string $pattern, string $delimiter, &$variable = null): array
    {
        $tokens = [];

        foreach (static::split($pattern) as $part) {
            // Plain segments are strings, optional segments are `[pattern, greedy]` pairs.
            if (is_string($part)) {
                $tokens = array_merge($tokens, static::_tokenizeSegment($part, $delimiter, $variable));
                continue;
            }

            $greedy = $part[1];
            $repeat = $greedy === '+' || $greedy === '*';
            $optional = $greedy === '?' || $greedy === '*';

            // Must run before building the node: it updates `$variable` used for 'repeat' below.
            $children = static::_tokenizePattern($part[0], $delimiter, $variable);

            $tokens[] = [
                'optional' => $optional,
                'greedy'   => $greedy ?: '?',
                'repeat'   => $repeat ? $variable : false,
                'pattern'  => $part[0],
                'tokens'   => $children
            ];
        }

        return $tokens;
    }

    /**
     * Tokenizes segments which are patterns with optional segments filtered out.
     * Only classic placeholder are supported.
     *
     * @param  string $pattern   A route pattern with no optional segments.
     * @param  string $delimiter The path delimiter.
     * @param  mixed  $variable  Passed by reference, receives the name of the last placeholder
     *                           found in the segment (left untouched when there is none).
     * @return array             An array of tokens structure.
     */
    protected static function _tokenizeSegment($pattern, $delimiter, &$variable = null): array
    {
        $tokens = [];
        $index = 0;

        // The trailing empty group `()` is presumably there so every match reports group 2,
        // even when the placeholder has no custom sub-pattern.
        $regex = '~' . static::PLACEHOLDER_REGEX . '()~x';

        if (preg_match_all($regex, $pattern, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
            foreach ($matches as $match) {
                $offset = $match[0][1];

                // Literal text located between the previous placeholder and this one.
                $path = substr($pattern, $index, $offset - $index);
                $index = $offset + strlen($match[0][0]);

                // Compared against '' (and not by truthiness) so that a literal '0' is kept.
                if ($path !== '') {
                    $tokens[] = $path;
                }

                $variable = $match[1][0];
                // Same here: a custom sub-pattern of '0' must not fall back to the default.
                $capture = $match[2][0] !== '' ? $match[2][0] : '[^' . $delimiter . ']+';

                $tokens[] = [
                    'name'      => $variable,
                    'pattern'   => $capture
                ];
            }
        }

        // Literal text remaining after the last placeholder (or the whole pattern if none).
        if ($index < strlen($pattern)) {
            $tokens[] = substr($pattern, $index);
        }

        return $tokens;
    }

    /**
     * Splits a pattern in segments and patterns.
     *
     * segments will be represented by string value and patterns by an array containing
     * the string pattern as first value and the greedy value as second value.
     *
     * example:
     * `/user[/{id}]*` will give `['/user', ['/{id}', '*']]`
     *
     * Unfortunately recursive regex matcher can't help here so this function is required.
     *
     * @param  string $pattern A route pattern.
     * @return array           The split pattern.
     * @throws ParserException When square brackets are not balanced.
     */
    public static function split(string $pattern): array
    {
        $segments = [];
        $len = strlen($pattern);
        $buffer = '';
        $opened = 0;

        for ($i = 0; $i < $len; $i++) {
            $char = $pattern[$i];

            if ($char === '{') {
                // Copies the whole placeholder verbatim so that the brackets of its sub-pattern
                // are not mistaken for optional segments. Brace depth is tracked to support
                // nested quantifiers like `{id:[a-z]{2}[0-9]}`, and the scan is bounded by
                // `$len` so an unterminated `{` never reads past the end of the string.
                $depth = 0;
                for (; $i < $len; $i++) {
                    $buffer .= $pattern[$i];
                    if ($pattern[$i] === '{') {
                        $depth++;
                    } elseif ($pattern[$i] === '}' && --$depth === 0) {
                        break;
                    }
                }
            } elseif ($char === '[') {
                $opened++;
                if ($opened === 1) {
                    // Entering a top level optional segment: flush the plain segment.
                    $segments[] = $buffer;
                    $buffer = '';
                } else {
                    // Nested brackets stay in the buffer, they are split by a later pass.
                    $buffer .= $char;
                }
            } elseif ($char === ']') {
                $opened--;
                if ($opened < 0) {
                    // A closing bracket without opener can't be balanced by a later `[`.
                    throw ParserException::squareBracketMismatch();
                }
                if ($opened === 0) {
                    $greedy = '?';
                    // An optional segment may be followed by a `*` or `+` repeat modifier.
                    if ($i < $len - 1 && ($pattern[$i + 1] === '*' || $pattern[$i + 1] === '+')) {
                        $greedy = $pattern[$i + 1];
                        $i++;
                    }
                    $segments[] = [$buffer, $greedy];
                    $buffer = '';
                } else {
                    $buffer .= $char;
                }
            } else {
                $buffer .= $char;
            }
        }

        // Compared against '' (and not by truthiness) so that a trailing '0' is kept.
        if ($buffer !== '') {
            $segments[] = $buffer;
        }
        if ($opened) {
            throw ParserException::squareBracketMismatch();
        }

        return $segments;
    }

    /**
     * Builds a regex from a tokens structure array.
     *
     * In the returned variables array, a placeholder maps to `false` unless it belongs to a
     * repeatable segment, in which case it maps to the pattern of that segment.
     *
     * @param  array $token    A tokens structure root node.
     * @return array           An array containing the regex pattern and its associated variable names.
     * @throws ParserException When a repeatable segment holds more than one placeholder or
     *                         when a placeholder name is used more than once.
     */
    public static function compile($token): array
    {
        $variables = [];
        $regex = '';

        foreach ($token['tokens'] as $child) {
            if (is_string($child)) {
                // Literal text.
                $regex .= preg_quote($child, '~');
            } elseif (isset($child['tokens'])) {
                // Nested (optional and/or repeatable) segment.
                $rule = static::compile($child);
                if ($child['repeat']) {
                    if (count($rule[1]) > 1) {
                        throw ParserException::placeholderExceeded();
                    }
                    // The whole repetition is captured as a single group.
                    $regex .= '((?:' . $rule[0] . ')' . $child['greedy'] . ')';
                } elseif ($child['optional']) {
                    $regex .= '(?:' . $rule[0] . ')?';
                }
                // NOTE(review): a segment which is neither 'repeat' nor 'optional' adds nothing
                // to the regex here - confirm this is the intended behavior.
                foreach ($rule[1] as $name => $pattern) {
                    if (isset($variables[$name])) {
                        throw ParserException::duplicatePlaceholder($name);
                    }
                    $variables[$name] = $pattern;
                }
            } else {
                // Placeholder.
                $name = $child['name'];
                if (isset($variables[$name])) {
                    throw ParserException::duplicatePlaceholder($name);
                }
                if ($token['repeat']) {
                    // No capturing group here: the parent wraps the whole repetition in one.
                    $variables[$name] = $token['pattern'];
                    $regex .= $child['pattern'];
                } else {
                    $variables[$name] = false;
                    $regex .= '(' . $child['pattern'] . ')';
                }
            }
        }

        return [$regex, $variables];
    }
}
295