1 | <?php |
||
2 | declare(strict_types=1); |
||
3 | |||
4 | namespace Lead\Router; |
||
5 | |||
6 | use Lead\Router\Exception\ParserException; |
||
7 | |||
/**
 * Parses route patterns.
 *
 * The parser can produce a tokens structure from a route pattern using `Parser::tokenize()`.
 * The root node of a tokens structure is obtained as follows:
 *
 * ```php
 * $token = Parser::tokenize('/test/{param}');
 * ```
 *
 * The returned `$token` looks like the following:
 *
 * ```php
 * [
 *     'optional' => false,
 *     'greedy'   => '',
 *     'repeat'   => false,
 *     'pattern'  => '/test/{param}',
 *     'tokens'   => [
 *         '/test/',
 *         [
 *             'name'    => 'param',
 *             'pattern' => '[^/]+'
 *         ]
 *     ]
 * ]
 * ```
 *
 * A tokens structure can then be compiled to get its regex representation together with
 * the associated variables.
 *
 * ```php
 * $rule = Parser::compile($token);
 * ```
 *
 * `$rule` looks like the following:
 *
 * ```php
 * [
 *     '/test/([^/]+)',
 *     ['param' => false]
 * ]
 * ```
 */
||
50 | class Parser implements ParserInterface |
||
51 | { |
||
52 | |||
/**
 * Variable capturing block regex.
 *
 * Matches a `{name}` or `{name:pattern}` placeholder:
 * - capture group 1 is the placeholder name (a letter followed by letters, digits or underscores),
 * - capture group 2 (optional, after the `:`) is the custom pattern; it may itself contain
 *   balanced `{...}` pairs thanks to the `(?-1)` recursion.
 *
 * The expression is written over several lines and carries no delimiters; the methods of this
 * class wrap it in `~...~x` before use (the `x` flag makes the line breaks insignificant).
 */
const PLACEHOLDER_REGEX = <<<EOD
\{
(
[a-zA-Z][a-zA-Z0-9_]*
)
(?:
:(
[^{}]*
(?:
\{(?-1)\}[^{}]*
)*
)
)?
\}
EOD;
||
71 | |||
/**
 * Tokenizes a route pattern. Optional segments are identified by square brackets.
 *
 * The pattern is handed to `_tokenizePattern()` when it contains at least one `[` located
 * outside of a `{...}` placeholder, and to `_tokenizeSegment()` otherwise. The resulting
 * tokens are wrapped in a non optional, non repeatable root node.
 *
 * @param  string $pattern   A route pattern.
 * @param  string $delimiter The path delimiter.
 * @return array             The tokens structure root node.
 */
public static function tokenize(string $pattern, string $delimiter = '/'): array
{
    // Placeholders are skipped with `(*SKIP)(*F)` so only a `[` outside of them acts as a
    // split point: more than one chunk means the pattern has some optional segments.
    $chunks = preg_split('~' . static::PLACEHOLDER_REGEX . '(*SKIP)(*F)|\[~x', $pattern);
    $hasOptionalSegments = count($chunks) > 1;

    $tokens = $hasOptionalSegments
        ? static::_tokenizePattern($pattern, $delimiter)
        : static::_tokenizeSegment($pattern, $delimiter);

    return [
        'optional' => false,
        'greedy' => '',
        'repeat' => false,
        'pattern' => $pattern,
        'tokens' => $tokens
    ];
}
||
96 | |||
/**
 * Tokenizes a pattern which may contain optional segments (square brackets).
 *
 * The pattern is first cut with `split()`: plain segments are tokenized with
 * `_tokenizeSegment()`, while each bracketed group is tokenized recursively and wrapped
 * in a node describing its greediness.
 *
 * @param  string      $pattern   A route pattern.
 * @param  string      $delimiter The path delimiter.
 * @param  string|null $variable  Passed by reference and shared with `_tokenizeSegment()`,
 *                                which stores in it the name of the last placeholder found.
 *                                It is used as the `'repeat'` value of repeatable groups.
 * @return array                  An array of tokens structure.
 */
protected static function _tokenizePattern(string $pattern, string $delimiter, &$variable = null): array
{
    $tokens = [];

    foreach (static::split($pattern) as $part) {
        // Strings are plain segments, arrays are `[pattern, greedy]` bracketed groups.
        if (is_string($part)) {
            $tokens = array_merge($tokens, static::_tokenizeSegment($part, $delimiter, $variable));
            continue;
        }

        $greedy = $part[1];
        $repeat = $greedy === '+' || $greedy === '*';
        $optional = $greedy === '?' || $greedy === '*';

        // Must run before the node is built: it updates `$variable` with the name of the
        // last placeholder contained in the group.
        $children = static::_tokenizePattern($part[0], $delimiter, $variable);

        $tokens[] = [
            'optional' => $optional,
            'greedy' => $greedy ?: '?',
            'repeat' => $repeat ? $variable : false,
            'pattern' => $part[0],
            'tokens' => $children
        ];
    }

    return $tokens;
}
||
135 | |||
/**
 * Tokenizes segments, i.e. patterns with optional segments filtered out.
 * Only classic `{name}` / `{name:pattern}` placeholders are supported.
 *
 * The result alternates literal strings and placeholder nodes of the form
 * `['name' => ..., 'pattern' => ...]`, in the order they appear in the pattern.
 * A placeholder with no custom pattern gets `[^<delimiter>]+`.
 *
 * @param  string      $pattern   A route pattern with no optional segments.
 * @param  string      $delimiter The path delimiter.
 * @param  string|null $variable  Passed by reference; set to the name of the last
 *                                placeholder found (left untouched when there is none).
 * @return array                  An array of tokens structure.
 */
protected static function _tokenizeSegment($pattern, $delimiter, &$variable = null): array
{
    $tokens = [];
    $index = 0;

    // NOTE(review): the trailing empty group `()` presumably forces group 2 to be reported
    // even when the optional `:pattern` part did not match - confirm against preg_match_all().
    if (preg_match_all('~' . static::PLACEHOLDER_REGEX . '()~x', $pattern, $matches, PREG_OFFSET_CAPTURE | PREG_SET_ORDER)) {
        foreach ($matches as $match) {
            $offset = $match[0][1];

            // Emit the literal text located between the previous placeholder and this one.
            // The check is on the length, not on truthiness, so that a literal "0" is kept.
            if ($offset > $index) {
                $tokens[] = substr($pattern, $index, $offset - $index);
            }
            $index = $offset + strlen($match[0][0]);

            $variable = $match[1][0];
            // Only an empty custom pattern falls back to the default one ("0" is a valid pattern).
            $capture = $match[2][0] !== '' ? $match[2][0] : '[^' . $delimiter . ']+';

            $tokens[] = [
                'name' => $variable,
                'pattern' => $capture
            ];
        }
    }

    // Remaining literal text after the last placeholder (or the whole pattern if none matched).
    if ($index < strlen($pattern)) {
        $tokens[] = substr($pattern, $index);
    }

    return $tokens;
}
||
181 | |||
/**
 * Splits a pattern in segments and patterns.
 *
 * Segments are represented by a string value and patterns (the content of a top level
 * `[...]` group) by an array containing the string pattern as first value and the greedy
 * value (`'?'`, `'*'` or `'+'`) as second value. Nested brackets are kept verbatim inside
 * the pattern string of their top level group.
 *
 * example:
 * `/user[/{id}]*` will give `['/user', ['/{id}', '*']]`
 *
 * `{...}` placeholders are copied verbatim, nested braces included, so square brackets
 * used inside a placeholder's regex are never interpreted as optional segments.
 *
 * Unfortunately recursive regex matcher can't help here so this function is required.
 *
 * @param  string $pattern A route pattern.
 * @return array           The split pattern.
 * @throws ParserException If the opening and closing square brackets don't balance out.
 */
public static function split(string $pattern): array
{
    $segments = [];
    $len = strlen($pattern);
    $buffer = '';
    $opened = 0;

    for ($i = 0; $i < $len; $i++) {
        $char = $pattern[$i];

        if ($char === '{') {
            // Copy the placeholder up to its matching `}`, or up to the end of the pattern
            // when it is not terminated. The bound is checked before any read.
            $depth = 0;
            for (; $i < $len; $i++) {
                $buffer .= $pattern[$i];
                if ($pattern[$i] === '{') {
                    $depth++;
                } elseif ($pattern[$i] === '}' && --$depth === 0) {
                    break;
                }
            }
        } elseif ($char === '[') {
            $opened++;
            if ($opened === 1) {
                // Entering a top level group: flush the segment collected so far.
                $segments[] = $buffer;
                $buffer = '';
            } else {
                $buffer .= $char;
            }
        } elseif ($char === ']') {
            $opened--;
            if ($opened === 0) {
                // Leaving a top level group: an optional `*` or `+` suffix sets its greediness.
                $greedy = '?';
                if ($i + 1 < $len && ($pattern[$i + 1] === '*' || $pattern[$i + 1] === '+')) {
                    $greedy = $pattern[$i + 1];
                    $i++;
                }
                $segments[] = [$buffer, $greedy];
                $buffer = '';
            } else {
                $buffer .= $char;
            }
        } else {
            $buffer .= $char;
        }
    }

    // Compared against '' (not truthiness) so that a trailing "0" segment is kept.
    if ($buffer !== '') {
        $segments[] = $buffer;
    }
    if ($opened) {
        throw ParserException::squareBracketMismatch();
    }

    return $segments;
}
||
248 | |||
/**
 * Builds a regex from a tokens structure array.
 *
 * Literal strings are quoted, placeholders become capturing groups and nested groups are
 * compiled recursively. The returned variables map is keyed by placeholder name; the value
 * is `false` for a regular placeholder, or the pattern of the enclosing group when that
 * group is repeatable.
 *
 * @param  array $token    A tokens structure root node.
 * @return array           An array containing the regex pattern and its associated variable names.
 * @throws ParserException If a repeatable group holds more than one placeholder or if a
 *                         placeholder name is used more than once.
 */
public static function compile($token): array
{
    $expression = '';
    $names = [];

    foreach ($token['tokens'] as $node) {
        // Literal text.
        if (is_string($node)) {
            $expression .= preg_quote($node, '~');
            continue;
        }

        // Placeholder node.
        if (!isset($node['tokens'])) {
            $key = $node['name'];
            if (isset($names[$key])) {
                throw ParserException::duplicatePlaceholder($key);
            }
            if ($token['repeat']) {
                // Inside a repeatable group the capture is done by the group itself.
                $names[$key] = $token['pattern'];
                $expression .= $node['pattern'];
            } else {
                $names[$key] = false;
                $expression .= '(' . $node['pattern'] . ')';
            }
            continue;
        }

        // Nested group node.
        $compiled = static::compile($node);
        if ($node['repeat']) {
            if (count($compiled[1]) > 1) {
                throw ParserException::placeholderExceeded();
            }
            $expression .= '((?:' . $compiled[0] . ')' . $node['greedy'] . ')';
        } elseif ($node['optional']) {
            $expression .= '(?:' . $compiled[0] . ')?';
        }

        foreach ($compiled[1] as $key => $source) {
            if (isset($names[$key])) {
                throw ParserException::duplicatePlaceholder($key);
            }
            $names[$key] = $source;
        }
    }

    return [$expression, $names];
}
||
294 | } |
||
295 |