burzum /
php-router
| 1 | <?php |
||
| 2 | declare(strict_types=1); |
||
| 3 | |||
| 4 | namespace Lead\Router; |
||
| 5 | |||
| 6 | use Lead\Router\Exception\ParserException; |
||
| 7 | |||
/**
 * Parses route patterns.
 *
 * The parser can produce a tokens structure from a route pattern using `Parser::tokenize()`.
 * A tokens structure root node is obtained like this:
 *
 * ```php
 * $token = Parser::tokenize('/test/{param}');
 * ```
 *
 * The returned `$token` looks like the following:
 * ```
 * [
 *     'optional' => false,
 *     'greedy'   => '',
 *     'repeat'   => false,
 *     'pattern'  => '/test/{param}',
 *     'tokens'   => [
 *         '/test/',
 *         [
 *             'name'    => 'param',
 *             'pattern' => '[^/]+'
 *         ]
 *     ]
 * ]
 * ```
 *
 * A tokens structure can then be compiled to get its regex representation together
 * with the associated variables.
 *
 * ```php
 * $rule = Parser::compile($token);
 * ```
 *
 * `$rule` looks like the following:
 *
 * ```
 * [
 *     '/test/([^/]+)',
 *     ['param' => false]
 * ]
 * ```
 */
class Parser implements ParserInterface
{
    /**
     * Variable capturing block regex (to be used with the `x` modifier).
     *
     * Matches `{name}` or `{name:constraint}`. Capture group 1 is the placeholder name,
     * capture group 2 the optional constraint, which may itself contain balanced braces.
     */
    const PLACEHOLDER_REGEX = <<<EOD
\{
    (
        [a-zA-Z][a-zA-Z0-9_]*
    )
    (?:
        :(
            [^{}]*
            (?:
                \{(?-1)\}[^{}]*
            )*
        )
    )?
\}
EOD;

    /**
     * Tokenizes a route pattern. Optional segments are identified by square brackets.
     *
     * @param  string $pattern   A route pattern.
     * @param  string $delimiter The path delimiter.
     * @return array             The tokens structure root node.
     */
    public static function tokenize(string $pattern, string $delimiter = '/'): array
    {
        // A `[` located outside of any placeholder marks an optional segment; placeholders are
        // skipped through `(*SKIP)(*F)` so that brackets inside constraints are ignored.
        // `preg_match()` is used instead of counting the result of `preg_split()` because the
        // latter returns `false` on failure, which `count()` does not accept.
        $hasOptionalSegment = preg_match(
            '~' . static::PLACEHOLDER_REGEX . '(*SKIP)(*F)|\[~x',
            $pattern
        ) === 1;

        if ($hasOptionalSegment) {
            $tokens = static::_tokenizePattern($pattern, $delimiter);
        } else {
            $tokens = static::_tokenizeSegment($pattern, $delimiter);
        }

        return [
            'optional' => false,
            'greedy'   => '',
            'repeat'   => false,
            'pattern'  => $pattern,
            'tokens'   => $tokens
        ];
    }

    /**
     * Tokenizes patterns, i.e. route patterns which may contain optional segments.
     *
     * @param  string      $pattern   A route pattern.
     * @param  string      $delimiter The path delimiter.
     * @param  string|null $variable  Receives, by reference, the name of the last placeholder
     *                                tokenized so far; it is stored in the `'repeat'` key of
     *                                repeatable groups.
     * @return array                  An array of tokens structure.
     */
    protected static function _tokenizePattern(string $pattern, string $delimiter, &$variable = null): array
    {
        $tokens = [];

        foreach (static::split($pattern) as $part) {
            // Plain strings are bracket-free segments.
            if (is_string($part)) {
                $tokens = array_merge($tokens, static::_tokenizeSegment($part, $delimiter, $variable));
                continue;
            }

            // Arrays are `[pattern, greedy]` pairs describing a bracketed group.
            $greedy = $part[1];
            $repeat = $greedy === '+' || $greedy === '*';
            $optional = $greedy === '?' || $greedy === '*';

            // Must run before building the token: it updates `$variable` by reference.
            $children = static::_tokenizePattern($part[0], $delimiter, $variable);

            $tokens[] = [
                'optional' => $optional,
                'greedy'   => $greedy ?: '?',
                'repeat'   => $repeat ? $variable : false,
                'pattern'  => $part[0],
                'tokens'   => $children
            ];
        }

        return $tokens;
    }

    /**
     * Tokenizes segments, which are patterns with optional segments filtered out.
     * Only classic placeholders are supported.
     *
     * @param  string      $pattern   A route pattern with no optional segments.
     * @param  string      $delimiter The path delimiter.
     * @param  string|null $variable  Receives, by reference, the name of the last placeholder found.
     * @return array                  An array of tokens structure.
     */
    protected static function _tokenizeSegment($pattern, $delimiter, &$variable = null): array
    {
        $tokens = [];
        $index = 0;

        // The trailing empty group `()` guarantees that index 2 (the constraint) is always
        // present in each match, even when the placeholder has no constraint.
        $found = preg_match_all(
            '~' . static::PLACEHOLDER_REGEX . '()~x',
            $pattern,
            $matches,
            PREG_OFFSET_CAPTURE | PREG_SET_ORDER
        );

        if ($found) {
            foreach ($matches as $match) {
                $offset = $match[0][1];

                // Literal text located between the previous placeholder and this one.
                $literal = substr($pattern, $index, $offset - $index);
                $index = $offset + strlen($match[0][0]);

                // Compared against '' (not truthiness) so that a literal "0" is kept.
                if ($literal !== '') {
                    $tokens[] = $literal;
                }

                $variable = $match[1][0];
                // Same here: a "0" constraint is a valid regex and must not be replaced.
                $capture = $match[2][0] !== '' ? $match[2][0] : '[^' . $delimiter . ']+';

                $tokens[] = [
                    'name'    => $variable,
                    'pattern' => $capture
                ];
            }
        }

        // Trailing literal text after the last placeholder (or the whole pattern if none).
        if ($index < strlen($pattern)) {
            $tokens[] = substr($pattern, $index);
        }

        return $tokens;
    }

    /**
     * Splits a pattern in segments and patterns.
     *
     * Segments will be represented by a string value and patterns by an array containing
     * the string pattern as first value and the greedy value as second value.
     *
     * example:
     * `/user[/{id}]*` will give `['/user', ['/{id}', '*']]`
     *
     * Unfortunately a recursive regex matcher can't help here so this function is required.
     *
     * @param  string $pattern A route pattern.
     * @return array           The split pattern.
     * @throws ParserException When square brackets are not balanced.
     */
    public static function split(string $pattern): array
    {
        $segments = [];
        $len = strlen($pattern);
        $buffer = '';
        $opened = 0;

        for ($i = 0; $i < $len; $i++) {
            $char = $pattern[$i];

            if ($char === '{') {
                // Copy the whole placeholder verbatim so brackets inside its constraint are not
                // mistaken for optional segments. Brace depth is tracked because a constraint
                // may contain nested braces (e.g. `{id:[0-9]{2}[a-z]}`), and the scan is bounded
                // by `$len` so an unclosed placeholder never reads past the end of the string.
                $depth = 0;
                for (; $i < $len; $i++) {
                    $buffer .= $pattern[$i];
                    if ($pattern[$i] === '{') {
                        $depth++;
                    } elseif ($pattern[$i] === '}') {
                        $depth--;
                        if ($depth === 0) {
                            break;
                        }
                    }
                }
                continue;
            }

            if ($char === '[') {
                $opened++;
                if ($opened === 1) {
                    // Entering a top level group: flush the segment collected so far.
                    $segments[] = $buffer;
                    $buffer = '';
                } else {
                    // Nested groups stay in the buffer, they are split recursively later on.
                    $buffer .= $char;
                }
                continue;
            }

            if ($char === ']') {
                if ($opened === 0) {
                    // Closing bracket with no matching opening bracket.
                    throw ParserException::squareBracketMismatch();
                }
                $opened--;
                if ($opened > 0) {
                    $buffer .= $char;
                    continue;
                }

                // Leaving a top level group: an optional `*` or `+` suffix sets its greediness.
                $greedy = '?';
                if ($i < $len - 1 && ($pattern[$i + 1] === '*' || $pattern[$i + 1] === '+')) {
                    $greedy = $pattern[$i + 1];
                    $i++;
                }
                $segments[] = [$buffer, $greedy];
                $buffer = '';
                continue;
            }

            $buffer .= $char;
        }

        // Compared against '' (not truthiness) so that a trailing literal "0" is kept.
        if ($buffer !== '') {
            $segments[] = $buffer;
        }
        if ($opened) {
            throw ParserException::squareBracketMismatch();
        }

        return $segments;
    }

    /**
     * Builds a regex from a tokens structure array.
     *
     * @param  array $token    A tokens structure root node.
     * @return array           An array containing the regex pattern and its associated variable names.
     * @throws ParserException When a repeatable group holds more than one placeholder or when a
     *                         placeholder name is used more than once.
     */
    public static function compile($token): array
    {
        $variables = [];
        $regex = '';

        foreach ($token['tokens'] as $child) {
            // Literal text.
            if (is_string($child)) {
                $regex .= preg_quote($child, '~');
                continue;
            }

            // Placeholder.
            if (!isset($child['tokens'])) {
                $name = $child['name'];
                if (isset($variables[$name])) {
                    throw ParserException::duplicatePlaceholder($name);
                }
                if ($token['repeat']) {
                    // Inside a repeatable group the capture is done around the whole group by
                    // the parent, so only the raw constraint is emitted here and the group
                    // pattern is recorded for the variable.
                    $variables[$name] = $token['pattern'];
                    $regex .= $child['pattern'];
                } else {
                    $variables[$name] = false;
                    $regex .= '(' . $child['pattern'] . ')';
                }
                continue;
            }

            // Nested group.
            $rule = static::compile($child);

            if ($child['repeat']) {
                if (count($rule[1]) > 1) {
                    throw ParserException::placeholderExceeded();
                }
                $regex .= '((?:' . $rule[0] . ')' . $child['greedy'] . ')';
            } elseif ($child['optional']) {
                $regex .= '(?:' . $rule[0] . ')?';
            } else {
                // Neither repeatable nor optional (e.g. `[/foo]+` without any placeholder):
                // the group is still part of the route and must not vanish from the regex.
                $regex .= '(?:' . $rule[0] . ')' . ($child['greedy'] === '+' ? '+' : '');
            }

            foreach ($rule[1] as $name => $pattern) {
                if (isset($variables[$name])) {
                    throw ParserException::duplicatePlaceholder($name);
                }
                $variables[$name] = $pattern;
            }
        }

        return [$regex, $variables];
    }
}
||
| 295 |