Issues (25)

src/Parser.php (2 issues)

1
<?php
2
namespace Lead\Router;
3
4
/**
 * Parses route patterns.
 *
 * The parser can produce a tokens structure from a route pattern using `Parser::tokenize()`.
 * A tokens structure root node is built as follows:
 *
 * ```php
 * $token = Parser::tokenize('/test/{param}');
 * ```
 *
 * The returned `$token` looks like the following:
 * ```
 * [
 *     'optional' => false,
 *     'greedy'   => '',
 *     'repeat'   => false,
 *     'pattern'  => '/test/{param}',
 *     'tokens'   => [
 *         '/test/',
 *         [
 *             'name'      => 'param',
 *             'pattern'   => '[^/]+'
 *         ]
 *     ]
 * ]
 * ```
 *
 * A tokens structure can then be compiled to get its regex representation together with the
 * associated variables.
 *
 * ```php
 * $rule = Parser::compile($token);
 * ```
 *
 * `$rule` looks like the following:
 *
 * ```
 * [
 *     '/test/([^/]+)',
 *     ['param' => false]
 * ]
 * ```
 */
class Parser {

    /**
     * Variable capturing block regex (to be used with the `x` modifier).
     *
     * Group 1 captures the placeholder name, group 2 captures the optional constraint following
     * the `:`. The constraint may itself contain balanced `{...}` pairs (e.g. `{id:[0-9]{2}}`).
     */
    const PLACEHOLDER_REGEX = <<<EOD
\{
    (
        [a-zA-Z][a-zA-Z0-9_]*
    )
    (?:
        :(
            [^{}]*
            (?:
                \{(?-1)\}[^{}]*
            )*
        )
    )?
\}
EOD;

    /**
     * Tokenizes a route pattern. Optional segments are identified by square brackets.
     *
     * @param  string $pattern   A route pattern
     * @param  string $delimiter The path delimiter.
     * @return array             The tokens structure root node.
     */
    public static function tokenize($pattern, $delimiter = '/')
    {
        // A `[` located outside of any `{...}` placeholder marks an optional segment. Placeholders
        // are consumed then discarded through `(*SKIP)(*F)` so the brackets of their constraint
        // (e.g. `{id:[0-9]+}`) are not mistaken for optional segments.
        $hasOptional = preg_match('~' . static::PLACEHOLDER_REGEX . '(*SKIP)(*F)|\[~x', $pattern) === 1;

        if ($hasOptional) {
            $tokens = static::_tokenizePattern($pattern, $delimiter);
        } else {
            $tokens = static::_tokenizeSegment($pattern, $delimiter);
        }

        return [
            'optional' => false,
            'greedy'   => '',
            'repeat'   => false,
            'pattern'  => $pattern,
            'tokens'   => $tokens
        ];
    }

    /**
     * Tokenizes patterns.
     *
     * @param  string      $pattern   A route pattern
     * @param  string      $delimiter The path delimiter.
     * @param  string|null $variable  Receives, by reference, the name of the last placeholder
     *                                which has been tokenized.
     * @return array                  An array of tokens structure.
     */
    protected static function _tokenizePattern($pattern, $delimiter, &$variable = null)
    {
        $tokens = [];

        foreach (static::split($pattern) as $part) {
            // Strings are plain segments, arrays are `[pattern, greedy]` optional segments.
            if (is_string($part)) {
                $tokens = array_merge($tokens, static::_tokenizeSegment($part, $delimiter, $variable));
                continue;
            }

            $greedy = $part[1];
            $repeat = $greedy === '+' || $greedy === '*';
            $optional = $greedy === '?' || $greedy === '*';

            // Must run before building the node: it updates `$variable` with the name of the last
            // placeholder found inside the nested pattern.
            $children = static::_tokenizePattern($part[0], $delimiter, $variable);

            $tokens[] = [
                'optional' => $optional,
                'greedy'   => $greedy ?: '?',
                'repeat'   => $repeat ? $variable : false,
                'pattern'  => $part[0],
                'tokens'   => $children
            ];
        }
        return $tokens;
    }

    /**
     * Tokenizes segments which are patterns with optional segments filtered out.
     * Only classic placeholders are supported.
     *
     * @param  string      $pattern   A route pattern with no optional segments.
     * @param  string      $delimiter The path delimiter.
     * @param  string|null $variable  Receives, by reference, the name of the last placeholder
     *                                which has been tokenized.
     * @return array                  An array of tokens structure.
     */
    protected static function _tokenizeSegment($pattern, $delimiter, &$variable = null)
    {
        $tokens = [];
        $index = 0;

        // The trailing empty group `()` guarantees that the constraint group (index 2) is always
        // present in each match set, even when the placeholder has no constraint.
        $regex = '~' . static::PLACEHOLDER_REGEX . '()~x';
        $flags = PREG_OFFSET_CAPTURE | PREG_SET_ORDER;

        if (preg_match_all($regex, $pattern, $matches, $flags)) {
            foreach ($matches as $match) {
                $offset = $match[0][1];

                // Literal text located between the previous placeholder and this one.
                $path = (string) substr($pattern, $index, $offset - $index);
                $index = $offset + strlen($match[0][0]);

                // Compared against '' (not truthiness) so that a literal "0" is kept.
                if ($path !== '') {
                    $tokens[] = $path;
                }

                $variable = $match[1][0];
                // An empty or missing constraint falls back to "anything but the delimiter".
                $capture = $match[2][0] !== '' ? $match[2][0] : '[^' . $delimiter . ']+';

                $tokens[] = [
                    'name'      => $variable,
                    'pattern'   => $capture
                ];
            }
        }

        // Remaining literal text after the last placeholder (or the whole pattern if none).
        if ($index < strlen($pattern)) {
            $tokens[] = substr($pattern, $index);
        }
        return $tokens;
    }

    /**
     * Splits a pattern in segments and patterns.
     * Segments are represented by a string value and patterns by an array containing
     * the string pattern as first value and the greedy value as second value.
     *
     * example:
     * `/user[/{id}]*` will give `['/user', ['/{id}', '*']]`
     *
     * Note: the text located before a top level `[` is always pushed as a segment, even when it is
     * empty (e.g. `[/a]` gives `['', ['/a', '?']]`).
     *
     * A recursive regex matcher can't help here, so the pattern is scanned manually.
     *
     * @param  string $pattern A route pattern.
     * @return array           The split pattern.
     * @throws ParserException When square brackets are not balanced.
     */
    public static function split($pattern)
    {
        $segments = [];
        $len = strlen($pattern);
        $buffer = '';
        $opened = 0;

        for ($i = 0; $i < $len; $i++) {
            $char = $pattern[$i];

            if ($char === '{') {
                // Copy the placeholder verbatim up to its matching `}` so that square brackets of
                // its constraint are not interpreted. Nested braces are tracked, and an unclosed
                // brace simply consumes the rest of the pattern without reading past its end.
                $depth = 0;
                for (; $i < $len; $i++) {
                    $buffer .= $pattern[$i];
                    if ($pattern[$i] === '{') {
                        $depth++;
                    } elseif ($pattern[$i] === '}' && --$depth === 0) {
                        break;
                    }
                }
                continue;
            }

            if ($char === '[') {
                if (++$opened === 1) {
                    // Entering a top level optional segment: flush the text preceding it.
                    $segments[] = $buffer;
                    $buffer = '';
                    continue;
                }
            } elseif ($char === ']') {
                if (--$opened < 0) {
                    // A closing bracket with no matching opening one can never be balanced later.
                    throw ParserException::squareBracketMismatch();
                }
                if ($opened === 0) {
                    // Leaving a top level optional segment: look ahead for a `*` or `+` quantifier.
                    $greedy = '?';
                    if ($i + 1 < $len && ($pattern[$i + 1] === '*' || $pattern[$i + 1] === '+')) {
                        $greedy = $pattern[++$i];
                    }
                    $segments[] = [$buffer, $greedy];
                    $buffer = '';
                    continue;
                }
            }

            // Regular characters and nested brackets belong to the current buffer.
            $buffer .= $char;
        }

        if ($opened !== 0) {
            throw ParserException::squareBracketMismatch();
        }
        // Compared against '' (not truthiness) so that a trailing "0" is kept.
        if ($buffer !== '') {
            $segments[] = $buffer;
        }
        return $segments;
    }

    /**
     * Builds a regex from a tokens structure array.
     *
     * In the returned variables array, a placeholder maps to `false` when it is captured directly,
     * or to the pattern of its enclosing segment when that segment is repeatable.
     *
     * @param  array $token A tokens structure root node.
     * @return array        An array containing the regex pattern and its associated variable names.
     * @throws ParserException When a placeholder name is used twice or when a repeatable segment
     *                         contains more than one placeholder.
     */
    public static function compile($token)
    {
        $variables = [];
        $regex = '';

        foreach ($token['tokens'] as $child) {
            if (is_string($child)) {
                // Literal text.
                $regex .= preg_quote($child, '~');
                continue;
            }

            if (isset($child['tokens'])) {
                // Nested segment.
                $rule = static::compile($child);

                if ($child['repeat']) {
                    if (count($rule[1]) > 1) {
                        throw ParserException::placeholderExceeded();
                    }
                    // The whole repetition is captured as a single group.
                    $regex .= '((?:' . $rule[0] . ')' . $child['greedy'] . ')';
                } elseif ($child['optional']) {
                    $regex .= '(?:' . $rule[0] . ')?';
                } else {
                    // Never produced by the tokenizer, but keeps the regex aligned with the
                    // variables registered below when a plain group is passed in.
                    $regex .= '(?:' . $rule[0] . ')';
                }

                foreach ($rule[1] as $name => $pattern) {
                    if (isset($variables[$name])) {
                        throw ParserException::duplicatePlaceholder($name);
                    }
                    $variables[$name] = $pattern;
                }
                continue;
            }

            // Placeholder.
            $name = $child['name'];
            if (isset($variables[$name])) {
                throw ParserException::duplicatePlaceholder($name);
            }

            if ($token['repeat']) {
                // Inside a repeatable segment the capturing group is added by the parent node.
                $variables[$name] = $token['pattern'];
                $regex .= $child['pattern'];
            } else {
                $variables[$name] = false;
                $regex .= '(' . $child['pattern'] . ')';
            }
        }
        return [$regex, $variables];
    }
}
284