1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace PhpMyAdmin\SqlParser\Statements; |
||
6 | |||
7 | use PhpMyAdmin\SqlParser\Components\WithKeyword; |
||
8 | use PhpMyAdmin\SqlParser\Exceptions\ParserException; |
||
9 | use PhpMyAdmin\SqlParser\Parser; |
||
10 | use PhpMyAdmin\SqlParser\Parsers\Array2d; |
||
11 | use PhpMyAdmin\SqlParser\Parsers\OptionsArrays; |
||
12 | use PhpMyAdmin\SqlParser\Statement; |
||
13 | use PhpMyAdmin\SqlParser\TokensList; |
||
14 | use PhpMyAdmin\SqlParser\TokenType; |
||
15 | use PhpMyAdmin\SqlParser\Translator; |
||
16 | |||
17 | use function array_slice; |
||
18 | use function preg_match; |
||
19 | |||
20 | /** |
||
21 | * `WITH` statement. |
||
22 | |||
23 | * WITH [RECURSIVE] query_name [ (column_name [,...]) ] AS (SELECT ...) [, ...] |
||
24 | */ |
||
25 | final class WithStatement extends Statement |
||
26 | { |
||
27 | /** |
||
28 | * Options for `WITH` statements and their slot ID. |
||
29 | * |
||
30 | * @var array<string, int|array<int, int|string>> |
||
31 | * @psalm-var array<string, (positive-int|array{positive-int, ('var'|'var='|'expr'|'expr=')})> |
||
32 | */ |
||
33 | public static array $statementOptions = ['RECURSIVE' => 1]; |
||
34 | |||
35 | /** |
||
36 | * The clauses of this statement, in order. |
||
37 | * |
||
38 | * @see Statement::$clauses |
||
39 | * |
||
40 | * @var array<string, array{non-empty-string, int-mask-of<self::ADD_*>}> |
||
0 ignored issues
–
show
Documentation
Bug
introduced
by
![]() |
|||
41 | */ |
||
42 | public static array $clauses = [ |
||
43 | 'WITH' => [ |
||
44 | 'WITH', |
||
45 | Statement::ADD_KEYWORD, |
||
46 | ], |
||
47 | // Used for options. |
||
48 | '_OPTIONS' => [ |
||
49 | '_OPTIONS', |
||
50 | Statement::ADD_CLAUSE, |
||
51 | ], |
||
52 | 'AS' => [ |
||
53 | 'AS', |
||
54 | Statement::ADD_KEYWORD, |
||
55 | ], |
||
56 | ]; |
||
57 | |||
58 | /** @var WithKeyword[] */ |
||
59 | public array $withers = []; |
||
60 | |||
61 | /** |
||
62 | * holds the CTE parser. |
||
63 | */ |
||
64 | public Parser|null $cteStatementParser = null; |
||
65 | |||
66 | /** |
||
67 | * @param Parser $parser the instance that requests parsing |
||
68 | * @param TokensList $list the list of tokens to be parsed |
||
69 | */ |
||
70 | 52 | public function parse(Parser $parser, TokensList $list): void |
|
71 | { |
||
72 | /** |
||
73 | * The state of the parser. |
||
74 | * |
||
75 | * Below are the states of the parser. |
||
76 | * |
||
77 | * 0 ---------------- [ name ] -----------------> 1 |
||
78 | * |
||
79 | * 1 ------------------ [ ( ] ------------------> 2 |
||
80 | * |
||
81 | * 2 ------------------ [ AS ] -----------------> 3 |
||
82 | * |
||
83 | * 3 ------------------ [ ( ] ------------------> 4 |
||
84 | * |
||
85 | * 4 ------------------ [ , ] ------------------> 1 |
||
86 | * |
||
87 | * 4 ----- [ SELECT/UPDATE/DELETE/INSERT ] -----> 5 |
||
88 | */ |
||
89 | 52 | $state = 0; |
|
90 | 52 | $wither = null; |
|
91 | |||
92 | 52 | ++$list->idx; // Skipping `WITH`. |
|
93 | |||
94 | // parse any options if provided |
||
95 | 52 | $this->options = OptionsArrays::parse($parser, $list, static::$statementOptions); |
|
96 | 52 | ++$list->idx; |
|
97 | |||
98 | 52 | for (; $list->idx < $list->count; ++$list->idx) { |
|
99 | /** |
||
100 | * Token parsed at this moment. |
||
101 | */ |
||
102 | 52 | $token = $list->tokens[$list->idx]; |
|
103 | |||
104 | // Skipping whitespaces and comments. |
||
105 | 52 | if ($token->type === TokenType::Whitespace || $token->type === TokenType::Comment) { |
|
106 | 46 | continue; |
|
107 | } |
||
108 | |||
109 | 52 | if ($state === 0) { |
|
110 | 52 | if ($token->type !== TokenType::None || ! preg_match('/^[a-zA-Z0-9_$]+$/', $token->token)) { |
|
111 | 6 | $parser->error('The name of the CTE was expected.', $token); |
|
112 | 6 | break; |
|
113 | } |
||
114 | |||
115 | 48 | $wither = $token->value; |
|
116 | 48 | $this->withers[$wither] = new WithKeyword($wither); |
|
117 | 48 | $state = 1; |
|
118 | 48 | } elseif ($state === 1) { |
|
119 | 48 | if ($token->type === TokenType::Operator && $token->value === '(') { |
|
120 | 40 | $columns = Array2d::parse($parser, $list); |
|
121 | 40 | if ($parser->errors !== []) { |
|
122 | 2 | break; |
|
123 | } |
||
124 | |||
125 | 38 | $this->withers[$wither]->columns = $columns; |
|
126 | 38 | $state = 2; |
|
127 | 12 | } elseif ($token->type === TokenType::Keyword && $token->keyword === 'AS') { |
|
128 | 10 | $state = 3; |
|
129 | } else { |
||
130 | 2 | $parser->error('Unexpected token.', $token); |
|
131 | 2 | break; |
|
132 | } |
||
133 | 44 | } elseif ($state === 2) { |
|
134 | 38 | if (! ($token->type === TokenType::Keyword && $token->keyword === 'AS')) { |
|
135 | 2 | $parser->error('AS keyword was expected.', $token); |
|
136 | 2 | break; |
|
137 | } |
||
138 | |||
139 | 36 | $state = 3; |
|
140 | 42 | } elseif ($state === 3) { |
|
141 | 42 | $idxBeforeGetNext = $list->idx; |
|
142 | |||
143 | 42 | $list->idx++; // Ignore the current token |
|
144 | 42 | $nextKeyword = $list->getNext(); |
|
145 | |||
146 | 42 | if (! ($token->value === '(' && ($nextKeyword && $nextKeyword->value === 'SELECT'))) { |
|
147 | 4 | $parser->error('Subquery of the CTE was expected.', $token); |
|
148 | 4 | $list->idx = $idxBeforeGetNext; |
|
149 | 4 | break; |
|
150 | } |
||
151 | |||
152 | // Restore the index |
||
153 | 38 | $list->idx = $idxBeforeGetNext; |
|
154 | |||
155 | 38 | ++$list->idx; |
|
156 | 38 | $subList = $this->getSubTokenList($list); |
|
157 | 38 | if ($subList instanceof ParserException) { |
|
158 | 2 | $parser->errors[] = $subList; |
|
159 | 2 | break; |
|
160 | } |
||
161 | |||
162 | 36 | $subParser = new Parser($subList); |
|
163 | |||
164 | 36 | if ($subParser->errors !== []) { |
|
165 | 2 | foreach ($subParser->errors as $error) { |
|
166 | 2 | $parser->errors[] = $error; |
|
167 | } |
||
168 | |||
169 | 2 | break; |
|
170 | } |
||
171 | |||
172 | 34 | $this->withers[$wither]->statement = $subParser; |
|
173 | |||
174 | 34 | $state = 4; |
|
175 | 34 | } elseif ($state === 4) { |
|
176 | 34 | if ($token->value === ',') { |
|
177 | // There's another WITH expression to parse, go back to state=0 |
||
178 | 10 | $state = 0; |
|
179 | 10 | continue; |
|
180 | } |
||
181 | |||
182 | if ( |
||
183 | 32 | $token->type === TokenType::Keyword && ( |
|
184 | 32 | $token->value === 'SELECT' |
|
185 | 32 | || $token->value === 'INSERT' |
|
186 | 32 | || $token->value === 'UPDATE' |
|
187 | 32 | || $token->value === 'DELETE' |
|
188 | ) |
||
189 | ) { |
||
190 | 26 | $state = 5; |
|
191 | 26 | --$list->idx; |
|
192 | 26 | continue; |
|
193 | } |
||
194 | |||
195 | 6 | $parser->error('An expression was expected.', $token); |
|
196 | 6 | break; |
|
197 | 26 | } elseif ($state === 5) { |
|
198 | /** |
||
199 | * We need to parse all of the remaining tokens becuase mostly, they are only the CTE expression |
||
200 | * which's mostly is SELECT, or INSERT, UPDATE, or delete statement. |
||
201 | * e.g: INSERT .. ( SELECT 1 ) SELECT col1 FROM cte ON DUPLICATE KEY UPDATE col_name = 3. |
||
202 | * The issue is that, `ON DUPLICATE KEY UPDATE col_name = 3` is related to the main INSERT query |
||
203 | * not the cte expression (SELECT col1 FROM cte) we need to determine the end of the expression |
||
204 | * to parse `ON DUPLICATE KEY UPDATE` from the InsertStatement parser instead. |
||
205 | */ |
||
206 | |||
207 | // Index of the last parsed token by default would be the last token in the $list, because we're |
||
208 | // assuming that all remaining tokens at state 4, are related to the expression. |
||
209 | 26 | $idxOfLastParsedToken = $list->count - 1; |
|
210 | // Index before search to be able to restore the index. |
||
211 | 26 | $idxBeforeSearch = $list->idx; |
|
212 | // Length of expression tokens is null by default, in order for the $subList to start |
||
213 | // from $list->idx to the end of the $list. |
||
214 | 26 | $lengthOfExpressionTokens = null; |
|
215 | |||
216 | 26 | if ($list->getNextOfTypeAndValue(TokenType::Keyword, 'ON')) { |
|
217 | // (-1) because getNextOfTypeAndValue returned ON and increased the index. |
||
218 | 4 | $idxOfOn = $list->idx - 1; |
|
219 | // We want to make sure that it's `ON DUPLICATE KEY UPDATE` |
||
220 | 4 | $dubplicateToken = $list->getNext(); |
|
221 | 4 | $keyToken = $list->getNext(); |
|
222 | 4 | $updateToken = $list->getNext(); |
|
223 | if ( |
||
224 | 4 | $dubplicateToken && $dubplicateToken->keyword === 'DUPLICATE' |
|
225 | 4 | && ($keyToken && $keyToken->keyword === 'KEY') |
|
226 | 4 | && ($updateToken && $updateToken->keyword === 'UPDATE') |
|
227 | ) { |
||
228 | // Index of the last parsed token will be the token before the ON Keyword |
||
229 | 2 | $idxOfLastParsedToken = $idxOfOn - 1; |
|
230 | // The length of the expression tokens would be the difference |
||
231 | // between the first unrelated token `ON` and the idx |
||
232 | // before skipping the CTE tokens. |
||
233 | 2 | $lengthOfExpressionTokens = $idxOfOn - $idxBeforeSearch; |
|
234 | } |
||
235 | } |
||
236 | |||
237 | // Restore the index |
||
238 | 26 | $list->idx = $idxBeforeSearch; |
|
239 | |||
240 | 26 | $subList = new TokensList(array_slice($list->tokens, $list->idx, $lengthOfExpressionTokens)); |
|
241 | 26 | $subParser = new Parser($subList); |
|
242 | 26 | if ($subParser->errors !== []) { |
|
243 | 2 | foreach ($subParser->errors as $error) { |
|
244 | 2 | $parser->errors[] = $error; |
|
245 | } |
||
246 | |||
247 | 2 | break; |
|
248 | } |
||
249 | |||
250 | 24 | $this->cteStatementParser = $subParser; |
|
251 | |||
252 | 24 | $list->idx = $idxOfLastParsedToken; |
|
253 | 24 | break; |
|
254 | } |
||
255 | } |
||
256 | |||
257 | // 5 is the only valid end state |
||
258 | 52 | if ($state !== 5) { |
|
0 ignored issues
–
show
|
|||
259 | /** |
||
260 | * Token parsed at this moment. |
||
261 | */ |
||
262 | 26 | $token = $list->tokens[$list->idx]; |
|
263 | |||
264 | 26 | $parser->error('Unexpected end of the WITH CTE.', $token); |
|
265 | } |
||
266 | |||
267 | 52 | --$list->idx; |
|
268 | } |
||
269 | |||
270 | 10 | public function build(): string |
|
271 | { |
||
272 | 10 | $str = 'WITH '; |
|
273 | |||
274 | 10 | foreach ($this->withers as $wither) { |
|
275 | 10 | $str .= $str === 'WITH ' ? '' : ', '; |
|
276 | 10 | $str .= $wither->build(); |
|
277 | } |
||
278 | |||
279 | 10 | $str .= ' '; |
|
280 | |||
281 | 10 | if ($this->cteStatementParser) { |
|
282 | 10 | foreach ($this->cteStatementParser->statements as $statement) { |
|
283 | 10 | $str .= $statement->build(); |
|
284 | } |
||
285 | } |
||
286 | |||
287 | 10 | return $str; |
|
288 | } |
||
289 | |||
290 | /** |
||
291 | * Get tokens within the WITH expression to use them in another parser |
||
292 | */ |
||
293 | 38 | private function getSubTokenList(TokensList $list): ParserException|TokensList |
|
294 | { |
||
295 | 38 | $idx = $list->idx; |
|
296 | 38 | $token = $list->tokens[$list->idx]; |
|
297 | 38 | $openParenthesis = 0; |
|
298 | |||
299 | 38 | while ($list->idx < $list->count) { |
|
300 | 38 | if ($token->value === '(') { |
|
301 | 4 | ++$openParenthesis; |
|
302 | 38 | } elseif ($token->value === ')') { |
|
303 | 38 | if (--$openParenthesis === -1) { |
|
304 | 36 | break; |
|
305 | } |
||
306 | } |
||
307 | |||
308 | 38 | ++$list->idx; |
|
309 | 38 | if (! isset($list->tokens[$list->idx])) { |
|
310 | 2 | break; |
|
311 | } |
||
312 | |||
313 | 38 | $token = $list->tokens[$list->idx]; |
|
314 | } |
||
315 | |||
316 | // performance improvement: return the error to avoid a try/catch in the loop |
||
317 | 38 | if ($list->idx === $list->count) { |
|
318 | 2 | --$list->idx; |
|
319 | |||
320 | 2 | return new ParserException( |
|
321 | 2 | Translator::gettext('A closing bracket was expected.'), |
|
322 | 2 | $token, |
|
323 | 2 | ); |
|
324 | } |
||
325 | |||
326 | 36 | $length = $list->idx - $idx; |
|
327 | |||
328 | 36 | return new TokensList(array_slice($list->tokens, $idx, $length)); |
|
329 | } |
||
330 | } |
||
331 |