phpmyadmin /
sql-parser
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace PhpMyAdmin\SqlParser\Statements; |
||
| 6 | |||
| 7 | use PhpMyAdmin\SqlParser\Components\WithKeyword; |
||
| 8 | use PhpMyAdmin\SqlParser\Exceptions\ParserException; |
||
| 9 | use PhpMyAdmin\SqlParser\Parser; |
||
| 10 | use PhpMyAdmin\SqlParser\Parsers\Array2d; |
||
| 11 | use PhpMyAdmin\SqlParser\Parsers\OptionsArrays; |
||
| 12 | use PhpMyAdmin\SqlParser\Statement; |
||
| 13 | use PhpMyAdmin\SqlParser\TokensList; |
||
| 14 | use PhpMyAdmin\SqlParser\TokenType; |
||
| 15 | use PhpMyAdmin\SqlParser\Translator; |
||
| 16 | |||
| 17 | use function array_slice; |
||
| 18 | use function preg_match; |
||
| 19 | |||
| 20 | /** |
||
| 21 | * `WITH` statement. |
||
| 22 | |||
| 23 | * WITH [RECURSIVE] query_name [ (column_name [,...]) ] AS (SELECT ...) [, ...] |
||
| 24 | */ |
||
| 25 | final class WithStatement extends Statement |
||
| 26 | { |
||
| 27 | /** |
||
| 28 | * Options for `WITH` statements and their slot ID. |
||
| 29 | * |
||
| 30 | * @var array<string, int|array<int, int|string>> |
||
| 31 | * @psalm-var array<string, (positive-int|array{positive-int, ('var'|'var='|'expr'|'expr=')})> |
||
| 32 | */ |
||
| 33 | public static array $statementOptions = ['RECURSIVE' => 1]; |
||
| 34 | |||
| 35 | /** |
||
| 36 | * The clauses of this statement, in order. |
||
| 37 | * |
||
| 38 | * @see Statement::$clauses |
||
| 39 | * |
||
| 40 | * @var array<string, array{non-empty-string, int-mask-of<self::ADD_*>}> |
||
|
0 ignored issues
–
show
Documentation
Bug
introduced
by
Loading history...
|
|||
| 41 | */ |
||
| 42 | public static array $clauses = [ |
||
| 43 | 'WITH' => [ |
||
| 44 | 'WITH', |
||
| 45 | Statement::ADD_KEYWORD, |
||
| 46 | ], |
||
| 47 | // Used for options. |
||
| 48 | '_OPTIONS' => [ |
||
| 49 | '_OPTIONS', |
||
| 50 | Statement::ADD_CLAUSE, |
||
| 51 | ], |
||
| 52 | 'AS' => [ |
||
| 53 | 'AS', |
||
| 54 | Statement::ADD_KEYWORD, |
||
| 55 | ], |
||
| 56 | ]; |
||
| 57 | |||
| 58 | /** @var WithKeyword[] */ |
||
| 59 | public array $withers = []; |
||
| 60 | |||
| 61 | /** |
||
| 62 | * holds the CTE parser. |
||
| 63 | */ |
||
| 64 | public Parser|null $cteStatementParser = null; |
||
| 65 | |||
| 66 | /** |
||
| 67 | * @param Parser $parser the instance that requests parsing |
||
| 68 | * @param TokensList $list the list of tokens to be parsed |
||
| 69 | */ |
||
| 70 | 52 | public function parse(Parser $parser, TokensList $list): void |
|
| 71 | { |
||
| 72 | /** |
||
| 73 | * The state of the parser. |
||
| 74 | * |
||
| 75 | * Below are the states of the parser. |
||
| 76 | * |
||
| 77 | * 0 ---------------- [ name ] -----------------> 1 |
||
| 78 | * |
||
| 79 | * 1 ------------------ [ ( ] ------------------> 2 |
||
| 80 | * |
||
| 81 | * 2 ------------------ [ AS ] -----------------> 3 |
||
| 82 | * |
||
| 83 | * 3 ------------------ [ ( ] ------------------> 4 |
||
| 84 | * |
||
| 85 | * 4 ------------------ [ , ] ------------------> 1 |
||
| 86 | * |
||
| 87 | * 4 ----- [ SELECT/UPDATE/DELETE/INSERT ] -----> 5 |
||
| 88 | */ |
||
| 89 | 52 | $state = 0; |
|
| 90 | 52 | $wither = null; |
|
| 91 | |||
| 92 | 52 | ++$list->idx; // Skipping `WITH`. |
|
| 93 | |||
| 94 | // parse any options if provided |
||
| 95 | 52 | $this->options = OptionsArrays::parse($parser, $list, static::$statementOptions); |
|
| 96 | 52 | ++$list->idx; |
|
| 97 | |||
| 98 | 52 | for (; $list->idx < $list->count; ++$list->idx) { |
|
| 99 | /** |
||
| 100 | * Token parsed at this moment. |
||
| 101 | */ |
||
| 102 | 52 | $token = $list->tokens[$list->idx]; |
|
| 103 | |||
| 104 | // Skipping whitespaces and comments. |
||
| 105 | 52 | if ($token->type === TokenType::Whitespace || $token->type === TokenType::Comment) { |
|
| 106 | 46 | continue; |
|
| 107 | } |
||
| 108 | |||
| 109 | 52 | if ($state === 0) { |
|
| 110 | 52 | if ($token->type !== TokenType::None || ! preg_match('/^[a-zA-Z0-9_$]+$/', $token->token)) { |
|
| 111 | 6 | $parser->error('The name of the CTE was expected.', $token); |
|
| 112 | 6 | break; |
|
| 113 | } |
||
| 114 | |||
| 115 | 48 | $wither = $token->value; |
|
| 116 | 48 | $this->withers[$wither] = new WithKeyword($wither); |
|
| 117 | 48 | $state = 1; |
|
| 118 | 48 | } elseif ($state === 1) { |
|
| 119 | 48 | if ($token->type === TokenType::Operator && $token->value === '(') { |
|
| 120 | 40 | $columns = Array2d::parse($parser, $list); |
|
| 121 | 40 | if ($parser->errors !== []) { |
|
| 122 | 2 | break; |
|
| 123 | } |
||
| 124 | |||
| 125 | 38 | $this->withers[$wither]->columns = $columns; |
|
| 126 | 38 | $state = 2; |
|
| 127 | 12 | } elseif ($token->type === TokenType::Keyword && $token->keyword === 'AS') { |
|
| 128 | 10 | $state = 3; |
|
| 129 | } else { |
||
| 130 | 2 | $parser->error('Unexpected token.', $token); |
|
| 131 | 2 | break; |
|
| 132 | } |
||
| 133 | 44 | } elseif ($state === 2) { |
|
| 134 | 38 | if (! ($token->type === TokenType::Keyword && $token->keyword === 'AS')) { |
|
| 135 | 2 | $parser->error('AS keyword was expected.', $token); |
|
| 136 | 2 | break; |
|
| 137 | } |
||
| 138 | |||
| 139 | 36 | $state = 3; |
|
| 140 | 42 | } elseif ($state === 3) { |
|
| 141 | 42 | $idxBeforeGetNext = $list->idx; |
|
| 142 | |||
| 143 | 42 | $list->idx++; // Ignore the current token |
|
| 144 | 42 | $nextKeyword = $list->getNext(); |
|
| 145 | |||
| 146 | 42 | if (! ($token->value === '(' && ($nextKeyword && $nextKeyword->value === 'SELECT'))) { |
|
| 147 | 4 | $parser->error('Subquery of the CTE was expected.', $token); |
|
| 148 | 4 | $list->idx = $idxBeforeGetNext; |
|
| 149 | 4 | break; |
|
| 150 | } |
||
| 151 | |||
| 152 | // Restore the index |
||
| 153 | 38 | $list->idx = $idxBeforeGetNext; |
|
| 154 | |||
| 155 | 38 | ++$list->idx; |
|
| 156 | 38 | $subList = $this->getSubTokenList($list); |
|
| 157 | 38 | if ($subList instanceof ParserException) { |
|
| 158 | 2 | $parser->errors[] = $subList; |
|
| 159 | 2 | break; |
|
| 160 | } |
||
| 161 | |||
| 162 | 36 | $subParser = new Parser($subList); |
|
| 163 | |||
| 164 | 36 | if ($subParser->errors !== []) { |
|
| 165 | 2 | foreach ($subParser->errors as $error) { |
|
| 166 | 2 | $parser->errors[] = $error; |
|
| 167 | } |
||
| 168 | |||
| 169 | 2 | break; |
|
| 170 | } |
||
| 171 | |||
| 172 | 34 | $this->withers[$wither]->statement = $subParser; |
|
| 173 | |||
| 174 | 34 | $state = 4; |
|
| 175 | 34 | } elseif ($state === 4) { |
|
| 176 | 34 | if ($token->value === ',') { |
|
| 177 | // There's another WITH expression to parse, go back to state=0 |
||
| 178 | 10 | $state = 0; |
|
| 179 | 10 | continue; |
|
| 180 | } |
||
| 181 | |||
| 182 | if ( |
||
| 183 | 32 | $token->type === TokenType::Keyword && ( |
|
| 184 | 32 | $token->value === 'SELECT' |
|
| 185 | 32 | || $token->value === 'INSERT' |
|
| 186 | 32 | || $token->value === 'UPDATE' |
|
| 187 | 32 | || $token->value === 'DELETE' |
|
| 188 | ) |
||
| 189 | ) { |
||
| 190 | 26 | $state = 5; |
|
| 191 | 26 | --$list->idx; |
|
| 192 | 26 | continue; |
|
| 193 | } |
||
| 194 | |||
| 195 | 6 | $parser->error('An expression was expected.', $token); |
|
| 196 | 6 | break; |
|
| 197 | 26 | } elseif ($state === 5) { |
|
| 198 | /** |
||
| 199 | * We need to parse all of the remaining tokens becuase mostly, they are only the CTE expression |
||
| 200 | * which's mostly is SELECT, or INSERT, UPDATE, or delete statement. |
||
| 201 | * e.g: INSERT .. ( SELECT 1 ) SELECT col1 FROM cte ON DUPLICATE KEY UPDATE col_name = 3. |
||
| 202 | * The issue is that, `ON DUPLICATE KEY UPDATE col_name = 3` is related to the main INSERT query |
||
| 203 | * not the cte expression (SELECT col1 FROM cte) we need to determine the end of the expression |
||
| 204 | * to parse `ON DUPLICATE KEY UPDATE` from the InsertStatement parser instead. |
||
| 205 | */ |
||
| 206 | |||
| 207 | // Index of the last parsed token by default would be the last token in the $list, because we're |
||
| 208 | // assuming that all remaining tokens at state 4, are related to the expression. |
||
| 209 | 26 | $idxOfLastParsedToken = $list->count - 1; |
|
| 210 | // Index before search to be able to restore the index. |
||
| 211 | 26 | $idxBeforeSearch = $list->idx; |
|
| 212 | // Length of expression tokens is null by default, in order for the $subList to start |
||
| 213 | // from $list->idx to the end of the $list. |
||
| 214 | 26 | $lengthOfExpressionTokens = null; |
|
| 215 | |||
| 216 | 26 | if ($list->getNextOfTypeAndValue(TokenType::Keyword, 'ON')) { |
|
| 217 | // (-1) because getNextOfTypeAndValue returned ON and increased the index. |
||
| 218 | 4 | $idxOfOn = $list->idx - 1; |
|
| 219 | // We want to make sure that it's `ON DUPLICATE KEY UPDATE` |
||
| 220 | 4 | $dubplicateToken = $list->getNext(); |
|
| 221 | 4 | $keyToken = $list->getNext(); |
|
| 222 | 4 | $updateToken = $list->getNext(); |
|
| 223 | if ( |
||
| 224 | 4 | $dubplicateToken && $dubplicateToken->keyword === 'DUPLICATE' |
|
| 225 | 4 | && ($keyToken && $keyToken->keyword === 'KEY') |
|
| 226 | 4 | && ($updateToken && $updateToken->keyword === 'UPDATE') |
|
| 227 | ) { |
||
| 228 | // Index of the last parsed token will be the token before the ON Keyword |
||
| 229 | 2 | $idxOfLastParsedToken = $idxOfOn - 1; |
|
| 230 | // The length of the expression tokens would be the difference |
||
| 231 | // between the first unrelated token `ON` and the idx |
||
| 232 | // before skipping the CTE tokens. |
||
| 233 | 2 | $lengthOfExpressionTokens = $idxOfOn - $idxBeforeSearch; |
|
| 234 | } |
||
| 235 | } |
||
| 236 | |||
| 237 | // Restore the index |
||
| 238 | 26 | $list->idx = $idxBeforeSearch; |
|
| 239 | |||
| 240 | 26 | $subList = new TokensList(array_slice($list->tokens, $list->idx, $lengthOfExpressionTokens)); |
|
| 241 | 26 | $subParser = new Parser($subList); |
|
| 242 | 26 | if ($subParser->errors !== []) { |
|
| 243 | 2 | foreach ($subParser->errors as $error) { |
|
| 244 | 2 | $parser->errors[] = $error; |
|
| 245 | } |
||
| 246 | |||
| 247 | 2 | break; |
|
| 248 | } |
||
| 249 | |||
| 250 | 24 | $this->cteStatementParser = $subParser; |
|
| 251 | |||
| 252 | 24 | $list->idx = $idxOfLastParsedToken; |
|
| 253 | 24 | break; |
|
| 254 | } |
||
| 255 | } |
||
| 256 | |||
| 257 | // 5 is the only valid end state |
||
| 258 | 52 | if ($state !== 5) { |
|
|
0 ignored issues
–
show
|
|||
| 259 | /** |
||
| 260 | * Token parsed at this moment. |
||
| 261 | */ |
||
| 262 | 26 | $token = $list->tokens[$list->idx]; |
|
| 263 | |||
| 264 | 26 | $parser->error('Unexpected end of the WITH CTE.', $token); |
|
| 265 | } |
||
| 266 | |||
| 267 | 52 | --$list->idx; |
|
| 268 | } |
||
| 269 | |||
| 270 | 10 | public function build(): string |
|
| 271 | { |
||
| 272 | 10 | $str = 'WITH '; |
|
| 273 | |||
| 274 | 10 | foreach ($this->withers as $wither) { |
|
| 275 | 10 | $str .= $str === 'WITH ' ? '' : ', '; |
|
| 276 | 10 | $str .= $wither->build(); |
|
| 277 | } |
||
| 278 | |||
| 279 | 10 | $str .= ' '; |
|
| 280 | |||
| 281 | 10 | if ($this->cteStatementParser) { |
|
| 282 | 10 | foreach ($this->cteStatementParser->statements as $statement) { |
|
| 283 | 10 | $str .= $statement->build(); |
|
| 284 | } |
||
| 285 | } |
||
| 286 | |||
| 287 | 10 | return $str; |
|
| 288 | } |
||
| 289 | |||
| 290 | /** |
||
| 291 | * Get tokens within the WITH expression to use them in another parser |
||
| 292 | */ |
||
| 293 | 38 | private function getSubTokenList(TokensList $list): ParserException|TokensList |
|
| 294 | { |
||
| 295 | 38 | $idx = $list->idx; |
|
| 296 | 38 | $token = $list->tokens[$list->idx]; |
|
| 297 | 38 | $openParenthesis = 0; |
|
| 298 | |||
| 299 | 38 | while ($list->idx < $list->count) { |
|
| 300 | 38 | if ($token->value === '(') { |
|
| 301 | 4 | ++$openParenthesis; |
|
| 302 | 38 | } elseif ($token->value === ')') { |
|
| 303 | 38 | if (--$openParenthesis === -1) { |
|
| 304 | 36 | break; |
|
| 305 | } |
||
| 306 | } |
||
| 307 | |||
| 308 | 38 | ++$list->idx; |
|
| 309 | 38 | if (! isset($list->tokens[$list->idx])) { |
|
| 310 | 2 | break; |
|
| 311 | } |
||
| 312 | |||
| 313 | 38 | $token = $list->tokens[$list->idx]; |
|
| 314 | } |
||
| 315 | |||
| 316 | // performance improvement: return the error to avoid a try/catch in the loop |
||
| 317 | 38 | if ($list->idx === $list->count) { |
|
| 318 | 2 | --$list->idx; |
|
| 319 | |||
| 320 | 2 | return new ParserException( |
|
| 321 | 2 | Translator::gettext('A closing bracket was expected.'), |
|
| 322 | 2 | $token, |
|
| 323 | 2 | ); |
|
| 324 | } |
||
| 325 | |||
| 326 | 36 | $length = $list->idx - $idx; |
|
| 327 | |||
| 328 | 36 | return new TokensList(array_slice($list->tokens, $idx, $length)); |
|
| 329 | } |
||
| 330 | } |
||
| 331 |