yiisoft /
db-sqlite
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | namespace Yiisoft\Db\Sqlite; |
||
| 6 | |||
| 7 | use SplStack; |
||
| 8 | use Yiisoft\Db\Exception\InvalidArgumentException; |
||
| 9 | |||
| 10 | use function is_array; |
||
| 11 | use function is_string; |
||
| 12 | use function mb_strlen; |
||
| 13 | use function mb_strpos; |
||
| 14 | use function mb_strtoupper; |
||
| 15 | use function mb_substr; |
||
| 16 | use function reset; |
||
| 17 | use function usort; |
||
| 18 | |||
/**
 * Splits an SQL query into individual SQL tokens.
 *
 * You can use it to obtain additional information from an SQL code.
 *
 * Usage example:
 *
 * ```php
 * $tokenizer = new SqlTokenizer("SELECT * FROM {{%user}} WHERE [[id]] = 1");
 * $root = $tokenizer->tokenize();
 * $sqlTokens = $root->getChildren();
 * ```
 *
 * Tokens are instances of {@see SqlToken}.
 */
abstract class AbstractTokenizer
{
    /**
     * @var int SQL code string length, in UTF-8 characters. It is (re)computed by {@see tokenize()}.
     */
    protected int $length = 0;

    /**
     * @var int SQL code string current offset, in UTF-8 characters.
     */
    protected int $offset = 0;

    /**
     * @var SplStack Of active tokens.
     *
     * @psalm-var SplStack<SqlToken>
     *
     * @psalm-suppress PropertyNotSetInConstructor
     */
    private SplStack $tokenStack;

    /**
     * @var array|SqlToken Active token. It's usually a top of the token stack.
     *
     * @psalm-var SqlToken|SqlToken[]
     *
     * @psalm-suppress PropertyNotSetInConstructor
     */
    private array|SqlToken $currentToken;

    /**
     * @var array Cached substrings, keyed by "offset,length,caseFlag". The cache is dropped on every
     * {@see advance()} call.
     *
     * @psalm-var string[]
     */
    private array $substrings = [];

    /**
     * @var string Buffer for the current token.
     */
    private string $buffer = '';

    /**
     * @param string $sql SQL code to tokenize.
     */
    public function __construct(private string $sql)
    {
    }

    /**
     * Tokenizes and returns a code type token.
     *
     * The returned root token has one statement token per SQL statement as its children. A trailing child without
     * children of its own (for example, the empty statement opened after a final `;`) is removed.
     *
     * NOTE(review): closing parentheses pop the token stack unconditionally, so SQL with enough unmatched `)` empties
     * the stack and the underlying {@see SplStack} raises a `\RuntimeException` rather than the exception documented
     * below. Confirm whether callers rely on that before changing it.
     *
     * @throws InvalidArgumentException If the SQL code is invalid.
     *
     * @return SqlToken Code type token.
     *
     * @psalm-suppress MixedPropertyTypeCoercion
     */
    public function tokenize(): SqlToken
    {
        // Reset the whole scanner state so the same instance can be tokenized again.
        $this->length = mb_strlen($this->sql, 'UTF-8');
        $this->offset = 0;
        $this->substrings = [];
        $this->buffer = '';

        $token = (new SqlToken())->type(SqlToken::TYPE_CODE)->content($this->sql);

        $this->tokenStack = new SplStack();
        $this->tokenStack->push($token);

        // Every code token starts with one (initially empty) statement token.
        $token[] = (new SqlToken())->type(SqlToken::TYPE_STATEMENT);

        $this->tokenStack->push($token[0]);
        $this->currentToken = $this->tokenStack->top();
        $length = 0;

        while (!$this->isEof()) {
            // Whitespace and comments terminate the buffered word and are skipped.
            if ($this->isWhitespace($length) || $this->isComment($length)) {
                $this->addTokenFromBuffer();
                $this->advance($length);

                continue;
            }

            /** @psalm-suppress ConflictingReferenceConstraint */
            if ($this->tokenizeOperator($length) || $this->tokenizeDelimitedString($length)) {
                $this->advance($length);

                continue;
            }

            // Anything else is accumulated character by character until a delimiter is met.
            $this->buffer .= $this->substring(1);
            $this->advance(1);
        }

        $this->addTokenFromBuffer();

        if (
            $token->getHasChildren() &&
            $token[-1] instanceof SqlToken &&
            !$token[-1]->getHasChildren()
        ) {
            unset($token[-1]);
        }

        return $token;
    }

    /**
     * Returns whether there's a space or blank at the current offset.
     *
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
     *
     * @param int $length Length of the matched string.
     *
     * @return bool Whether there's a space or blank at the current offset.
     */
    abstract protected function isWhitespace(int &$length): bool;

    /**
     * Returns whether there's a commentary at the current offset.
     *
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string.
     *
     * @param int $length Length of the matched string.
     *
     * @return bool Whether there's a commentary at the current offset.
     */
    abstract protected function isComment(int &$length): bool;

    /**
     * Returns whether there's an operator at the current offset.
     *
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
     * also set `$content` to a string that will be used as a token content.
     *
     * @param int $length Length of the matched string.
     * @param string|null $content Optional content instead of the matched string.
     *
     * @return bool Whether there's an operator at the current offset.
     */
    abstract protected function isOperator(int &$length, string|null &$content): bool;

    /**
     * Returns whether there's an identifier at the current offset.
     *
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
     * also set `$content` to a string that will be used as a token content.
     *
     * @param int $length Length of the matched string.
     * @param string|null $content Optional content instead of the matched string.
     *
     * @return bool Whether there's an identifier at the current offset.
     */
    abstract protected function isIdentifier(int &$length, string|null &$content): bool;

    /**
     * Returns whether there's a string literal at the current offset.
     *
     * If this method returns `true`, it has to set the `$length` parameter to the length of the matched string. It may
     * also set `$content` to a string that will be used as a token content.
     *
     * @param int $length Length of the matched string.
     * @param string|null $content Optional content instead of the matched string.
     *
     * @return bool Whether there's a string literal at the current offset.
     */
    abstract protected function isStringLiteral(int &$length, string|null &$content): bool;

    /**
     * Returns whether the given string is a keyword.
     *
     * The method may set `$content` to a string that will be used as a token content.
     *
     * @param string $string String to match.
     * @param string|null $content Optional content instead of the matched string.
     *
     * @return bool Whether the given string is a keyword.
     */
    abstract protected function isKeyword(string $string, string|null &$content): bool;

    /**
     * Returns whether the longest common prefix equals to the SQL code of the same length at the current offset.
     *
     * `$with` may be either a flat list of strings or an already prepared lookup map of the form
     * `[length => [string => true]]`; a flat list is converted to such a map (longest strings first) before matching.
     *
     * Note that `$content` is overwritten with every tested substring, so after a failed match it holds the last
     * candidate that was tried rather than its previous value.
     *
     * @param array $with Strings to test. The method `will` change this parameter to speed up lookups.
     * @param bool $caseSensitive Whether to perform a case-sensitive comparison.
     * @param int $length Length of the matched string.
     * @param string|null $content Matched string.
     *
     * @return bool Whether there is a match.
     *
     * @psalm-param array<array-key, string> $with
     */
    protected function startsWithAnyLongest(
        array $with,
        bool $caseSensitive,
        int &$length,
        string|null &$content = null
    ): bool {
        if ($with === []) {
            return false;
        }

        if (!is_array(reset($with))) {
            // Longest candidates first, so that the map below is iterated from the longest length downwards.
            usort(
                $with,
                static fn (string $string1, string $string2): int =>
                    mb_strlen($string2, 'UTF-8') <=> mb_strlen($string1, 'UTF-8')
            );

            $map = [];

            foreach ($with as $string) {
                $key = $caseSensitive ? $string : mb_strtoupper($string, 'UTF-8');
                $map[mb_strlen($string, 'UTF-8')][$key] = true;
            }

            $with = $map;
        }

        /** @psalm-var array<int, array> $with */
        foreach ($with as $testLength => $testValues) {
            $content = $this->substring($testLength, $caseSensitive);

            if (isset($testValues[$content])) {
                $length = $testLength;

                return true;
            }
        }

        return false;
    }

    /**
     * Returns a string of the given length starting with the specified offset.
     *
     * Results are cached per offset/length/case combination until the scanner advances.
     *
     * @param int $length String length to return.
     * @param bool $caseSensitive If it's `false`, the string will be uppercase.
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
     *
     * @return string Result string, it may be empty if there's nothing to return.
     */
    protected function substring(int $length, bool $caseSensitive = true, int|null $offset = null): string
    {
        $offset ??= $this->offset;

        // A request that runs past the end of the SQL code yields nothing rather than a shorter string.
        if ($offset + $length > $this->length) {
            return '';
        }

        $cacheKey = $offset . ',' . $length;
        $exactKey = $cacheKey . ',1';

        $this->substrings[$exactKey] ??= mb_substr($this->sql, $offset, $length, 'UTF-8');

        if ($caseSensitive) {
            return $this->substrings[$exactKey];
        }

        $upperKey = $cacheKey . ',0';

        return $this->substrings[$upperKey] ??= mb_strtoupper($this->substrings[$exactKey], 'UTF-8');
    }

    /**
     * Returns an index after the given string in the SQL code starting with the specified offset.
     *
     * @param string $string String to find.
     * @param int|null $offset SQL code offset, defaults to current if `null` is passed.
     *
     * @return int Index after the given string or end of string index.
     */
    protected function indexAfter(string $string, int|null $offset = null): int
    {
        $offset ??= $this->offset;
        $needleLength = mb_strlen($string, 'UTF-8');

        // The needle can't fit into what is left of the SQL code.
        if ($offset + $needleLength > $this->length) {
            return $this->length;
        }

        $position = mb_strpos($this->sql, $string, $offset, 'UTF-8');

        return $position === false ? $this->length : $position + $needleLength;
    }

    /**
     * Determines whether there is a delimited string at the current offset and adds it to the token children.
     *
     * Identifiers are probed first; string literals are only probed when no identifier matched.
     *
     * @param int $length Length of the matched string, set by the matching `is*()` method.
     *
     * @return bool Whether an identifier or a string literal token was added.
     */
    private function tokenizeDelimitedString(int &$length): bool
    {
        $content = null;

        $isIdentifier = $this->isIdentifier($length, $content);
        $isStringLiteral = !$isIdentifier && $this->isStringLiteral($length, $content);

        if (!$isIdentifier && !$isStringLiteral) {
            return false;
        }

        $this->addTokenFromBuffer();

        $this->currentToken[] = (new SqlToken())
            ->type($isIdentifier ? SqlToken::TYPE_IDENTIFIER : SqlToken::TYPE_STRING_LITERAL)
            ->content(is_string($content) ? $content : $this->substring($length))
            ->startOffset($this->offset)
            ->endOffset($this->offset + $length);

        return true;
    }

    /**
     * Determines whether there is an operator at the current offset and adds it to the token children.
     *
     * Besides adding the operator token, `(` opens a nested parenthesis token, `)` returns to the enclosing token and
     * `;` closes the current statement and opens a new one.
     *
     * @param int $length Length of the matched string, set by {@see isOperator()}.
     *
     * @return bool Whether an operator was found at the current offset.
     */
    private function tokenizeOperator(int &$length): bool
    {
        $content = null;

        if (!$this->isOperator($length, $content)) {
            return false;
        }

        $this->addTokenFromBuffer();

        $operator = $this->substring($length);
        $tokenContent = is_string($content) ? $content : $operator;

        switch ($operator) {
            case '(':
                $this->currentToken[] = $this->createOperatorToken($tokenContent, $length);
                $this->currentToken[] = (new SqlToken())->type(SqlToken::TYPE_PARENTHESIS);

                // Descend into the parenthesis token that has just been appended.
                if ($this->currentToken[-1] !== null) {
                    $this->tokenStack->push($this->currentToken[-1]);
                }

                $this->currentToken = $this->tokenStack->top();

                break;

            case ')':
                // Leave the current nesting level first: the operator belongs to the enclosing token.
                $this->tokenStack->pop();
                $this->currentToken = $this->tokenStack->top();
                $this->currentToken[] = $this->createOperatorToken(')', $length);

                break;

            case ';':
                // A `;` right at the start of a still empty token is ignored.
                if ($this->currentToken instanceof SqlToken && !$this->currentToken->getHasChildren()) {
                    break;
                }

                $this->currentToken[] = $this->createOperatorToken($tokenContent, $length);

                // Close the current token and open a fresh statement on its parent.
                $this->tokenStack->pop();
                $this->currentToken = $this->tokenStack->top();
                $this->currentToken[] = (new SqlToken())->type(SqlToken::TYPE_STATEMENT);

                if ($this->currentToken[-1] instanceof SqlToken) {
                    $this->tokenStack->push($this->currentToken[-1]);
                }

                $this->currentToken = $this->tokenStack->top();

                break;

            default:
                $this->currentToken[] = $this->createOperatorToken($tokenContent, $length);

                break;
        }

        return true;
    }

    /**
     * Creates an operator token that spans `$length` characters starting at the current offset.
     *
     * @param string $content Token content.
     * @param int $length Length of the matched operator.
     *
     * @return SqlToken Operator type token.
     */
    private function createOperatorToken(string $content, int $length): SqlToken
    {
        return (new SqlToken())
            ->type(SqlToken::TYPE_OPERATOR)
            ->content($content)
            ->startOffset($this->offset)
            ->endOffset($this->offset + $length);
    }

    /**
     * Determines a type of text in the buffer, tokenizes it and adds it to the token children.
     *
     * Does nothing when the buffer is empty. The buffer is cleared afterwards.
     */
    private function addTokenFromBuffer(): void
    {
        if ($this->buffer === '') {
            return;
        }

        $content = null;
        $isKeyword = $this->isKeyword($this->buffer, $content);

        // The buffered text ends right before the current offset.
        $this->currentToken[] = (new SqlToken())
            ->type($isKeyword ? SqlToken::TYPE_KEYWORD : SqlToken::TYPE_TOKEN)
            ->content(is_string($content) ? $content : $this->buffer)
            ->startOffset($this->offset - mb_strlen($this->buffer, 'UTF-8'))
            ->endOffset($this->offset);

        $this->buffer = '';
    }

    /**
     * Adds the specified length to the current offset.
     *
     * The substring cache is dropped because its entries are looked up relative to the current offset.
     *
     * @param int $length Number of characters to move forward.
     *
     * @throws InvalidArgumentException If the length is less than or equal to 0.
     */
    private function advance(int $length): void
    {
        if ($length <= 0) {
            throw new InvalidArgumentException('Length must be greater than 0.');
        }

        $this->offset += $length;
        $this->substrings = [];
    }

    /**
     * Returns whether the SQL code is completely traversed.
     */
    private function isEof(): bool
    {
        return $this->offset >= $this->length;
    }
}
||
| 458 |
This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.
This is most likely a typographical error or the method has been renamed.