Passed
Pull Request — master (#505)
by
unknown
04:43 queued 01:55
created

Lexer::parseBool()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 26
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
eloc 13
nc 5
nop 0
dl 0
loc 26
ccs 14
cts 14
cp 1
crap 5
rs 9.5222
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser;
6
7
use PhpMyAdmin\SqlParser\Exceptions\LexerException;
8
9
use function in_array;
10
use function mb_strlen;
11
use function sprintf;
12
use function str_ends_with;
13
use function strlen;
14
use function substr;
15
16
/**
17
 * Defines the lexer of the library.
18
 *
19
 * This is one of the most important components, along with the parser.
20
 *
21
 * Depends on context to extract lexemes.
22
 *
23
 * Performs lexical analysis over a SQL statement and splits it into multiple tokens.
24
 *
25
 * The output of the lexer is affected by the context of the SQL statement.
26
 *
27
 * @see Context
28
 */
29
class Lexer extends Core
30
{
31
    /**
32
     * A list of methods that are used in lexing the SQL query.
33
     */
34
    private const PARSER_METHODS = [
35
        // It is best to put the parsers in order of their complexity
36
        // (ascending) and their occurrence rate (descending).
37
        //
38
        // Conflicts:
39
        //
40
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
41
        // They fight over delimiter. The delimiter may be a keyword, a
42
        // number or almost any character which makes the delimiter one of
43
        // the first tokens that must be parsed.
44
        //
45
        // 2. `parseNumber` and `parseOperator`
46
        // They fight over `+` and `-`.
47
        //
48
        // 3. `parseComment` and `parseOperator`
49
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
50
        //
51
        // 4. `parseBool` and `parseKeyword`
52
        // They fight over `TRUE` and `FALSE`.
53
        //
54
        // 5. `parseKeyword` and `parseUnknown`
55
        // They fight over words. `parseUnknown` does not know about
56
        // keywords.
57
58
        'parseDelimiter',
59
        'parseWhitespace',
60
        'parseNumber',
61
        'parseComment',
62
        'parseOperator',
63
        'parseBool',
64
        'parseString',
65
        'parseSymbol',
66
        'parseKeyword',
67
        'parseLabel',
68
        'parseUnknown',
69
    ];
70
71
    /**
72
     * A list of keywords that indicate that the function keyword
73
     * is not used as a function
74
     */
75
    private const KEYWORD_NAME_INDICATORS = [
76
        'FROM',
77
        'SET',
78
        'WHERE',
79
    ];
80
81
    /**
82
     * A list of operators that indicate that the function keyword
83
     * is not used as a function
84
     */
85
    private const OPERATOR_NAME_INDICATORS = [
86
        ',',
87
        '.',
88
    ];
89
90
    /**
91
     * The string to be parsed.
92
     *
93
     * @var string|UtfString
94
     */
95
    public $str = '';
96
97
    /**
98
     * The length of `$str`.
99
     *
100
     * By storing its length, a lot of time is saved, because parsing methods
101
     * would call `strlen` every time.
102
     *
103
     * @var int
104
     */
105
    public $len = 0;
106
107
    /**
108
     * The index of the last parsed character.
109
     *
110
     * @var int
111
     */
112
    public $last = 0;
113
114
    /**
115
     * Tokens extracted from given strings.
116
     *
117
     * @var TokensList
118
     */
119
    public $list;
120
121
    /**
122
     * The default delimiter. This is used, by default, in all new instances.
123
     *
124
     * @var string
125
     */
126
    public static $defaultDelimiter = ';';
127
128
    /**
129
     * Statements delimiter.
130
     * This may change during lexing.
131
     *
132
     * @var string
133
     */
134
    public $delimiter;
135
136
    /**
137
     * The length of the delimiter.
138
     *
139
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
140
     * calls to `strlen`, which might affect performance when the delimiter is
141
     * big.
142
     *
143
     * @var int
144
     */
145
    public $delimiterLen;
146
147
    /**
148
     * Gets the tokens list parsed by a new instance of a lexer.
149
     *
150
     * @param string|UtfString $str       the query to be lexed
151
     * @param bool             $strict    whether strict mode should be
152
     *                                    enabled or not
153
     * @param string           $delimiter the delimiter to be used
154
     */
155 2
    public static function getTokens($str, $strict = false, $delimiter = null): TokensList
156
    {
157 2
        $lexer = new self($str, $strict, $delimiter);
158
159 2
        return $lexer->list;
160
    }
161
162
    /**
163
     * @param string|UtfString $str       the query to be lexed
164
     * @param bool             $strict    whether strict mode should be
165
     *                                    enabled or not
166
     * @param string           $delimiter the delimiter to be used
167
     */
168 1418
    public function __construct($str, $strict = false, $delimiter = null)
169
    {
170 1418
        parent::__construct();
171
172
        // `strlen` is used instead of `mb_strlen` because the lexer needs to
173
        // parse each byte of the input.
174 1418
        $len = $str instanceof UtfString ? $str->length() : strlen($str);
175
176
        // For multi-byte strings, a new instance of `UtfString` is initialized.
177 1418
        if (! $str instanceof UtfString && $len !== mb_strlen($str, 'UTF-8')) {
178 10
            $str = new UtfString($str);
179
        }
180
181 1418
        $this->str = $str;
182 1418
        $this->len = $str instanceof UtfString ? $str->length() : $len;
183
184 1418
        $this->strict = $strict;
185
186
        // Setting the delimiter.
187 1418
        $this->setDelimiter(! empty($delimiter) ? $delimiter : static::$defaultDelimiter);
188
189 1418
        $this->lex();
190
    }
191
192
    /**
193
     * Sets the delimiter.
194
     *
195
     * @param string $delimiter the new delimiter
196
     */
197 1418
    public function setDelimiter($delimiter): void
198
    {
199 1418
        $this->delimiter = $delimiter;
200 1418
        $this->delimiterLen = strlen($delimiter);
201
    }
202
203
    /**
204
     * Parses the string and extracts lexemes.
205
     */
206 1418
    public function lex(): void
207
    {
208
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
209
        // is* functions. For a better performance, some rules can be deduced
210
        // from context.
211
        // For example, in `parseBool` there is no need to compare the token
212
        // every time with `true` and `false`. The first step would be to
213
        // compare with 'true' only and just after that add another letter from
214
        // context and compare again with `false`.
215
        // Another example is `parseComment`.
216
217 1418
        $list = new TokensList();
218
219
        /**
220
         * Last processed token.
221
         *
222
         * @var Token
223
         */
224 1418
        $lastToken = null;
225
226 1418
        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
227
            /**
228
             * The new token.
229
             *
230
             * @var Token
231
             */
232 1408
            $token = null;
233
234 1408
            foreach (self::PARSER_METHODS as $method) {
235 1408
                $token = $this->$method();
236
237 1408
                if ($token) {
238 1408
                    break;
239
                }
240
            }
241
242 1408
            if ($token === null) {
243
                // @assert($this->last === $lastIdx);
244 4
                $token = new Token($this->str[$this->last]);
245 4
                $this->error('Unexpected character.', $this->str[$this->last], $this->last);
246
            } elseif (
247 1408
                $lastToken !== null
248 1408
                && $token->type === Token::TYPE_SYMBOL
249 1408
                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
250
                && (
251 1408
                    $lastToken->type === Token::TYPE_STRING
252 1408
                    || (
253 1408
                        $lastToken->type === Token::TYPE_SYMBOL
254 1408
                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
255 1408
                    )
256
                )
257
            ) {
258
                // Handles ```... FROM 'user'@'%' ...```.
259 46
                $lastToken->token .= $token->token;
260 46
                $lastToken->type = Token::TYPE_SYMBOL;
261 46
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
262 46
                $lastToken->value .= '@' . $token->value;
263 46
                continue;
264
            } elseif (
265 1408
                $lastToken !== null
266 1408
                && $token->type === Token::TYPE_KEYWORD
267 1408
                && $lastToken->type === Token::TYPE_OPERATOR
268 1408
                && $lastToken->value === '.'
269
            ) {
270
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
271
                // a reserved word.
272 30
                $token->type = Token::TYPE_NONE;
273 30
                $token->flags = 0;
274 30
                $token->value = $token->token;
275
            }
276
277 1408
            $token->position = $lastIdx;
278
279 1408
            $list->tokens[$list->count++] = $token;
280
281
            // Handling delimiters.
282 1408
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
283 36
                if ($this->last + 1 >= $this->len) {
284 2
                    $this->error('Expected whitespace(s) before delimiter.', '', $this->last + 1);
285 2
                    continue;
286
                }
287
288
                // Skipping last R (from `delimiteR`) and whitespaces between
289
                // the keyword `DELIMITER` and the actual delimiter.
290 34
                $pos = ++$this->last;
291 34
                $token = $this->parseWhitespace();
292
293 34
                if ($token !== null) {
294 32
                    $token->position = $pos;
295 32
                    $list->tokens[$list->count++] = $token;
296
                }
297
298
                // Preparing the token that holds the new delimiter.
299 34
                if ($this->last + 1 >= $this->len) {
300 2
                    $this->error('Expected delimiter.', '', $this->last + 1);
301 2
                    continue;
302
                }
303
304 32
                $pos = $this->last + 1;
305
306
                // Parsing the delimiter.
307 32
                $this->delimiter = null;
308 32
                $delimiterLen = 0;
309
                while (
310 32
                    ++$this->last < $this->len
311 32
                    && ! Context::isWhitespace($this->str[$this->last])
0 ignored issues
show
Bug introduced by
It seems like $this->str[$this->last] can also be of type null; however, parameter $string of PhpMyAdmin\SqlParser\Context::isWhitespace() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

311
                    && ! Context::isWhitespace(/** @scrutinizer ignore-type */ $this->str[$this->last])
Loading history...
312 32
                    && $delimiterLen < 15
313
                ) {
314 30
                    $this->delimiter .= $this->str[$this->last];
315 30
                    ++$delimiterLen;
316
                }
317
318 32
                if (empty($this->delimiter)) {
319 2
                    $this->error('Expected delimiter.', '', $this->last);
320 2
                    $this->delimiter = ';';
321
                }
322
323 32
                --$this->last;
324
325
                // Saving the delimiter and its token.
326 32
                $this->delimiterLen = strlen($this->delimiter);
327 32
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
328 32
                $token->position = $pos;
329 32
                $list->tokens[$list->count++] = $token;
330
            }
331
332 1404
            $lastToken = $token;
333
        }
334
335
        // Adding a final delimiter to mark the ending.
336 1418
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);
337
338
        // Saving the tokens list.
339 1418
        $this->list = $list;
340
341 1418
        $this->solveAmbiguityOnStarOperator();
342 1418
        $this->solveAmbiguityOnFunctionKeywords();
343
    }
344
345
    /**
346
     * Resolves the ambiguity when dealing with the "*" operator.
347
     *
348
     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
349
     * SELECT a.* FROM ...). To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
350
     * comments, right after the "*" position. The "*" is for sure an SQL wildcard if the next token found is any of:
351
     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
352
     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
353
     * - "," (a comma separator like in "SELECT *, field FROM...");
354
     * - ")" (a closing parenthesis like in "COUNT(*)").
355
     * This method will change the flag of the "*" tokens when any of the conditions above is true. Otherwise, the
356
     * default flag (arithmetic) will be kept.
357
     */
358 1418
    private function solveAmbiguityOnStarOperator(): void
359
    {
360 1418
        $iBak = $this->list->idx;
361 1418
        while (($starToken = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*')) !== null) {
362
            // getNext() already gets rid of whitespaces and comments.
363 200
            $next = $this->list->getNext();
364
365 200
            if ($next === null) {
366
                continue;
367
            }
368
369
            if (
370 200
                ($next->type !== Token::TYPE_KEYWORD || ! in_array($next->value, ['FROM', 'USING'], true))
371 200
                && ($next->type !== Token::TYPE_OPERATOR || ! in_array($next->value, [',', ')'], true))
372
            ) {
373 16
                continue;
374
            }
375
376 186
            $starToken->flags = Token::FLAG_OPERATOR_SQL;
377
        }
378
379 1418
        $this->list->idx = $iBak;
380
    }
381
382
    /**
383
     * Resolves the ambiguity when dealing with the functions keywords.
384
     *
385
     * In SQL statements, the function keywords might be used as table names or columns names.
386
     * To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
387
     * comments, right after the function keyword position. The function keyword is for sure used
388
     * as column name or table name if the next token found is any of:
389
     *
390
     * - "FROM" (the FROM keyword like in "SELECT Country x, AverageSalary avg FROM...");
391
     * - "WHERE" (the WHERE keyword like in "DELETE FROM emp x WHERE x.salary = 20");
392
     * - "SET" (the SET keyword like in "UPDATE Country x, City y set x.Name=x.Name");
393
     * - "," (a comma separator like 'x,' in "UPDATE Country x, City y set x.Name=x.Name");
394
     * - "." (a dot separator like in "x.asset_id FROM (SELECT evt.asset_id FROM evt)");
395
     * - "NULL" (when used as a table alias like in "avg.col FROM (SELECT ev.col FROM ev) avg").
396
     *
397
     * This method will change the flag of the function keyword tokens when any of those
398
     * conditions above is true. Otherwise, the
399
     * default flag (function keyword) will be kept.
400
     */
401 1418
    private function solveAmbiguityOnFunctionKeywords(): void
402
    {
403 1418
        $iBak = $this->list->idx;
404 1418
        $keywordFunction = Token::TYPE_KEYWORD | Token::FLAG_KEYWORD_FUNCTION;
405 1418
        while (($keywordToken = $this->list->getNextOfTypeAndFlag(Token::TYPE_KEYWORD, $keywordFunction)) !== null) {
406 214
            $next = $this->list->getNext();
407
            if (
408 214
                ($next->type !== Token::TYPE_KEYWORD
409 214
                    || ! in_array($next->value, self::KEYWORD_NAME_INDICATORS, true)
410
                )
411 214
                && ($next->type !== Token::TYPE_OPERATOR
412 214
                    || ! in_array($next->value, self::OPERATOR_NAME_INDICATORS, true)
413
                )
414 214
                && ($next->value !== null)
415
            ) {
416 204
                continue;
417
            }
418
419 12
            $keywordToken->type = Token::TYPE_NONE;
420 12
            $keywordToken->flags = Token::TYPE_NONE;
421 12
            $keywordToken->keyword = $keywordToken->value;
422
        }
423
424 1418
        $this->list->idx = $iBak;
425
    }
426
427
    /**
428
     * Creates a new error log.
429
     *
430
     * @param string $msg  the error message
431
     * @param string $str  the character that produced the error
432
     * @param int    $pos  the position of the character
433
     * @param int    $code the code of the error
434
     *
435
     * @throws LexerException throws the exception, if strict mode is enabled.
436
     */
437 34
    public function error($msg, $str = '', $pos = 0, $code = 0): void
438
    {
439 34
        $error = new LexerException(
440 34
            Translator::gettext($msg),
441 34
            $str,
442 34
            $pos,
443 34
            $code
444 34
        );
445 34
        parent::error($error);
446
    }
447
448
    /**
449
     * Parses a keyword.
450
     */
451 1390
    public function parseKeyword(): Token|null
452
    {
453 1390
        $token = '';
454
455
        /**
456
         * Value to be returned.
457
         *
458
         * @var Token
459
         */
460 1390
        $ret = null;
461
462
        /**
463
         * The value of `$this->last` where `$token` ends in `$this->str`.
464
         */
465 1390
        $iEnd = $this->last;
466
467
        /**
468
         * Whether last parsed character is a whitespace.
469
         *
470
         * @var bool
471
         */
472 1390
        $lastSpace = false;
473
474 1390
        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
475
            // Composed keywords shouldn't have more than one whitespace between
476
            // keywords.
477 1390
            if (Context::isWhitespace($this->str[$this->last])) {
0 ignored issues
show
Bug introduced by
It seems like $this->str[$this->last] can also be of type null; however, parameter $string of PhpMyAdmin\SqlParser\Context::isWhitespace() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

477
            if (Context::isWhitespace(/** @scrutinizer ignore-type */ $this->str[$this->last])) {
Loading history...
478 1364
                if ($lastSpace) {
479 264
                    --$j; // The size of the keyword didn't increase.
480 264
                    continue;
481
                }
482
483 1364
                $lastSpace = true;
484
            } else {
485 1390
                $lastSpace = false;
486
            }
487
488 1390
            $token .= $this->str[$this->last];
489 1390
            $flags = Context::isKeyword($token);
490
491 1390
            if (($this->last + 1 !== $this->len && ! Context::isSeparator($this->str[$this->last + 1])) || ! $flags) {
0 ignored issues
show
Bug introduced by
It seems like $this->str[$this->last + 1] can also be of type null; however, parameter $string of PhpMyAdmin\SqlParser\Context::isSeparator() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

491
            if (($this->last + 1 !== $this->len && ! Context::isSeparator(/** @scrutinizer ignore-type */ $this->str[$this->last + 1])) || ! $flags) {
Loading history...
Bug Best Practice introduced by
The expression $flags of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
492 1390
                continue;
493
            }
494
495 1356
            $ret = new Token($token, Token::TYPE_KEYWORD, $flags);
496 1356
            $iEnd = $this->last;
497
498
            // We don't break so we find longest keyword.
499
            // For example, `OR` and `ORDER` have a common prefix `OR`.
500
            // If we stopped at `OR`, the parsing would be invalid.
501
        }
502
503 1390
        $this->last = $iEnd;
504
505 1390
        return $ret;
506
    }
507
508
    /**
509
     * Parses a label.
510
     */
511 1050
    public function parseLabel(): Token|null
512
    {
513 1050
        $token = '';
514
515
        /**
516
         * Value to be returned.
517
         *
518
         * @var Token
519
         */
520 1050
        $ret = null;
521
522
        /**
523
         * The value of `$this->last` where `$token` ends in `$this->str`.
524
         */
525 1050
        $iEnd = $this->last;
526 1050
        for ($j = 1; $j < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
527 1050
            if ($this->str[$this->last] === ':' && $j > 1) {
528
                // End of label
529 4
                $token .= $this->str[$this->last];
530 4
                $ret = new Token($token, Token::TYPE_LABEL);
531 4
                $iEnd = $this->last;
532 4
                break;
533
            }
534
535 1050
            if (Context::isWhitespace($this->str[$this->last]) && $j > 1) {
0 ignored issues
show
Bug introduced by
It seems like $this->str[$this->last] can also be of type null; however, parameter $string of PhpMyAdmin\SqlParser\Context::isWhitespace() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

535
            if (Context::isWhitespace(/** @scrutinizer ignore-type */ $this->str[$this->last]) && $j > 1) {
Loading history...
536
                // Whitespace between label and :
537
                // The size of the keyword didn't increase.
538 818
                --$j;
539 1050
            } elseif (Context::isSeparator($this->str[$this->last])) {
0 ignored issues
show
Bug introduced by
It seems like $this->str[$this->last] can also be of type null; however, parameter $string of PhpMyAdmin\SqlParser\Context::isSeparator() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

539
            } elseif (Context::isSeparator(/** @scrutinizer ignore-type */ $this->str[$this->last])) {
Loading history...
540
                // Any other separator
541 800
                break;
542
            }
543
544 1048
            $token .= $this->str[$this->last];
545
        }
546
547 1050
        $this->last = $iEnd;
548
549 1050
        return $ret;
550
    }
551
552
    /**
553
     * Parses an operator.
554
     */
555 1408
    public function parseOperator(): Token|null
556
    {
557 1408
        $token = '';
558
559
        /**
560
         * Value to be returned.
561
         *
562
         * @var Token
563
         */
564 1408
        $ret = null;
565
566
        /**
567
         * The value of `$this->last` where `$token` ends in `$this->str`.
568
         */
569 1408
        $iEnd = $this->last;
570
571 1408
        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
572 1408
            $token .= $this->str[$this->last];
573 1408
            $flags = Context::isOperator($token);
574
575 1408
            if (! $flags) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $flags of type integer|null is loosely compared to false; this is ambiguous if the integer can be 0. You might want to explicitly use === null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
576 1404
                continue;
577
            }
578
579 1002
            $ret = new Token($token, Token::TYPE_OPERATOR, $flags);
580 1002
            $iEnd = $this->last;
581
        }
582
583 1408
        $this->last = $iEnd;
584
585 1408
        return $ret;
586
    }
587
588
    /**
589
     * Parses a whitespace.
590
     */
591 1408
    public function parseWhitespace(): Token|null
592
    {
593 1408
        $token = $this->str[$this->last];
594
595 1408
        if (! Context::isWhitespace($token)) {
0 ignored issues
show
Bug introduced by
It seems like $token can also be of type null; however, parameter $string of PhpMyAdmin\SqlParser\Context::isWhitespace() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

595
        if (! Context::isWhitespace(/** @scrutinizer ignore-type */ $token)) {
Loading history...
596 1408
            return null;
597
        }
598
599 1380
        while (++$this->last < $this->len && Context::isWhitespace($this->str[$this->last])) {
600 268
            $token .= $this->str[$this->last];
601
        }
602
603 1380
        --$this->last;
604
605 1380
        return new Token($token, Token::TYPE_WHITESPACE);
606
    }
607
608
    /**
609
     * Parses a comment.
610
     */
611 1408
    public function parseComment(): Token|null
612
    {
613 1408
        $iBak = $this->last;
614 1408
        $token = $this->str[$this->last];
615
616
        // Bash style comments. (#comment\n)
617 1408
        if (Context::isComment($token)) {
0 ignored issues
show
Bug introduced by
It seems like $token can also be of type null; however, parameter $string of PhpMyAdmin\SqlParser\Context::isComment() does only seem to accept string, maybe add an additional type check? ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-type  annotation

617
        if (Context::isComment(/** @scrutinizer ignore-type */ $token)) {
Loading history...
Bug Best Practice introduced by
The expression PhpMyAdmin\SqlParser\Context::isComment($token) of type integer|null is loosely compared to true; this is ambiguous if the integer can be 0. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
618 6
            while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
619 6
                $token .= $this->str[$this->last];
620
            }
621
622
            // Include trailing \n as whitespace token
623 6
            if ($this->last < $this->len) {
624 6
                --$this->last;
625
            }
626
627 6
            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
628
        }
629
630
        // C style comments. (/*comment*\/)
631 1408
        if (++$this->last < $this->len) {
632 1404
            $token .= $this->str[$this->last];
633 1404
            if (Context::isComment($token)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression PhpMyAdmin\SqlParser\Context::isComment($token) of type integer|null is loosely compared to true; this is ambiguous if the integer can be 0. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
634
                // There might be a conflict with "*" operator here, when string is "*/*".
635
                // This can occur in the following statements:
636
                // - "SELECT */* comment */ FROM ..."
637
                // - "SELECT 2*/* comment */3 AS `six`;"
638 100
                $next = $this->last + 1;
639 100
                if (($next < $this->len) && $this->str[$next] === '*') {
640
                    // Conflict in "*/*": first "*" was not for ending a comment.
641
                    // Stop here and let other parsing method define the true behavior of that first star.
642 2
                    $this->last = $iBak;
643
644 2
                    return null;
645
                }
646
647 100
                $flags = Token::FLAG_COMMENT_C;
648
649
                // This comment already ended. It may be a part of a
650
                // previous MySQL specific command.
651 100
                if ($token === '*/') {
652 34
                    return new Token($token, Token::TYPE_COMMENT, $flags);
653
                }
654
655
                // Checking if this is a MySQL-specific command.
656 100
                if ($this->last + 1 < $this->len && $this->str[$this->last + 1] === '!') {
657 34
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
658 34
                    $token .= $this->str[++$this->last];
659
660
                    while (
661 34
                        ++$this->last < $this->len
662 34
                        && $this->str[$this->last] >= '0'
663 34
                        && $this->str[$this->last] <= '9'
664
                    ) {
665 32
                        $token .= $this->str[$this->last];
666
                    }
667
668 34
                    --$this->last;
669
670
                    // We split this comment and parse only its beginning
671
                    // here.
672 34
                    return new Token($token, Token::TYPE_COMMENT, $flags);
673
                }
674
675
                // Parsing the comment.
676
                while (
677 70
                    ++$this->last < $this->len
678 70
                    && (
679 70
                        $this->str[$this->last - 1] !== '*'
680 70
                        || $this->str[$this->last] !== '/'
681 70
                    )
682
                ) {
683 70
                    $token .= $this->str[$this->last];
684
                }
685
686
                // Adding the ending.
687 70
                if ($this->last < $this->len) {
688 70
                    $token .= $this->str[$this->last];
689
                }
690
691 70
                return new Token($token, Token::TYPE_COMMENT, $flags);
692
            }
693
        }
694
695
        // SQL style comments. (-- comment\n)
696 1408
        if (++$this->last < $this->len) {
697 1402
            $token .= $this->str[$this->last];
698 1402
            $end = false;
699
        } else {
700 410
            --$this->last;
701 410
            $end = true;
702
        }
703
704 1408
        if (Context::isComment($token, $end)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression PhpMyAdmin\SqlParser\Con...isComment($token, $end) of type integer|null is loosely compared to true; this is ambiguous if the integer can be 0. You might want to explicitly use !== null instead.

In PHP, under loose comparison (like ==, or !=, or switch conditions), values of different types might be equal.

For integer values, zero is a special case, in particular the following results might be unexpected:

0   == false // true
0   == null  // true
123 == false // false
123 == null  // false

// It is often better to use strict comparison
0 === false // false
0 === null  // false
Loading history...
705
            // Checking if this comment did not end already (```--\n```).
706 70
            if ($this->str[$this->last] !== "\n") {
707 70
                while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
708 70
                    $token .= $this->str[$this->last];
709
                }
710
            }
711
712
            // Include trailing \n as whitespace token
713 70
            if ($this->last < $this->len) {
714 62
                --$this->last;
715
            }
716
717 70
            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
718
        }
719
720 1408
        $this->last = $iBak;
721
722 1408
        return null;
723
    }
724
725
    /**
     * Parses a boolean literal.
     *
     * Reads four characters starting at the cursor and, if they are not
     * recognised by `Context::isBool()`, retries with a fifth character.
     * The five-character match is reported with flag `1`.
     *
     * On success the cursor is left on the last character of the literal; on
     * failure it is restored and `null` is returned.
     */
    public function parseBool(): Token|null
    {
        $start = $this->last;

        // Four characters (the shorter of the two literals) must be available.
        if ($start + 3 >= $this->len) {
            return null;
        }

        // Collect the four-character candidate.
        $candidate = '';
        for ($offset = 0; $offset < 4; ++$offset) {
            $candidate .= $this->str[$start + $offset];
        }

        $this->last = $start + 3;

        if (Context::isBool($candidate)) {
            return new Token($candidate, Token::TYPE_BOOL);
        }

        // Extend the candidate by one character for the longer literal.
        $this->last = $start + 4;

        if ($this->last < $this->len) {
            $candidate .= $this->str[$this->last];

            if (Context::isBool($candidate)) {
                return new Token($candidate, Token::TYPE_BOOL, 1);
            }
        }

        // Not a boolean: rewind so that another parser can try.
        $this->last = $start;

        return null;
    }
755
756
    /**
     * Parses a number.
     *
     * Recognises decimal integers, floats, numbers in approximate (exponent)
     * form, hexadecimal numbers (`0x...`) and bit values (`b'...'`). The
     * matching flags (`Token::FLAG_NUMBER_*`) are accumulated while scanning.
     *
     * On success the cursor is left on the last character of the number; on
     * failure it is restored and `null` is returned.
     */
    public function parseNumber(): Token|null
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machine and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 -------------------[ 0x or 0X ]------------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        //
        // Valid final states are: 2, 3, 4 (unless the token is a lone `.`),
        // 6 and 9. Any parsing that finished in a state other than these is
        // invalid.
        // A negative state marks a number that was directly followed by a
        // letter; scanning stops immediately because the result is already
        // known to be invalid.
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];
            $isDigit = $char >= '0' && $char <= '9';
            $isLetter = ($char >= 'a' && $char <= 'z') || ($char >= 'A' && $char <= 'Z');

            if ($state === 1) {
                if ($char === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif (
                    $this->last + 1 < $this->len
                    && $char === '0'
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // Consume the `0` here; the `x` is appended at the bottom
                    // of the loop.
                    $token .= $char;
                    ++$this->last;
                    $state = 2;
                } elseif ($isDigit) {
                    $state = 3;
                } elseif ($char === '.') {
                    $state = 4;
                } elseif ($char === 'b') {
                    $state = 7;
                } elseif ($char !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                if (
                    ! (
                        $isDigit
                        || ($char >= 'A' && $char <= 'F')
                        || ($char >= 'a' && $char <= 'f')
                    )
                ) {
                    break;
                }
            } elseif ($state === 3) {
                if ($char === '.') {
                    $state = 4;
                } elseif ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif ($isLetter) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } elseif (! $isDigit) {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif ($isLetter) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } elseif (! $isDigit) {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if ($char === '+' || $char === '-' || $isDigit) {
                    $state = 6;
                } elseif ($isLetter) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } else {
                    break;
                }
            } elseif ($state === 6) {
                if (! $isDigit) {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($char !== '\'') {
                    break;
                }

                $state = 8;
            } elseif ($state === 8) {
                if ($char === '\'') {
                    $state = 9;
                } elseif ($char !== '0' && $char !== '1') {
                    break;
                }
            } elseif ($state === 9) {
                break;
            }

            // Re-read the current position: the hex branch moves the cursor.
            $token .= $this->str[$this->last];
        }

        if ($state === 2 || $state === 3 || ($token !== '.' && $state === 4) || $state === 6 || $state === 9) {
            // The loop stopped one character past the number.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }

        $this->last = $iBak;

        return null;
    }
917
918
    /**
     * Parses a string.
     *
     * The character under the cursor must either be recognised by
     * `Context::isString()` or be equal to `$quote`; otherwise `null` is
     * returned and the cursor is left untouched.
     *
     * Inside the string a doubled quote is kept as a two-character escape, and
     * so is a backslash followed by any character, except in backtick-quoted
     * strings. If the closing quote is missing, an error is reported and the
     * token is returned without it.
     *
     * @param string $quote additional starting symbol
     *
     * @throws LexerException
     */
    public function parseString($quote = ''): Token|null
    {
        $opening = $this->str[$this->last];
        $flags = Context::isString($opening);

        if (! $flags && $opening !== $quote) {
            return null;
        }

        $buffer = $opening;

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];

            // Escape sequences need a following character to pair with.
            if ($this->last + 1 < $this->len) {
                $isDoubledQuote = $char === $opening && $this->str[$this->last + 1] === $opening;
                $isBackslashEscape = $char === '\\' && $opening !== '`';

                if ($isDoubledQuote || $isBackslashEscape) {
                    // Keep both characters of the escape sequence verbatim.
                    $buffer .= $char . $this->str[++$this->last];
                    continue;
                }
            }

            if ($char === $opening) {
                // Closing quote reached; it is appended after the loop.
                break;
            }

            $buffer .= $char;
        }

        if ($this->last < $this->len && $this->str[$this->last] === $opening) {
            $buffer .= $this->str[$this->last];
        } else {
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $opening
                ),
                '',
                $this->last
            );
        }

        return new Token($buffer, Token::TYPE_STRING, $flags);
    }
969
970
    /**
     * Parses a symbol.
     *
     * Depending on the flags returned by `Context::isSymbol()` for the
     * character under the cursor, this handles:
     *
     * - variables (`@name`) and system variables (`@@name`);
     * - parameters, either the positional placeholder `?` or a prefixed name;
     * - any other symbol, whose text is taken entirely from the string or
     *   unknown token parsed at the cursor (presumably a backtick-quoted name).
     *
     * The name is parsed with `parseString('`')` first and `parseUnknown()` as
     * a fallback. A missing name is reported as an error, except for the
     * positional placeholder `?`, which has no name by definition.
     *
     * @throws LexerException
     */
    public function parseSymbol(): Token|null
    {
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);

        if (! $flags) {
            return null;
        }

        // Set for a bare `?`, which is complete on its own.
        $isPositionalParameter = false;

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            // The cursor moves past the first `@` even when no second `@`
            // follows, so that the name lookup below starts on the name.
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                // This is a system variable (e.g. `@@hostname`).
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
            if ($token === '?') {
                $isPositionalParameter = true;
            } elseif ($this->last + 1 < $this->len) {
                // Skip the prefix of a named parameter.
                ++$this->last;
            }
        } else {
            // The parsed name below provides the whole token text.
            $token = '';
        }

        $str = null;

        if ($this->last < $this->len) {
            $str = $this->parseString('`');

            if ($str === null) {
                $str = $this->parseUnknown();

                // A positional placeholder is not followed by a name, so a
                // failed lookup is not an error for it.
                if ($str === null && ! $isPositionalParameter) {
                    $this->error('Variable name was expected.', $this->str[$this->last], $this->last);
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }
1018
1019
    /**
     * Parses unknown parts of the query.
     *
     * Collects characters from the cursor up to, but not including, the next
     * separator (as decided by `Context::isSeparator()`). If the collected
     * text ends with the current delimiter, the delimiter is cut off and
     * left in the input.
     *
     * Returns `null` when the character under the cursor is itself a
     * separator. Otherwise the cursor is left on the last consumed character.
     */
    public function parseUnknown(): Token|null
    {
        $buffer = $this->str[$this->last];
        if (Context::isSeparator($buffer)) {
            return null;
        }

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];

            if (Context::isSeparator($char)) {
                break;
            }

            $buffer .= $char;

            // Test if end of token equals the current delimiter.
            if (! str_ends_with($buffer, $this->delimiter)) {
                continue;
            }

            // Remove the delimiter from the token and move the cursor back to
            // the delimiter's first character.
            $buffer = substr($buffer, 0, -$this->delimiterLen);
            $this->last -= $this->delimiterLen - 1;
            break;
        }

        // Step back onto the last character that belongs to the token.
        --$this->last;

        return new Token($buffer);
    }
1044
1045
    /**
     * Parses the delimiter of the query.
     *
     * Succeeds only when the complete delimiter is found at the cursor. A
     * delimiter that is cut short by the end of the input is not a match.
     *
     * On success the cursor is left on the last character of the delimiter;
     * otherwise `null` is returned and the cursor is left untouched.
     */
    public function parseDelimiter(): Token|null
    {
        // The whole delimiter must fit in the remaining input; without this
        // check a prefix of the delimiter at the very end of the input would
        // be reported as a full delimiter and move the cursor past the end.
        if ($this->last + $this->delimiterLen > $this->len) {
            return null;
        }

        for ($idx = 0; $idx < $this->delimiterLen; ++$idx) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
1064
}
1065