Passed
Pull Request — master (#504)
by
unknown
02:50
created

Lexer::getTokens()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 3
dl 0
loc 5
ccs 3
cts 3
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser;
6
7
use PhpMyAdmin\SqlParser\Exceptions\LexerException;
8
9
use function in_array;
10
use function mb_strlen;
11
use function sprintf;
12
use function str_ends_with;
13
use function strlen;
14
use function substr;
15
16
/**
17
 * Defines the lexer of the library.
18
 *
19
 * This is one of the most important components, along with the parser.
20
 *
21
 * Depends on context to extract lexemes.
22
 *
23
 * Performs lexical analysis over a SQL statement and splits it in multiple tokens.
24
 *
25
 * The output of the lexer is affected by the context of the SQL statement.
26
 *
27
 * @see Context
28
 */
29
class Lexer extends Core
30
{
31
    /**
32
     * A list of methods that are used in lexing the SQL query.
33
     */
34
    private const PARSER_METHODS = [
35
        // It is best to put the parsers in order of their complexity
36
        // (ascending) and their occurrence rate (descending).
37
        //
38
        // Conflicts:
39
        //
40
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
41
        // They fight over delimiter. The delimiter may be a keyword, a
42
        // number or almost any character which makes the delimiter one of
43
        // the first tokens that must be parsed.
44
        //
45
        // 2. `parseNumber` and `parseOperator`
46
        // They fight over `+` and `-`.
47
        //
48
        // 3. `parseComment` and `parseOperator`
49
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
50
        //
51
        // 4. `parseBool` and `parseKeyword`
52
        // They fight over `TRUE` and `FALSE`.
53
        //
54
        // 5. `parseKeyword` and `parseUnknown`
55
        // They fight over words. `parseUnknown` does not know about
56
        // keywords.
57
58
        'parseDelimiter',
59
        'parseWhitespace',
60
        'parseNumber',
61
        'parseComment',
62
        'parseOperator',
63
        'parseBool',
64
        'parseString',
65
        'parseSymbol',
66
        'parseKeyword',
67
        'parseLabel',
68
        'parseUnknown',
69
    ];
70
71
    /**
72
     * A list of keywords that indicate that the function keyword
73
     * is not used as a function
74
     */
75
    private const KEYWORD_NAME_INDICATORS = [
76
        'FROM',
77
        'SET',
78
        'WHERE',
79
    ];
80
81
    /**
82
     * A list of operators that indicate that the function keyword
83
     * is not used as a function
84
     */
85
    private const OPERATOR_NAME_INDICATORS = [
86
        ',',
87
        '.',
88
    ];
89
90
    /**
91
     * The string to be parsed.
92
     *
93
     * @var string|UtfString
94
     */
95
    public $str = '';
96
97
    /**
98
     * The length of `$str`.
99
     *
100
     * By storing its length, a lot of time is saved, because parsing methods
101
     * would call `strlen` every time.
102
     *
103
     * @var int
104
     */
105
    public $len = 0;
106
107
    /**
108
     * The index of the last parsed character.
109
     *
110
     * @var int
111
     */
112
    public $last = 0;
113
114
    /**
115
     * Tokens extracted from given strings.
116
     *
117
     * @var TokensList
118
     */
119
    public $list;
120
121
    /**
122
     * The default delimiter. This is used, by default, in all new instances.
123
     *
124
     * @var string
125
     */
126
    public static $defaultDelimiter = ';';
127
128
    /**
129
     * Statements delimiter.
130
     * This may change during lexing.
131
     *
132
     * @var string
133
     */
134
    public $delimiter;
135
136
    /**
137
     * The length of the delimiter.
138
     *
139
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
140
     * calls to `strlen`, which might affect performance when the delimiter is
141
     * big.
142
     *
143
     * @var int
144
     */
145
    public $delimiterLen;
146
147
    /**
148
     * @param string|UtfString $str       the query to be lexed
149
     * @param bool             $strict    whether strict mode should be
150
     *                                    enabled or not
151
     * @param string|null      $delimiter the delimiter to be used; the default delimiter is used when empty
152
     */
153 1426
    public function __construct($str, $strict = false, $delimiter = null)
    {
        parent::__construct();

        if ($str instanceof UtfString) {
            // Already wrapped: trust the wrapper's own length.
            $length = $str->length();
        } else {
            // `strlen` is used instead of `mb_strlen` because the lexer needs
            // to parse each byte of the input.
            $length = strlen($str);

            // A byte count that differs from the UTF-8 character count means
            // the input holds multi-byte characters, so it is wrapped in a
            // `UtfString` and the length is taken from the wrapper instead.
            if ($length !== mb_strlen($str, 'UTF-8')) {
                $str = new UtfString($str);
                $length = $str->length();
            }
        }

        $this->str = $str;
        $this->len = $length;

        $this->strict = $strict;

        // An empty delimiter argument selects the class-wide default.
        if (empty($delimiter)) {
            $delimiter = static::$defaultDelimiter;
        }

        $this->setDelimiter($delimiter);

        // Lexing happens eagerly, as part of construction.
        $this->lex();
    }
176
177
    /**
178
     * Sets the delimiter.
179
     *
180
     * @param string $delimiter the new delimiter
181
     */
182 1426
    public function setDelimiter($delimiter): void
    {
        // The delimiter and its cached byte length are always updated
        // together so that the two properties never disagree.
        $this->delimiter = $delimiter;
        $this->delimiterLen = strlen($this->delimiter);
    }
187
188
    /**
189
     * Parses the string and extracts lexemes.
190
     */
191 1426
    public function lex(): void
    {
        // Walks `$this->str` from start to end, asking each method listed in
        // `self::PARSER_METHODS` (in order) for a token at the current
        // position, and stores the resulting tokens in `$this->list`.
        //
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $list = new TokensList();

        /**
         * Last processed token.
         *
         * @var Token
         */
        $lastToken = null;

        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
            /**
             * The new token.
             *
             * @var Token
             */
            $token = null;

            // The first parser that recognises something at this position wins.
            foreach (self::PARSER_METHODS as $method) {
                $token = $this->$method();

                if ($token) {
                    break;
                }
            }

            if ($token === null) {
                // No parser matched: emit the single character as a token and
                // report it.
                // @assert($this->last === $lastIdx);
                $token = new Token($this->str[$this->last]);
                $this->error('Unexpected character.', $this->str[$this->last], $this->last);
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_SYMBOL
                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
                && (
                    $lastToken->type === Token::TYPE_STRING
                    || (
                        $lastToken->type === Token::TYPE_SYMBOL
                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                    )
                )
            ) {
                // Handles ```... FROM 'user'@'%' ...```.
                // The variable-like symbol is folded into the previous token,
                // which becomes a user symbol; no new token is added.
                $lastToken->token .= $token->token;
                $lastToken->type = Token::TYPE_SYMBOL;
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
                $lastToken->value .= '@' . $token->value;
                continue;
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_KEYWORD
                && $lastToken->type === Token::TYPE_OPERATOR
                && $lastToken->value === '.'
            ) {
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
                // a reserved word.
                $token->type = Token::TYPE_NONE;
                $token->flags = 0;
                $token->value = $token->token;
            }

            $token->position = $lastIdx;

            $list->tokens[$list->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected whitespace(s) before delimiter.', '', $this->last + 1);
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $pos = ++$this->last;
                $token = $this->parseWhitespace();

                if ($token !== null) {
                    $token->position = $pos;
                    $list->tokens[$list->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected delimiter.', '', $this->last + 1);
                    continue;
                }

                $pos = $this->last + 1;

                // Parsing the delimiter: every character up to the next
                // whitespace, capped at 15 characters.
                $this->delimiter = null;
                $delimiterLen = 0;
                while (
                    // The bounds check runs first, so the index used on the
                    // next line is always inside the string.
                    ++$this->last < $this->len
                    && ! Context::isWhitespace($this->str[$this->last])
                    && $delimiterLen < 15
                ) {
                    $this->delimiter .= $this->str[$this->last];
                    ++$delimiterLen;
                }

                // The collected character count decides whether a delimiter
                // was found. `empty()` must not be used here: it reports the
                // one-character delimiter "0" as empty, which would wrongly
                // raise an error and reset the delimiter to ";".
                if ($delimiterLen === 0) {
                    $this->error('Expected delimiter.', '', $this->last);
                    $this->delimiter = ';';
                }

                // Step back so the loop increment lands on the character that
                // stopped the scan.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            $lastToken = $token;
        }

        // Adding a final delimiter to mark the ending.
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $list;

        // Post-processing passes that need to look at neighbouring tokens.
        $this->solveAmbiguityOnStarOperator();
        $this->solveAmbiguityOnFunctionKeywords();
    }
329
330
    /**
331
     * Resolves the ambiguity when dealing with the "*" operator.
332
     *
333
     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
334
     * SELECT a.* FROM ...). To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
335
     * comments, right after the "*" position. The "*" is for sure an SQL wildcard if the next token found is any of:
336
     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
337
     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
338
     * - "," (a comma separator like in "SELECT *, field FROM...");
339
     * - ")" (a closing parenthesis like in "COUNT(*)").
340
     * This method will change the flag of the "*" tokens when any of those conditions above is true. Otherwise, the
341
     * default flag (arithmetic) will be kept.
342
     */
343 1426
    private function solveAmbiguityOnStarOperator(): void
    {
        // The list cursor is borrowed for the scan and restored at the end.
        $savedIdx = $this->list->idx;

        while (true) {
            $star = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*');
            if ($star === null) {
                break;
            }

            // getNext() already gets rid of whitespaces and comments.
            $following = $this->list->getNext();
            if ($following === null) {
                continue;
            }

            $beforeWildcardKeyword = $following->type === Token::TYPE_KEYWORD
                && in_array($following->value, ['FROM', 'USING'], true);
            $beforeWildcardOperator = $following->type === Token::TYPE_OPERATOR
                && in_array($following->value, [',', ')'], true);

            // Only a star directly followed by one of the markers above is
            // re-flagged; every other star keeps the flag the lexer gave it.
            if ($beforeWildcardKeyword || $beforeWildcardOperator) {
                $star->flags = Token::FLAG_OPERATOR_SQL;
            }
        }

        $this->list->idx = $savedIdx;
    }
366
367
    /**
368
     * Resolves the ambiguity when dealing with the functions keywords.
369
     *
370
     * In SQL statements, the function keywords might be used as table names or columns names.
371
     * To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
372
     * comments, right after the function keyword position. The function keyword is for sure used
373
     * as column name or table name if the next token found is any of:
374
     *
375
     * - "FROM" (the FROM keyword like in "SELECT Country x, AverageSalary avg FROM...");
376
     * - "WHERE" (the WHERE keyword like in "DELETE FROM emp x WHERE x.salary = 20");
377
     * - "SET" (the SET keyword like in "UPDATE Country x, City y set x.Name=x.Name");
378
     * - "," (a comma separator like 'x,' in "UPDATE Country x, City y set x.Name=x.Name");
379
     * - "." (a dot separator like in "x.asset_id FROM (SELECT evt.asset_id FROM evt)");
380
     * - "NULL" (when used as a table alias like in "avg.col FROM (SELECT ev.col FROM ev) avg").
381
     *
382
     * This method will change the flag of the function keyword tokens when any of those
383
     * conditions above is true. Otherwise, the
384
     * default flag (function keyword) will be kept.
385
     */
386 1426
    private function solveAmbiguityOnFunctionKeywords(): void
    {
        // The list cursor is borrowed for the scan and restored at the end.
        $iBak = $this->list->idx;
        $keywordFunction = Token::TYPE_KEYWORD | Token::FLAG_KEYWORD_FUNCTION;
        while (($keywordToken = $this->list->getNextOfTypeAndFlag(Token::TYPE_KEYWORD, $keywordFunction)) !== null) {
            $next = $this->list->getNext();

            // getNext() is treated as nullable, exactly as in
            // solveAmbiguityOnStarOperator(). A missing token is handled like
            // a token whose value is null, which is the outcome the unguarded
            // property reads produced before, minus the
            // "read property on null" warnings.
            if ($next !== null && $next->value !== null) {
                $followedByKeywordIndicator = $next->type === Token::TYPE_KEYWORD
                    && in_array($next->value, self::KEYWORD_NAME_INDICATORS, true);
                $followedByOperatorIndicator = $next->type === Token::TYPE_OPERATOR
                    && in_array($next->value, self::OPERATOR_NAME_INDICATORS, true);

                // Nothing says this keyword is used as a name: keep it as a
                // function keyword.
                if (! $followedByKeywordIndicator && ! $followedByOperatorIndicator) {
                    continue;
                }
            }

            // The keyword is used as a plain name: strip its keyword type and
            // flags and remember the original text in `keyword`.
            $keywordToken->type = Token::TYPE_NONE;
            $keywordToken->flags = Token::TYPE_NONE;
            $keywordToken->keyword = $keywordToken->value;
        }

        $this->list->idx = $iBak;
    }
411
412
    /**
413
     * Creates a new error log.
414
     *
415
     * @param string $msg  the error message
416
     * @param string $str  the character that produced the error
417
     * @param int    $pos  the position of the character
418
     * @param int    $code the code of the error
419
     *
420
     * @throws LexerException throws the exception, if strict mode is enabled.
421
     */
422 36
    public function error($msg, $str = '', $pos = 0, $code = 0): void
    {
        // The message is passed through the translator before being wrapped
        // in a lexer-specific exception.
        $translated = Translator::gettext($msg);

        // What happens to the exception (stored or thrown) is decided by the
        // parent implementation.
        parent::error(new LexerException($translated, $str, $pos, $code));
    }
432
433
    /**
434
     * Parses a keyword.
435
     */
436 1398
    public function parseKeyword(): Token|null
    {
        // Text collected so far; runs of whitespace contribute one character.
        $candidate = '';

        /**
         * Longest keyword recognised so far, if any.
         *
         * @var Token|null
         */
        $keyword = null;

        // Index in `$this->str` of the last character of `$keyword`.
        $endIndex = $this->last;

        // Whether the previously examined character was a whitespace.
        $previousWasSpace = false;

        $length = 1;
        while ($length < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len) {
            $char = $this->str[$this->last];
            $isSpace = Context::isWhitespace($char);

            // Composed keywords shouldn't have more than one whitespace
            // between keywords: a repeated whitespace is skipped and does not
            // count towards the keyword size.
            if (! $isSpace || ! $previousWasSpace) {
                $previousWasSpace = $isSpace;

                $candidate .= $char;
                $flags = Context::isKeyword($candidate);

                // A keyword only counts when it ends the input or is followed
                // by a separator.
                $atWordBoundary = $this->last + 1 === $this->len
                    || Context::isSeparator($this->str[$this->last + 1]);

                if ($atWordBoundary && $flags) {
                    // Scanning continues after a match so that the longest
                    // keyword wins; `OR` and `ORDER` share the prefix `OR`.
                    $keyword = new Token($candidate, Token::TYPE_KEYWORD, $flags);
                    $endIndex = $this->last;
                }

                ++$length;
            }

            ++$this->last;
        }

        // Rewind to the end of the best match (or to the start if none).
        $this->last = $endIndex;

        return $keyword;
    }
492
493
    /**
494
     * Parses a label.
495
     */
496 1054
    public function parseLabel(): Token|null
    {
        // Text collected so far, including whitespace found before the colon.
        $label = '';

        /**
         * The label token, set only once a terminating colon is found.
         *
         * @var Token|null
         */
        $result = null;

        // Index in `$this->str` where the label ends.
        $endIndex = $this->last;

        $length = 1;
        while ($length < Context::LABEL_MAX_LENGTH && $this->last < $this->len) {
            $char = $this->str[$this->last];
            $hasContent = $length > 1;

            if ($hasContent && $char === ':') {
                // End of label
                $label .= $char;
                $result = new Token($label, Token::TYPE_LABEL);
                $endIndex = $this->last;
                break;
            }

            $countsTowardsLength = true;
            if ($hasContent && Context::isWhitespace($char)) {
                // Whitespace between label and `:` is kept in the text but
                // does not increase the measured size.
                $countsTowardsLength = false;
            } elseif (Context::isSeparator($char)) {
                // Any other separator means this is not a label.
                break;
            }

            $label .= $char;

            if ($countsTowardsLength) {
                ++$length;
            }

            ++$this->last;
        }

        // Rewind to the colon on success, or to the start otherwise.
        $this->last = $endIndex;

        return $result;
    }
536
537
    /**
538
     * Parses an operator.
539
     */
540 1416
    public function parseOperator(): Token|null
    {
        // Characters collected so far.
        $candidate = '';

        /**
         * Longest operator recognised so far, if any.
         *
         * @var Token|null
         */
        $operator = null;

        // Index in `$this->str` of the last character of `$operator`.
        $endIndex = $this->last;

        $length = 1;
        while ($length < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len) {
            $candidate .= $this->str[$this->last];
            $flags = Context::isOperator($candidate);

            // Scanning continues after a match so a longer operator can
            // replace a shorter one.
            if ($flags) {
                $operator = new Token($candidate, Token::TYPE_OPERATOR, $flags);
                $endIndex = $this->last;
            }

            ++$length;
            ++$this->last;
        }

        // Rewind to the end of the best match (or to the start if none).
        $this->last = $endIndex;

        return $operator;
    }
572
573
    /**
574
     * Parses a whitespace.
575
     */
576 1416
    public function parseWhitespace(): Token|null
    {
        $whitespace = $this->str[$this->last];

        // Not a whitespace at the current position: nothing to parse and the
        // position is left untouched.
        if (! Context::isWhitespace($whitespace)) {
            return null;
        }

        // Collect every whitespace character that follows.
        for ($next = $this->last + 1; $next < $this->len; ++$next) {
            $char = $this->str[$next];
            if (! Context::isWhitespace($char)) {
                break;
            }

            $whitespace .= $char;
        }

        // `$next` is the first index not consumed; the position is left on
        // the last whitespace character.
        $this->last = $next - 1;

        return new Token($whitespace, Token::TYPE_WHITESPACE);
    }
592
593
    /**
594
     * Parses a comment.
595
     */
596 1416
    public function parseComment(): Token|null
    {
        // Tries, in order, a one-character comment marker, a two-character
        // C style marker and finally an SQL style marker. When none matches,
        // the position is restored and null is returned.
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // Bash style comments. (#comment\n)
        if (Context::isComment($token)) {
            while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                $token .= $this->str[$this->last];
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*\/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token)) {
                // There might be a conflict with "*" operator here, when string is "*/*".
                // This can occur in the following statements:
                // - "SELECT */* comment */ FROM ..."
                // - "SELECT 2*/* comment */3 AS `six`;"
                //
                // Only a closing marker can be confused with the "*"
                // operator, so the check is limited to "*/". Without that
                // restriction an opening marker followed by "*" (as in
                // "/**/" or "/** ... */") would be rejected as a comment too.
                $next = $this->last + 1;
                if ($token === '*/' && ($next < $this->len) && $this->str[$next] === '*') {
                    // Conflict in "*/*": first "*" was not for ending a comment.
                    // Stop here and let other parsing method define the true behavior of that first star.
                    $this->last = $iBak;

                    return null;
                }

                $flags = Token::FLAG_COMMENT_C;

                // This comment already ended. It may be a part of a
                // previous MySQL specific command.
                if ($token === '*/') {
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len && $this->str[$this->last + 1] === '!') {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // Digits right after the "!" are kept in the token.
                    while (
                        ++$this->last < $this->len
                        && $this->str[$this->last] >= '0'
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }

                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Parsing the comment: consume characters until the pair
                // "*" "/" is seen or the input ends.
                while (
                    ++$this->last < $this->len
                    && (
                        $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending.
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            $end = false;
        } else {
            // No further character is available; tell `isComment` that the
            // candidate sits at the end of the input.
            --$this->last;
            $end = true;
        }

        if (Context::isComment($token, $end)) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                    $token .= $this->str[$this->last];
                }
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }

        // Not a comment: restore the position for the next parser.
        $this->last = $iBak;

        return null;
    }
709
710
    /**
     * Parses a boolean.
     *
     * Reads the next four characters and, when needed, a fifth one, asking
     * `Context::isBool()` whether either candidate is a boolean literal.
     * On a match the cursor is left on the last consumed character; otherwise
     * the cursor is restored to where it was on entry and `null` is returned.
     */
    public function parseBool(): Token|null
    {
        $start = $this->last;

        // At least `min(strlen('TRUE'), strlen('FALSE'))` characters must
        // still be available.
        if ($start + 4 > $this->len) {
            return null;
        }

        // First candidate: four characters (_TRUE_ or _FALS_e).
        $candidate = '';
        for ($offset = 0; $offset < 4; ++$offset) {
            $candidate .= $this->str[$start + $offset];
        }

        $this->last = $start + 3;
        if (Context::isBool($candidate)) {
            return new Token($candidate, Token::TYPE_BOOL);
        }

        // Second candidate: extend by one character (fals_E_).
        $this->last = $start + 4;
        if ($this->last < $this->len) {
            $candidate .= $this->str[$this->last];
            if (Context::isBool($candidate)) {
                return new Token($candidate, Token::TYPE_BOOL, 1);
            }
        }

        // Not a boolean: rewind.
        $this->last = $start;

        return null;
    }
740
741
    /**
     * Parses a number.
     *
     * Handles optionally signed integers, floats, numbers in approximate
     * (exponent) form, `0x…` hexadecimal literals and `b'…'` bit literals.
     *
     * On success the cursor (`$this->last`) is left on the last character of
     * the number and the token carries the matching `Token::FLAG_NUMBER_*`
     * flags. On failure the cursor is restored and `null` is returned.
     */
    public function parseNumber(): Token|null
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machine and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 ----------------[ 0x (lowercase x) ]-------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        //
        // Valid final states are: 2, 3, 4 (unless the token is a lone `.`),
        // 6 and 9. Any parsing that finished in a state other than these is
        // invalid.
        //
        // In states 3, 4 and 5 a letter that is not part of the notation
        // means this is not a number at all (a number can't be directly
        // followed by a letter). Parsing is abandoned right away in that
        // case instead of scanning the remaining input first.
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];

            if ($state === 1) {
                if ($char === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif (
                    $this->last + 1 < $this->len
                    && $char === '0'
                    && $this->str[$this->last + 1] === 'x'
                ) {
                    // Consume the `0` here; the `x` is appended at the bottom
                    // of the loop body.
                    $token .= $this->str[$this->last++];
                    $state = 2;
                } elseif ($char >= '0' && $char <= '9') {
                    $state = 3;
                } elseif ($char === '.') {
                    $state = 4;
                } elseif ($char === 'b') {
                    $state = 7;
                } elseif ($char !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                if (
                    ! (
                        ($char >= '0' && $char <= '9')
                        || ($char >= 'A' && $char <= 'F')
                        || ($char >= 'a' && $char <= 'f')
                    )
                ) {
                    break;
                }
            } elseif ($state === 3) {
                if ($char === '.') {
                    $state = 4;
                } elseif ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif (($char >= 'a' && $char <= 'z') || ($char >= 'A' && $char <= 'Z')) {
                    // A number can't be directly followed by a letter.
                    $this->last = $iBak;

                    return null;
                } elseif ($char < '0' || $char > '9') {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif (($char >= 'a' && $char <= 'z') || ($char >= 'A' && $char <= 'Z')) {
                    // A number can't be directly followed by a letter.
                    $this->last = $iBak;

                    return null;
                } elseif ($char < '0' || $char > '9') {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if ($char === '+' || $char === '-' || ($char >= '0' && $char <= '9')) {
                    $state = 6;
                } elseif (($char >= 'a' && $char <= 'z') || ($char >= 'A' && $char <= 'Z')) {
                    // A number can't be directly followed by a letter.
                    $this->last = $iBak;

                    return null;
                } else {
                    break;
                }
            } elseif ($state === 6) {
                if ($char < '0' || $char > '9') {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($char !== '\'') {
                    break;
                }

                $state = 8;
            } elseif ($state === 8) {
                if ($char === '\'') {
                    $state = 9;
                } elseif ($char !== '0' && $char !== '1') {
                    break;
                }
            } elseif ($state === 9) {
                break;
            }

            // Re-read the character: the hex prefix branch moved the cursor.
            $token .= $this->str[$this->last];
        }

        if ($state === 2 || $state === 3 || ($token !== '.' && $state === 4) || $state === 6 || $state === 9) {
            // The loop stopped one character past the number.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }

        $this->last = $iBak;

        return null;
    }
899
900
    /**
     * Parses a string.
     *
     * The character under the cursor must either be recognised by
     * `Context::isString()` or be equal to `$quote`; otherwise `null` is
     * returned and nothing is consumed. The opening character then serves as
     * the closing quote to look for. A doubled quote is kept in the token as
     * an escaped quote, and so is a backslash followed by any character,
     * except when the quote is a backtick.
     *
     * @param string $quote additional starting symbol
     *
     * @throws LexerException
     */
    public function parseString($quote = ''): Token|null
    {
        $opening = $this->str[$this->last];
        $flags = Context::isString($opening);

        if (! $flags && $opening !== $quote) {
            return null;
        }

        // From here on the opening character is the quote to match.
        $quote = $opening;
        $token = $opening;

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $current = $this->str[$this->last];

            // Two-character escape sequences: a doubled quote, or a backslash
            // escape (backslashes are literal inside backticks).
            if (
                $this->last + 1 < $this->len
                && (
                    ($current === $quote && $this->str[$this->last + 1] === $quote)
                    || ($current === '\\' && $quote !== '`')
                )
            ) {
                $token .= $current . $this->str[++$this->last];
                continue;
            }

            if ($current === $quote) {
                // Closing quote reached; it is appended below.
                break;
            }

            $token .= $current;
        }

        if ($this->last < $this->len && $this->str[$this->last] === $quote) {
            $token .= $this->str[$this->last];
        } else {
            // Input ended before the closing quote was found.
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $quote
                ),
                '',
                $this->last
            );
        }

        return new Token($token, Token::TYPE_STRING, $flags);
    }
951
952
    /**
     * Parses a symbol.
     *
     * The character under the cursor must be recognised by
     * `Context::isSymbol()`. Depending on the returned flags the prefix
     * (`@`, `@@`, a parameter marker, or nothing) is kept, and the name that
     * follows is parsed as a backtick-quoted string or, failing that, as an
     * unknown token.
     *
     * @throws LexerException
     */
    public function parseSymbol(): Token|null
    {
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);

        if (! $flags) {
            return null;
        }

        $hasNext = $this->last + 1 < $this->len;

        if (($flags & Token::FLAG_SYMBOL_VARIABLE) !== 0) {
            if ($hasNext) {
                // Step past the first character; a second `@` marks a system
                // variable (e.g. `@@hostname`).
                ++$this->last;
                if ($this->str[$this->last] === '@') {
                    $token .= $this->str[$this->last];
                    ++$this->last;
                    $flags |= Token::FLAG_SYMBOL_SYSTEM;
                }
            }
        } elseif (($flags & Token::FLAG_SYMBOL_PARAMETER) !== 0) {
            if ($hasNext && $token !== '?') {
                ++$this->last;
            }
        } else {
            // The name parsed below starts at the current character, so the
            // prefix is dropped.
            $token = '';
        }

        if ($this->last < $this->len) {
            // Prefer a backtick-quoted name, fall back to a bare one.
            $name = $this->parseString('`') ?? $this->parseUnknown();

            if ($name === null) {
                $this->error('Variable name was expected.', $this->str[$this->last], $this->last);
            } else {
                $token .= $name->token;
            }
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }
1000
1001
    /**
     * Parses unknown parts of the query.
     *
     * Collects characters up to the next separator (as decided by
     * `Context::isSeparator()`) or up to the current delimiter, whichever
     * comes first. Returns `null` when the character under the cursor is
     * itself a separator. The cursor is left on the last character that
     * belongs to the returned token.
     */
    public function parseUnknown(): Token|null
    {
        $token = $this->str[$this->last];
        if (Context::isSeparator($token)) {
            return null;
        }

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $current = $this->str[$this->last];
            if (Context::isSeparator($current)) {
                break;
            }

            $token .= $current;

            if (! str_ends_with($token, $this->delimiter)) {
                continue;
            }

            // The token ran into the current delimiter: cut it off and move
            // the cursor back to where the delimiter starts.
            $token = substr($token, 0, -$this->delimiterLen);
            $this->last -= $this->delimiterLen - 1;
            break;
        }

        // Step back onto the last character that is part of the token.
        --$this->last;

        return new Token($token);
    }
1026
1027
    /**
     * Parses the delimiter of the query.
     *
     * Compares the input at the cursor with the current delimiter, character
     * by character. Returns a delimiter token, leaving the cursor on the last
     * character of the delimiter, only when the whole delimiter is present;
     * otherwise returns `null` without moving the cursor.
     */
    public function parseDelimiter(): Token|null
    {
        $idx = 0;

        while ($idx < $this->delimiterLen && $this->last + $idx < $this->len) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }

            ++$idx;
        }

        // The input ended after matching only a prefix of a multi-character
        // delimiter (e.g. a single `/` at the end while the delimiter is
        // `//`): that is not a delimiter.
        if ($idx < $this->delimiterLen) {
            return null;
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
1046
}
1047