Passed
Push — master ( fd9e59...dec0a2 )
by William
12:38 queued 11s
created

Lexer::error()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 9
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
cc 1
eloc 6
nc 1
nop 4
dl 0
loc 9
ccs 2
cts 2
cp 1
crap 1
rs 10
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser;
6
7
use PhpMyAdmin\SqlParser\Exceptions\LexerException;
8
9
use function define;
10
use function defined;
11
use function in_array;
12
use function mb_strlen;
13
use function sprintf;
14
use function str_ends_with;
15
use function strlen;
16
use function substr;
17
18 7
if (defined('USE_UTF_STRINGS') === false) {
    // NOTE: PHP 5.5 and older use "ISO-8859-1" as the default internal
    // encoding, so every `mb_` call has to be given the 'UTF-8' encoding
    // explicitly in order to work properly.

    /*
     * When true, the lexer wraps multibyte input strings in `UtfString`.
     * `UtfString` may be slower, but it gives better results.
     *
     * @var bool
     */
    define('USE_UTF_STRINGS', true);
}
32
33
/**
34
 * Defines the lexer of the library.
35
 *
36
 * This is one of the most important components, along with the parser.
37
 *
38
 * Depends on context to extract lexemes.
39
 *
40
 * Performs lexical analysis over a SQL statement and splits it in multiple tokens.
41
 *
42
 * The output of the lexer is affected by the context of the SQL statement.
43
 *
44
 * @see Context
45
 */
46
class Lexer extends Core
47
{
48
    /**
49
     * A list of methods that are used in lexing the SQL query.
50
     *
51
     * @var string[]
52
     */
53
    public static $PARSER_METHODS = [
54
        // It is best to put the parsers in order of their complexity
55
        // (ascending) and their occurrence rate (descending).
56
        //
57
        // Conflicts:
58
        //
59
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
60
        // They fight over delimiter. The delimiter may be a keyword, a
61
        // number or almost any character which makes the delimiter one of
62
        // the first tokens that must be parsed.
63
        //
64
        // 2. `parseNumber` and `parseOperator`
65
        // They fight over `+` and `-`.
66
        //
67
        // 3. `parseComment` and `parseOperator`
68
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
69
        //
70
        // 4. `parseBool` and `parseKeyword`
71
        // They fight over `TRUE` and `FALSE`.
72
        //
73
        // 5. `parseKeyword` and `parseUnknown`
74
        // They fight over words. `parseUnknown` does not know about
75
        // keywords.
76
77
        'parseDelimiter',
78
        'parseWhitespace',
79
        'parseNumber',
80
        'parseComment',
81
        'parseOperator',
82
        'parseBool',
83
        'parseString',
84
        'parseSymbol',
85
        'parseKeyword',
86
        'parseLabel',
87
        'parseUnknown',
88
    ];
89
90
91
    /**
92
     * A list of keywords that indicate that the function keyword
93
     * is not used as a function
94
     *
95
     * @var string[]
96
     */
97
    public $KEYWORD_NAME_INDICATORS = [
98
        'FROM',
99
        'SET',
100
        'WHERE',
101
    ];
102
103
    /**
104
     * A list of operators that indicate that the function keyword
105
     * is not used as a function
106
     *
107
     * @var string[]
108
     */
109
    public $OPERATOR_NAME_INDICATORS = [
110
        ',',
111
        '.',
112
    ];
113
114
    /**
115
     * The string to be parsed.
116
     *
117
     * @var string|UtfString
118
     */
119
    public $str = '';
120
121
    /**
122
     * The length of `$str`.
123
     *
124
     * By storing its length, a lot of time is saved, because parsing methods
125
     * would call `strlen` every time.
126
     *
127
     * @var int
128
     */
129
    public $len = 0;
130
131
    /**
132
     * The index of the last parsed character.
133
     *
134
     * @var int
135
     */
136
    public $last = 0;
137
138
    /**
139
     * Tokens extracted from given strings.
140
     *
141
     * @var TokensList
142
     */
143
    public $list;
144
145
    /**
146
     * The default delimiter. This is used, by default, in all new instances.
147
     *
148
     * @var string
149
     */
150
    public static $DEFAULT_DELIMITER = ';';
151
152
    /**
153
     * Statements delimiter.
154
     * This may change during lexing.
155
     *
156
     * @var string
157 4
     */
158
    public $delimiter;
159 4
160
    /**
161 4
     * The length of the delimiter.
162
     *
163
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
164
     * calls to `strlen`, which might affect performance when the delimiter is
165
     * big.
166
     *
167
     * @var int
168
     */
169
    public $delimiterLen;
170 2340
171
    /**
172
     * Gets the tokens list parsed by a new instance of a lexer.
173
     *
174 2340
     * @param string|UtfString $str       the query to be lexed
175
     * @param bool             $strict    whether strict mode should be
176
     *                                    enabled or not
177
     * @param string           $delimiter the delimiter to be used
178 2340
     *
179 4
     * @return TokensList
180
     */
181
    public static function getTokens($str, $strict = false, $delimiter = null)
    {
        // The constructor runs the whole lexing pass, so a freshly built
        // instance already carries the resulting tokens list.
        return (new self($str, $strict, $delimiter))->list;
    }
187
188 2340
    /**
189
     * @param string|UtfString $str       the query to be lexed
190 2340
     * @param bool             $strict    whether strict mode should be
191 585
     *                                    enabled or not
192
     * @param string           $delimiter the delimiter to be used
193
     */
194
    public function __construct($str, $strict = false, $delimiter = null)
    {
        // `strlen` is used instead of `mb_strlen` because the lexer needs to
        // parse each byte of the input.
        $len = $str instanceof UtfString ? $str->length() : strlen($str);

        // For multi-byte strings, a new instance of `UtfString` is
        // initialized (only if `UtfString` usage is forced).
        if (! $str instanceof UtfString && USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
            $str = new UtfString($str);
        }

        $this->str = $str;
        $this->len = $str instanceof UtfString ? $str->length() : $len;

        $this->strict = $strict;

        // Setting the delimiter.
        // `empty()` alone also reports the string '0' as empty, which would
        // silently replace a legitimate one-character delimiter with the
        // default one, so that value is accepted explicitly. Every other
        // empty value (null, '', ...) still selects the default delimiter.
        $hasDelimiter = $delimiter === '0' || ! empty($delimiter);
        $this->setDelimiter($hasDelimiter ? $delimiter : static::$DEFAULT_DELIMITER);

        $this->lex();
    }
216
217
    /**
218
     * Sets the delimiter.
219
     *
220
     * @param string $delimiter the new delimiter
221
     *
222 2340
     * @return void
223
     */
224
    public function setDelimiter($delimiter)
    {
        // The delimiter and its byte length are always stored together, so
        // the length does not have to be recomputed by the parsing methods.
        $this->delimiter = $delimiter;
        $this->delimiterLen = strlen($this->delimiter);
    }
229 2340
230
    /**
231 2340
     * Parses the string and extracts lexemes.
232
     *
233
     * @return void
234
     */
235
    public function lex()
    {
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $list = new TokensList();

        /**
         * Last processed token.
         *
         * @var Token
         */
        $lastToken = null;

        // `$lastIdx` always holds the index at which the token currently being
        // parsed starts; the parsing methods advance `$this->last` to the last
        // character they consumed.
        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
            /**
             * The new token.
             *
             * @var Token
             */
            $token = null;

            // The first parsing method that produces a token wins.
            foreach (static::$PARSER_METHODS as $method) {
                $token = $this->$method();

                if ($token) {
                    break;
                }
            }

            if ($token === null) {
                // @assert($this->last === $lastIdx);
                // No parser recognised the character: it is still emitted as
                // a token and reported as an error.
                $token = new Token($this->str[$this->last]);
                $this->error('Unexpected character.', $this->str[$this->last], $this->last);
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_SYMBOL
                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
                && (
                    $lastToken->type === Token::TYPE_STRING
                    || (
                        $lastToken->type === Token::TYPE_SYMBOL
                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                    )
                )
            ) {
                // Handles ```... FROM 'user'@'%' ...```.
                // The new token is merged into the previous one instead of
                // being added to the list.
                $lastToken->token .= $token->token;
                $lastToken->type = Token::TYPE_SYMBOL;
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
                $lastToken->value .= '@' . $token->value;
                continue;
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_KEYWORD
                && $lastToken->type === Token::TYPE_OPERATOR
                && $lastToken->value === '.'
            ) {
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
                // a reserved word.
                $token->type = Token::TYPE_NONE;
                $token->flags = 0;
                $token->value = $token->token;
            }

            $token->position = $lastIdx;

            $list->tokens[$list->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected whitespace(s) before delimiter.', '', $this->last + 1);
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $pos = ++$this->last;
                $token = $this->parseWhitespace();

                if ($token !== null) {
                    $token->position = $pos;
                    $list->tokens[$list->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected delimiter.', '', $this->last + 1);
                    continue;
                }

                $pos = $this->last + 1;

                // Parsing the delimiter: every character up to the next
                // whitespace, limited to 15 characters.
                $this->delimiter = null;
                $delimiterLen = 0;
                while (
                    ++$this->last < $this->len
                    && ! Context::isWhitespace($this->str[$this->last])
                    && $delimiterLen < 15
                ) {
                    $this->delimiter .= $this->str[$this->last];
                    ++$delimiterLen;
                }

                // The number of collected characters is checked rather than
                // `empty($this->delimiter)`, because `empty()` also reports
                // the perfectly valid delimiter '0' as missing.
                if ($delimiterLen === 0) {
                    $this->error('Expected delimiter.', '', $this->last);
                    $this->delimiter = ';';
                }

                // The loop above stopped one character past the delimiter.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            $lastToken = $token;
        }

        // Adding a final delimiter to mark the ending.
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $list;

        $this->solveAmbiguityOnStarOperator();
        $this->solveAmbiguityOnFunctionKeywords();
    }
373
374
    /**
375
     * Resolves the ambiguity when dealing with the "*" operator.
376 372
     *
377 372
     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
378
     * SELECT a.* FROM ...). To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
379 32
     * comments, right after the "*" position. The "*" is for sure an SQL wildcard if the next token found is any of:
380
     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
381
     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
382 344
     * - "," (a comma separator like in "SELECT *, field FROM...");
383
     * - ")" (a closing parenthesis like in "COUNT(*)").
384
     * This method will change the flag of the "*" tokens when any of those conditions above is true. Otherwise, the
385 2340
     * default flag (arithmetic) will be kept.
386 585
     */
387
    private function solveAmbiguityOnStarOperator(): void
    {
        // The list cursor is moved while scanning and put back afterwards.
        $savedIdx = $this->list->idx;

        while (true) {
            $star = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*');
            if ($star === null) {
                break;
            }

            // getNext() already gets rid of whitespaces and comments.
            $follower = $this->list->getNext();
            if ($follower === null) {
                continue;
            }

            $beforeKeyword = $follower->type === Token::TYPE_KEYWORD
                && in_array($follower->value, ['FROM', 'USING'], true);
            $beforeSeparator = $follower->type === Token::TYPE_OPERATOR
                && in_array($follower->value, [',', ')'], true);

            // Only a "*" followed by one of the tokens above is flagged as
            // an SQL wildcard; any other "*" keeps its current flags.
            if ($beforeKeyword || $beforeSeparator) {
                $star->flags = Token::FLAG_OPERATOR_SQL;
            }
        }

        $this->list->idx = $savedIdx;
    }
410
411
    /**
412
     * Resolves the ambiguity when dealing with the functions keywords.
413
     *
414
     * In SQL statements, the function keywords might be used as table names or columns names.
415
     * To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
416 2280
     * comments, right after the function keyword position. The function keyword is for sure used
417
     * as column name or table name if the next token found is any of:
418 2280
     *
419
     * - "FROM" (the FROM keyword like in "SELECT Country x, AverageSalary avg FROM...");
420
     * - "WHERE" (the WHERE keyword like in "DELETE FROM emp x WHERE x.salary = 20");
421
     * - "SET" (the SET keyword like in "UPDATE Country x, City y set x.Name=x.Name");
422
     * - "," (a comma separator like 'x,' in "UPDATE Country x, City y set x.Name=x.Name");
423
     * - "." (a dot separator like in "x.asset_id FROM (SELECT evt.asset_id FROM evt)");
424
     * - "NULL" (when used as a table alias like in "avg.col FROM (SELECT ev.col FROM ev) avg").
425 2280
     *
426
     * This method will change the flag of the function keyword tokens when any of those
427
     * conditions above is true. Otherwise, the
428
     * default flag (function keyword) will be kept.
429
     */
430 2280
    private function solveAmbiguityOnFunctionKeywords(): void
    {
        // The list cursor is moved while scanning and put back afterwards.
        $iBak = $this->list->idx;
        $keywordFunction = Token::TYPE_KEYWORD | Token::FLAG_KEYWORD_FUNCTION;
        while (($keywordToken = $this->list->getNextOfTypeAndFlag(Token::TYPE_KEYWORD, $keywordFunction)) !== null) {
            $next = $this->list->getNext();

            // The keyword is used as a name when it is followed by one of the
            // indicator keywords/operators or by a token without a value.
            // A missing next token is handled explicitly (as in
            // `solveAmbiguityOnStarOperator`) and is treated like a token
            // without a value, instead of reading properties of `null`.
            $usedAsName = $next === null
                || $next->value === null
                || (
                    $next->type === Token::TYPE_KEYWORD
                    && in_array($next->value, $this->KEYWORD_NAME_INDICATORS, true)
                )
                || (
                    $next->type === Token::TYPE_OPERATOR
                    && in_array($next->value, $this->OPERATOR_NAME_INDICATORS, true)
                );

            if (! $usedAsName) {
                continue;
            }

            // NOTE(review): the flags are reset with the `TYPE_NONE` constant,
            // whereas `lex()` resets flags with a literal 0 — presumably the
            // same value; confirm against `Token`.
            $keywordToken->type = Token::TYPE_NONE;
            $keywordToken->flags = Token::TYPE_NONE;
            $keywordToken->keyword = $keywordToken->value;
        }

        $this->list->idx = $iBak;
    }
455
456 2280
    /**
457 2280
     * Creates a new error log.
458
     *
459
     * @param string $msg  the error message
460 2212
     * @param string $str  the character that produced the error
461 2212
     * @param int    $pos  the position of the character
462
     * @param int    $code the code of the error
463
     *
464
     * @return void
465
     *
466
     * @throws LexerException throws the exception, if strict mode is enabled.
467
     */
468 2280
    public function error($msg, $str = '', $pos = 0, $code = 0)
    {
        // The message is passed through the translator before being wrapped
        // in the exception; handling of the exception itself is delegated to
        // `parent::error()`.
        parent::error(new LexerException(Translator::gettext($msg), $str, $pos, $code));
    }
478 1684
479
    /**
480 1684
     * Parses a keyword.
481
     *
482
     * @return Token|null
483
     */
484
    public function parseKeyword()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         */
        $iEnd = $this->last;

        /**
         * Whether last parsed character is a whitespace.
         *
         * @var bool
         */
        $lastSpace = false;

        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            // Composed keywords shouldn't have more than one whitespace between
            // keywords.
            if (Context::isWhitespace($this->str[$this->last])) {
                if ($lastSpace) {
                    --$j; // The size of the keyword didn't increase.
                    continue;
                }

                $lastSpace = true;
            } else {
                $lastSpace = false;
            }

            $token .= $this->str[$this->last];
            $flags = Context::isKeyword($token);

            // `Context::isKeyword()` yields the keyword flags or null, so
            // "not a keyword" is tested strictly against null instead of
            // relying on the truthiness of an integer.
            if ($flags === null) {
                continue;
            }

            // A keyword only counts when it ends at the end of the input or
            // is directly followed by a separator.
            if ($this->last + 1 !== $this->len && ! Context::isSeparator($this->str[$this->last + 1])) {
                continue;
            }

            $ret = new Token($token, Token::TYPE_KEYWORD, $flags);
            $iEnd = $this->last;

            // We don't break so we find longest keyword.
            // For example, `OR` and `ORDER` have a common prefix `OR`.
            // If we stopped at `OR`, the parsing would be invalid.
        }

        // Rewind to the end of the longest keyword found (or to the starting
        // position when none was found).
        $this->last = $iEnd;

        return $ret;
    }
540 2316
541 2316
    /**
542 2316
     * Parses a label.
543
     *
544 2316
     * @return Token|null
545 2308
     */
546
    public function parseLabel()
    {
        // Position to rewind to when no label is found.
        $start = $this->last;

        $label = '';

        // Counts the characters of the label; whitespace found after the
        // first character is collected but does not count.
        $length = 1;

        while ($length < Context::LABEL_MAX_LENGTH && $this->last < $this->len) {
            $char = $this->str[$this->last];

            if ($char === ':' && $length > 1) {
                // End of label: `$this->last` stays on the colon.
                return new Token($label . $char, Token::TYPE_LABEL);
            }

            // Whitespace between label and :
            $isInnerSpace = Context::isWhitespace($char) && $length > 1;

            if (! $isInnerSpace && Context::isSeparator($char)) {
                // Any other separator
                break;
            }

            $label .= $char;

            if (! $isInnerSpace) {
                ++$length;
            }

            ++$this->last;
        }

        // Not a label: restore the position the method was entered with.
        $this->last = $start;

        return null;
    }
586 2316
587 2316
    /**
588
     * Parses an operator.
589
     *
590 2316
     * @return Token|null
591 12
     */
592 12
    public function parseOperator()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         */
        $iEnd = $this->last;

        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            $token .= $this->str[$this->last];
            $flags = Context::isOperator($token);

            // `Context::isOperator()` yields the operator flags or null, so
            // "not an operator" is tested strictly against null instead of
            // relying on the truthiness of an integer.
            if ($flags === null) {
                continue;
            }

            // No break here, so that the longest operator is kept.
            $ret = new Token($token, Token::TYPE_OPERATOR, $flags);
            $iEnd = $this->last;
        }

        // Rewind to the end of the longest operator found (or to the starting
        // position when none was found).
        $this->last = $iEnd;

        return $ret;
    }
624 132
625 12
    /**
626
     * Parses a whitespace.
627
     *
628
     * @return Token|null
629 132
     */
630 12
    public function parseWhitespace()
    {
        $first = $this->str[$this->last];
        if (! Context::isWhitespace($first)) {
            return null;
        }

        // Collect the whole run of consecutive whitespace characters.
        $run = $first;
        for ($next = $this->last + 1; $next < $this->len; ++$next) {
            $char = $this->str[$next];
            if (! Context::isWhitespace($char)) {
                break;
            }

            $run .= $char;
        }

        // `$next` is the first index after the run; the cursor is left on the
        // last whitespace character.
        $this->last = $next - 1;

        return new Token($run, Token::TYPE_WHITESPACE);
    }
646
647
    /**
648
     * Parses a comment.
649
     *
650 128
     * @return Token|null
651 96
     */
652 128
    public function parseComment()
    {
        // Position to rewind to when no comment is found.
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // `Context::isComment()` yields comment flags or null; all checks
        // below compare strictly against null instead of relying on the
        // truthiness of an integer.

        // Bash style comments. (#comment\n)
        if (Context::isComment($token) !== null) {
            while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                $token .= $this->str[$this->last];
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*\/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token) !== null) {
                // There might be a conflict with "*" operator here, when string is "*/*".
                // This can occurs in the following statements:
                // - "SELECT */* comment */ FROM ..."
                // - "SELECT 2*/* comment */3 AS `six`;"
                $next = $this->last + 1;
                if (($next < $this->len) && $this->str[$next] === '*') {
                    // Conflict in "*/*": first "*" was not for ending a comment.
                    // Stop here and let other parsing method define the true behavior of that first star.
                    $this->last = $iBak;

                    return null;
                }

                $flags = Token::FLAG_COMMENT_C;

                // This comment already ended. It may be a part of a
                // previous MySQL specific command.
                if ($token === '*/') {
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len && $this->str[$this->last + 1] === '!') {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // Collect the digits that directly follow the `!`.
                    while (
                        ++$this->last < $this->len
                        && $this->str[$this->last] >= '0'
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }

                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Parsing the comment: consume characters until the pair
                // `*/` is reached or the input ends.
                while (
                    ++$this->last < $this->len
                    && (
                        $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending.
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            $end = false;
        } else {
            // The input ended: step back and tell `isComment` about it.
            --$this->last;
            $end = true;
        }

        if (Context::isComment($token, $end) !== null) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                    $token .= $this->str[$this->last];
                }
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }

        // Not a comment: restore the position the method was entered with.
        $this->last = $iBak;

        return null;
    }
765
766
    /**
     * Parses a boolean literal starting at the current position.
     *
     * Four characters are read and tested first; if they do not form a
     * boolean, a fifth character is appended and the test is repeated. When
     * neither attempt succeeds, the cursor is moved back to where it started.
     *
     * @return Token|null the boolean token, or null if no boolean starts here
     */
    public function parseBool()
    {
        // At least `min(strlen('TRUE'), strlen('FALSE'))` characters are
        // required.
        if ($this->last + 3 >= $this->len) {
            return null;
        }

        $start = $this->last;

        // Read four characters: _TRUE_ or _FALS_e.
        $candidate = $this->str[$this->last];
        for ($read = 1; $read < 4; ++$read) {
            $candidate .= $this->str[++$this->last];
        }

        if (Context::isBool($candidate)) {
            return new Token($candidate, Token::TYPE_BOOL);
        }

        // Not a four-letter boolean; try with one more character: fals_E_.
        if (++$this->last < $this->len) {
            $candidate .= $this->str[$this->last];
            if (Context::isBool($candidate)) {
                return new Token($candidate, Token::TYPE_BOOL, 1);
            }
        }

        // Nothing matched: undo every cursor movement made above.
        $this->last = $start;

        return null;
    }
798 172
799 2316
    /**
     * Parses a number starting at the current position.
     *
     * Signed decimal integers, floats, numbers in approximate (exponent)
     * form, hexadecimal numbers (`0x...`) and bit values (`b'...'`) are
     * recognised. On success the cursor is left on the last character of the
     * number; on failure it is restored to where it started.
     *
     * @return Token|null the number token, or null if no valid number starts here
     */
    public function parseNumber()
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machine and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 -------------------[ 0x or 0X ]------------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        //
        // In states 3, 4 and 5 any other letter negates the state, because a
        // number can't be directly followed by a letter.
        //
        // Valid final states are: 2, 3, 4 (unless the token is a lone `.`),
        // 6 and 9. Any parsing that finished in a state other than these is
        // invalid.
        // Also, negative states are invalid states.
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];
            $isDigit = $char >= '0' && $char <= '9';
            $isLetter = ($char >= 'a' && $char <= 'z') || ($char >= 'A' && $char <= 'Z');

            if ($state === 1) {
                if ($char === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif (
                    $this->last + 1 < $this->len
                    && $char === '0'
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // Consume the `0` here; the `x` it is followed by is
                    // appended at the bottom of the loop.
                    $token .= $this->str[$this->last++];
                    $state = 2;
                } elseif ($isDigit) {
                    $state = 3;
                } elseif ($char === '.') {
                    $state = 4;
                } elseif ($char === 'b') {
                    $state = 7;
                } elseif ($char !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                $isHexLetter = ($char >= 'A' && $char <= 'F') || ($char >= 'a' && $char <= 'f');
                if (! $isDigit && ! $isHexLetter) {
                    break;
                }
            } elseif ($state === 3) {
                if ($char === '.') {
                    $state = 4;
                } elseif ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif ($isLetter) {
                    // A number can't be directly followed by a letter.
                    // Nothing can make the token valid again, so stop
                    // scanning instead of consuming the rest of the input.
                    $state = -$state;
                    break;
                } elseif (! $isDigit) {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif ($isLetter) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } elseif (! $isDigit) {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if ($char === '+' || $char === '-' || $isDigit) {
                    $state = 6;
                } elseif ($isLetter) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } else {
                    break;
                }
            } elseif ($state === 6) {
                if (! $isDigit) {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($char !== '\'') {
                    break;
                }

                $state = 8;
            } elseif ($state === 8) {
                if ($char === '\'') {
                    $state = 9;
                } elseif ($char !== '0' && $char !== '1') {
                    break;
                }
            } elseif ($state === 9) {
                break;
            }

            // Read the character again rather than reusing `$char`: the hex
            // prefix branch above moves the cursor forward by one.
            $token .= $this->str[$this->last];
        }

        if ($state === 2 || $state === 3 || ($token !== '.' && $state === 4) || $state === 6 || $state === 9) {
            // The loop stopped one character past the number.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }

        $this->last = $iBak;

        return null;
    }
962 2280
963
    /**
     * Parses a quoted string starting at the current position.
     *
     * The character under the cursor must either be recognised by
     * `Context::isString()` or be equal to `$quote`; otherwise nothing is
     * consumed and null is returned. Inside the string a doubled quote
     * character is kept as part of the value, and so is a backslash together
     * with the character that follows it (except in backtick-quoted strings).
     *
     * If the input ends before the closing quote is found, the problem is
     * reported through `error()` and the token read so far is still returned.
     *
     * @param string $quote additional starting symbol
     *
     * @return Token|null the string token, or null if no string starts here
     *
     * @throws LexerException
     */
    public function parseString($quote = '')
    {
        $token = $this->str[$this->last];
        $flags = Context::isString($token);

        // `isString()` yields null for "not a string delimiter"; compare
        // strictly so the check does not depend on integer truthiness.
        if ($flags === null && $token !== $quote) {
            return null;
        }

        $quote = $token;

        while (++$this->last < $this->len) {
            $char = $this->str[$this->last];
            $hasNext = $this->last + 1 < $this->len;

            $isDoubledQuote = $hasNext && $char === $quote && $this->str[$this->last + 1] === $quote;
            $isBackslashEscape = $hasNext && $char === '\\' && $quote !== '`';

            if ($isDoubledQuote || $isBackslashEscape) {
                // Keep both characters of the escape sequence verbatim.
                $token .= $char . $this->str[++$this->last];
                continue;
            }

            if ($char === $quote) {
                // Closing quote found; it is appended below.
                break;
            }

            $token .= $char;
        }

        if ($this->last >= $this->len || $this->str[$this->last] !== $quote) {
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $quote
                ),
                '',
                $this->last
            );
        } else {
            $token .= $this->str[$this->last];
        }

        return new Token($token, Token::TYPE_STRING, $flags);
    }
1016 1660
1017 4
    /**
     * Parses a symbol starting at the current position.
     *
     * The character under the cursor is classified by `Context::isSymbol()`:
     *
     * - for a variable, a second `@` marks a system variable (e.g.
     *   `@@hostname`) and both characters are kept as the prefix;
     * - for a parameter, the marker is kept as the prefix; the cursor moves
     *   past it unless the marker is a bare `?`;
     * - for any other symbol the prefix is empty.
     *
     * The name is then read with `parseString('`')` and, failing that, with
     * `parseUnknown()`, and appended to the prefix.
     *
     * @return Token|null the symbol token, or null if no symbol starts here
     *
     * @throws LexerException
     */
    public function parseSymbol()
    {
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);

        // `isSymbol()` yields null for "not a symbol"; compare strictly so
        // the check does not depend on integer truthiness.
        if ($flags === null) {
            return null;
        }

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                // This is a system variable (e.g. `@@hostname`).
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
            if ($token !== '?' && $this->last + 1 < $this->len) {
                ++$this->last;
            }
        } else {
            $token = '';
        }

        $str = null;

        if ($this->last < $this->len) {
            $str = $this->parseString('`');

            if ($str === null) {
                $str = $this->parseUnknown();

                // A bare `?` is a positional placeholder and carries no
                // name, so a missing name is only an error for the other
                // kinds of symbols.
                if ($str === null && $token !== '?') {
                    $this->error('Variable name was expected.', $this->str[$this->last], $this->last);
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }
1067
1068
    /**
     * Parses unknown parts of the query.
     *
     * Characters are collected from the current position until a separator
     * (as decided by `Context::isSeparator()`) or the end of the input is
     * reached. If the collected text ends with the current delimiter, the
     * delimiter is cut off and collecting stops. The cursor is left on the
     * last character that belongs to the returned token.
     *
     * @return Token|null the token, or null if the current character is a separator
     */
    public function parseUnknown()
    {
        $unknown = $this->str[$this->last];
        if (Context::isSeparator($unknown)) {
            return null;
        }

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];
            if (Context::isSeparator($char)) {
                break;
            }

            $unknown .= $char;

            if (! str_ends_with($unknown, $this->delimiter)) {
                continue;
            }

            // The end of the token equals the current delimiter: remove it
            // from the token and move the cursor back accordingly.
            $unknown = substr($unknown, 0, -$this->delimiterLen);
            $this->last -= $this->delimiterLen - 1;
            break;
        }

        // Step back onto the last character of the token.
        --$this->last;

        return new Token($unknown);
    }
1095
1096
    /**
     * Parses the delimiter of the query.
     *
     * The characters starting at the current position are compared with the
     * current delimiter one by one. On a match the cursor is moved onto the
     * last character of the delimiter.
     *
     * @return Token|null the delimiter token, or null if the input at the
     *                    current position is not the complete delimiter
     */
    public function parseDelimiter()
    {
        // Fewer characters remain than the delimiter has: a partial match at
        // the very end of the input must not be reported as a delimiter, and
        // the cursor must not be moved past the end of the input.
        if ($this->last + $this->delimiterLen > $this->len) {
            return null;
        }

        for ($idx = 0; $idx < $this->delimiterLen; ++$idx) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
1117
}
1118