Passed
Pull Request — master (#385)
by
unknown
02:47
created

Lexer::parseUnknown()   A

Complexity

Conditions 5
Paths 4

Size

Total Lines 21
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 12
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
eloc 11
nc 4
nop 0
dl 0
loc 21
ccs 12
cts 12
cp 1
crap 5
rs 9.6111
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser;
6
7
use PhpMyAdmin\SqlParser\Exceptions\LexerException;
8
9
use function define;
10
use function defined;
11
use function in_array;
12
use function mb_strlen;
13
use function sprintf;
14
use function str_ends_with;
15
use function strlen;
16
use function substr;
17
18 7
// NOTE: In previous versions of PHP (5.5 and older) the default internal
// encoding is "ISO-8859-1". All `mb_` functions must therefore be given the
// correct encoding explicitly, which is 'UTF-8', in order to work properly.
//
// USE_UTF_STRINGS (bool) forces usage of `UtfString` if the string is
// multibyte. `UtfString` may be slower, but it gives better results.
// A value that was already defined elsewhere is left untouched.
defined('USE_UTF_STRINGS') || define('USE_UTF_STRINGS', true);
32
33
/**
34
 * Defines the lexer of the library.
35
 *
36
 * This is one of the most important components, along with the parser.
37
 *
38
 * Depends on context to extract lexemes.
39
 *
40
 * Performs lexical analysis over a SQL statement and splits it in multiple tokens.
41
 *
42
 * The output of the lexer is affected by the context of the SQL statement.
43
 *
44
 * @see Context
45
 */
46
class Lexer extends Core
47
{
48
    /**
49
     * A list of methods that are used in lexing the SQL query.
50
     *
51
     * @var string[]
52
     */
53
    public static $PARSER_METHODS = [
54
        // It is best to put the parsers in order of their complexity
55
        // (ascending) and their occurrence rate (descending).
56
        //
57
        // Conflicts:
58
        //
59
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
        // They fight over delimiter. The delimiter may be a keyword, a
        // number or almost any character which makes the delimiter one of
        // the first tokens that must be parsed.
        //
        // 2. `parseNumber` and `parseOperator`
        // They fight over `+` and `-`.
        //
        // 3. `parseComment` and `parseOperator`
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
        //
        // 4. `parseBool` and `parseKeyword`
        // They fight over `TRUE` and `FALSE`.
        //
        // 5. `parseKeyword` and `parseUnknown`
        // They fight over words. `parseUnknown` does not know about
        // keywords.
76
77
        'parseDelimiter',
78
        'parseWhitespace',
79
        'parseNumber',
80
        'parseComment',
81
        'parseOperator',
82
        'parseBool',
83
        'parseString',
84
        'parseSymbol',
85
        'parseKeyword',
86
        'parseLabel',
87
        'parseUnknown',
88
    ];
89
90
    /**
91
     * The string to be parsed.
92
     *
93
     * @var string|UtfString
94
     */
95
    public $str = '';
96
97
    /**
98
     * The length of `$str`.
99
     *
100
     * By storing its length, a lot of time is saved, because parsing methods
101
     * would call `strlen` every time.
102
     *
103
     * @var int
104
     */
105
    public $len = 0;
106
107
    /**
108
     * The index of the last parsed character.
109
     *
110
     * @var int
111
     */
112
    public $last = 0;
113
114
    /**
115
     * Tokens extracted from given strings.
116
     *
117
     * @var TokensList
118
     */
119
    public $list;
120
121
    /**
122
     * The default delimiter. This is used, by default, in all new instances.
123
     *
124
     * @var string
125
     */
126
    public static $DEFAULT_DELIMITER = ';';
127
128
    /**
129
     * Statements delimiter.
130
     * This may change during lexing.
131
     *
132
     * @var string
133
     */
134
    public $delimiter;
135
136
    /**
137
     * The length of the delimiter.
138
     *
139
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
140
     * calls to `strlen`, which might affect performance when the delimiter is
141
     * big.
142
     *
143
     * @var int
144
     */
145
    public $delimiterLen;
146
147
    /**
     * Lexes a query with a throw-away lexer instance and hands back its tokens.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string           $delimiter the delimiter to be used
     *
     * @return TokensList
     */
    public static function getTokens($str, $strict = false, $delimiter = null)
    {
        // The constructor already runs the lexer, so the list is ready as
        // soon as the instance exists.
        return (new self($str, $strict, $delimiter))->list;
    }
163
164
    /**
     * Stores the query, selects the delimiter and immediately lexes the query.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string           $delimiter the delimiter to be used; when it is
     *                                    `null` or an empty string,
     *                                    `static::$DEFAULT_DELIMITER` is used
     */
    public function __construct($str, $strict = false, $delimiter = null)
    {
        // `strlen` is used instead of `mb_strlen` because the lexer needs to
        // parse each byte of the input.
        $len = $str instanceof UtfString ? $str->length() : strlen($str);

        // For multi-byte strings, a new instance of `UtfString` is
        // initialized (only if `UtfString` usage is forced).
        if (! $str instanceof UtfString && USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
            $str = new UtfString($str);
        }

        $this->str = $str;
        $this->len = $str instanceof UtfString ? $str->length() : $len;

        $this->strict = $strict;

        // Setting the delimiter.
        // `empty()` on its own also reports the string '0' as empty, which
        // would silently replace a caller-supplied '0' delimiter with the
        // default one, so that value is let through explicitly.
        $useDefault = empty($delimiter) && $delimiter !== '0';
        $this->setDelimiter($useDefault ? static::$DEFAULT_DELIMITER : $delimiter);

        $this->lex();
    }
192
193
    /**
     * Replaces the statements delimiter and refreshes its cached length.
     *
     * @param string $delimiter the new delimiter
     *
     * @return void
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;

        // The length is cached alongside the delimiter so that it does not
        // have to be recomputed on every use.
        $this->delimiterLen = strlen($this->delimiter);
    }
205
206
    /**
     * Parses the string and extracts lexemes.
     *
     * For every position, the methods listed in `static::$PARSER_METHODS` are
     * tried in order and the first token returned is kept. A character that
     * none of them accepts becomes a token of its own and is reported through
     * `error()`. The `DELIMITER` word is handled inline: the whitespace and the
     * new delimiter that follow it are consumed here and the lexer switches to
     * that delimiter.
     *
     * The resulting list is stored in `$this->list`, always terminated by one
     * extra delimiter token, and is finally post-processed to settle the
     * meaning of `*` operators and of function keywords.
     *
     * @return void
     */
    public function lex()
    {
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $list = new TokensList();

        /**
         * Last processed token.
         *
         * @var Token
         */
        $lastToken = null;

        // `$lastIdx` remembers where the current token started, because the
        // parse methods move `$this->last` to the token's last character.
        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
            /**
             * The new token.
             *
             * @var Token
             */
            $token = null;

            foreach (static::$PARSER_METHODS as $method) {
                $token = $this->$method();

                if ($token) {
                    break;
                }
            }

            if ($token === null) {
                // @assert($this->last === $lastIdx);
                $token = new Token($this->str[$this->last]);
                $this->error('Unexpected character.', $this->str[$this->last], $this->last);
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_SYMBOL
                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
                && (
                    $lastToken->type === Token::TYPE_STRING
                    || (
                        $lastToken->type === Token::TYPE_SYMBOL
                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                    )
                )
            ) {
                // Handles ```... FROM 'user'@'%' ...```.
                // The variable token is merged into the previous token, so no
                // new entry is added to the list for it.
                $lastToken->token .= $token->token;
                $lastToken->type = Token::TYPE_SYMBOL;
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
                $lastToken->value .= '@' . $token->value;
                continue;
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_KEYWORD
                && $lastToken->type === Token::TYPE_OPERATOR
                && $lastToken->value === '.'
            ) {
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
                // a reserved word.
                $token->type = Token::TYPE_NONE;
                $token->flags = 0;
                $token->value = $token->token;
            }

            $token->position = $lastIdx;

            $list->tokens[$list->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected whitespace(s) before delimiter.', '', $this->last + 1);
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $pos = ++$this->last;
                $token = $this->parseWhitespace();

                if ($token !== null) {
                    $token->position = $pos;
                    $list->tokens[$list->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected delimiter.', '', $this->last + 1);
                    continue;
                }

                $pos = $this->last + 1;

                // Parsing the delimiter: everything up to the next whitespace,
                // limited to 15 characters.
                $this->delimiter = null;
                $delimiterLen = 0;
                while (
                    ++$this->last < $this->len
                    && ! Context::isWhitespace($this->str[$this->last])
                    && $delimiterLen < 15
                ) {
                    $this->delimiter .= $this->str[$this->last];
                    ++$delimiterLen;
                }

                // The number of collected characters is checked instead of
                // `empty($this->delimiter)`, because `empty()` also reports
                // the string '0' as empty and would reject `DELIMITER 0`.
                if ($delimiterLen === 0) {
                    $this->error('Expected delimiter.', '', $this->last);
                    $this->delimiter = ';';
                }

                // The loop above stopped one character too far; step back so
                // that the `for` increment lands on that character.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            $lastToken = $token;
        }

        // Adding a final delimiter to mark the ending.
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $list;

        $this->solveAmbiguityOnStarOperator();
        $this->solveAmbiguityOnFunctionKeywords();
    }
349
350
    /**
     * Decides, for every "*" operator token, whether it is an SQL wildcard.
     *
     * In SQL statements, "*" is either an arithmetic operator (as in 2*3) or a
     * wildcard (as in SELECT a.* FROM ...). The token that follows the star,
     * ignoring whitespaces and comments, settles the question. The star is a
     * wildcard when that token is one of:
     * - the "FROM" keyword (like in "SELECT * FROM...");
     * - the "USING" keyword (like in "DELETE table_name.* USING...");
     * - the "," operator (like in "SELECT *, field FROM...");
     * - the ")" operator (like in "COUNT(*)").
     * Only in those cases is the flag of the star token changed; in every
     * other case the default flag (arithmetic) is kept.
     *
     * The position of the tokens list is restored before returning.
     *
     * @return void
     */
    private function solveAmbiguityOnStarOperator()
    {
        $savedIdx = $this->list->idx;

        while (true) {
            $star = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*');
            if ($star === null) {
                break;
            }

            // getNext() already gets rid of whitespaces and comments.
            $follower = $this->list->getNext();
            if ($follower === null) {
                continue;
            }

            $followedByKeyword = $follower->type === Token::TYPE_KEYWORD
                && in_array($follower->value, ['FROM', 'USING'], true);
            $followedByOperator = $follower->type === Token::TYPE_OPERATOR
                && in_array($follower->value, [',', ')'], true);

            if ($followedByKeyword || $followedByOperator) {
                $star->flags = Token::FLAG_OPERATOR_SQL;
            }
        }

        $this->list->idx = $savedIdx;
    }
388
389
    /**
     * Resolves the ambiguity when dealing with the functions keywords.
     *
     * In SQL statements, the function keywords might be used as table names or columns names.
     * To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
     * comments, right after the function keyword position. The function keyword is for sure used
     * as column name or table name if the next token found is any of:
     *
     * - "FROM" (the FROM keyword like in "SELECT Country x, AverageSalary avg FROM...");
     * - "WHERE" (the WHERE keyword like in "DELETE FROM emp x WHERE x.salary = 20");
     * - "SET" (the SET keyword like in "UPDATE Country x, City y set x.Name=x.Name");
     * - "," (a comma separator like 'x,' in "UPDATE Country x, City y set x.Name=x.Name");
     * - "." (a dot separator like in "x.asset_id FROM (SELECT evt.asset_id FROM evt)".
     * - a token whose value is `null` (when used as a table alias like in
     *   "avg.col FROM (SELECT ev.col FROM ev) avg"), or no token at all.
     *
     * This method will change the type of the function keyword tokens when any of those
     * conditions above is true. Otherwise, the default type and flag (function keyword)
     * will be kept.
     *
     * The position of the tokens list is restored before returning.
     *
     * @return void
     */
    private function solveAmbiguityOnFunctionKeywords()
    {
        $iBak = $this->list->idx;
        $keywordFunction = Token::TYPE_KEYWORD | Token::FLAG_KEYWORD_FUNCTION;
        while (($keywordToken = $this->list->getNextOfTypeAndFlag(Token::TYPE_KEYWORD, $keywordFunction)) !== null) {
            $next = $this->list->getNext();

            // `getNext()` may return `null` (the star operator counterpart of
            // this method guards against it). A missing follower is handled
            // like a follower without a value: nothing comes after the
            // keyword, so it cannot be a function call.
            $nothingFollows = $next === null || $next->value === null;

            if (
                ! $nothingFollows
                && ($next->type !== Token::TYPE_KEYWORD || ! in_array($next->value, ['FROM', 'SET', 'WHERE'], true))
                && ($next->type !== Token::TYPE_OPERATOR || ! in_array($next->value, ['.', ','], true))
            ) {
                continue;
            }

            $keywordToken->type = Token::TYPE_NONE;
            // NOTE(review): the flags are reset with a TYPE_* constant, while
            // `lex()` uses a literal 0 for the same purpose; presumably both
            // are equal, confirm against `Token`.
            $keywordToken->flags = Token::TYPE_NONE;
            $keywordToken->keyword = $keywordToken->value;
        }

        $this->list->idx = $iBak;
    }
431
432
    /**
     * Builds a lexer error from the given details and records it.
     *
     * The message is passed through the translator before it is wrapped in a
     * `LexerException`, which is then handed over to the parent `error()`.
     *
     * @param string $msg  the error message
     * @param string $str  the character that produced the error
     * @param int    $pos  the position of the character
     * @param int    $code the code of the error
     *
     * @return void
     *
     * @throws LexerException throws the exception, if strict mode is enabled.
     */
    public function error($msg, $str = '', $pos = 0, $code = 0)
    {
        $translated = Translator::gettext($msg);

        parent::error(new LexerException($translated, $str, $pos, $code));
    }
454
455
    /**
     * Parses a keyword.
     *
     * Characters are accumulated starting at `$this->last` and every prefix
     * that ends right before a separator (or at the end of the input) is
     * looked up with `Context::isKeyword()`. The longest matching prefix wins.
     * On success `$this->last` is left on the last character of the keyword;
     * otherwise it is left where it was.
     *
     * @return Token|null the keyword token, or `null` if no keyword starts here
     */
    public function parseKeyword()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         */
        $iEnd = $this->last;

        /**
         * Whether last parsed character is a whitespace.
         *
         * @var bool
         */
        $lastSpace = false;

        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            // Composed keywords shouldn't have more than one whitespace between
            // keywords.
            if (Context::isWhitespace($this->str[$this->last])) {
                if ($lastSpace) {
                    --$j; // The size of the keyword didn't increase.
                    continue;
                }

                $lastSpace = true;
            } else {
                $lastSpace = false;
            }

            $token .= $this->str[$this->last];
            $flags = Context::isKeyword($token);

            // A candidate only counts when it is a whole word, i.e. it is
            // followed by a separator or by the end of the input.
            $endsWord = $this->last + 1 === $this->len || Context::isSeparator($this->str[$this->last + 1]);

            // `isKeyword()` yields the keyword flags or `null`; the strict
            // comparison keeps a flags value of 0 from being mistaken for
            // "not a keyword".
            if (! $endsWord || $flags === null) {
                continue;
            }

            $ret = new Token($token, Token::TYPE_KEYWORD, $flags);
            $iEnd = $this->last;

            // We don't break so we find longest keyword.
            // For example, `OR` and `ORDER` have a common prefix `OR`.
            // If we stopped at `OR`, the parsing would be invalid.
        }

        $this->last = $iEnd;

        return $ret;
    }
516
517
    /**
     * Parses a label.
     *
     * A label is a run of non-separator characters, optionally followed by
     * whitespace, that is closed by a colon. When one is found, `$this->last`
     * is left on the colon; otherwise it is put back where it started.
     *
     * @return Token|null
     */
    public function parseLabel()
    {
        $start = $this->last;
        $label = '';

        // Counts the characters of the label; whitespace found after the first
        // character does not increase it.
        $size = 1;

        while ($size < Context::LABEL_MAX_LENGTH && $this->last < $this->len) {
            $char = $this->str[$this->last];

            if ($char === ':' && $size > 1) {
                // End of label
                return new Token($label . $char, Token::TYPE_LABEL);
            }

            if (Context::isWhitespace($char) && $size > 1) {
                // Whitespace between label and :
                // The size of the keyword didn't increase.
                --$size;
            } elseif (Context::isSeparator($char)) {
                // Any other separator
                break;
            }

            $label .= $char;
            ++$size;
            ++$this->last;
        }

        // No colon was reached: nothing is consumed.
        $this->last = $start;

        return null;
    }
562
563
    /**
     * Parses an operator.
     *
     * Characters are accumulated starting at `$this->last` and every prefix is
     * looked up with `Context::isOperator()`; the longest matching prefix
     * wins. On success `$this->last` is left on the last character of the
     * operator; otherwise it is left where it was.
     *
     * @return Token|null the operator token, or `null` if no operator starts here
     */
    public function parseOperator()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         */
        $iEnd = $this->last;

        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            $token .= $this->str[$this->last];
            $flags = Context::isOperator($token);

            // `isOperator()` yields the operator flags or `null`; the strict
            // comparison keeps a flags value of 0 from being mistaken for
            // "not an operator".
            if ($flags === null) {
                continue;
            }

            // No break here, so that a longer operator sharing this prefix
            // can still replace the current match.
            $ret = new Token($token, Token::TYPE_OPERATOR, $flags);
            $iEnd = $this->last;
        }

        $this->last = $iEnd;

        return $ret;
    }
600
601
    /**
     * Parses a whitespace.
     *
     * Consecutive whitespace characters are gathered into a single token and
     * `$this->last` is left on the last of them. When the current character is
     * not a whitespace, nothing is consumed.
     *
     * @return Token|null
     */
    public function parseWhitespace()
    {
        $run = $this->str[$this->last];

        if (! Context::isWhitespace($run)) {
            return null;
        }

        // Look ahead until the first character that is not a whitespace.
        for ($next = $this->last + 1; $next < $this->len; ++$next) {
            $char = $this->str[$next];

            if (! Context::isWhitespace($char)) {
                break;
            }

            $run .= $char;
        }

        $this->last = $next - 1;

        return new Token($run, Token::TYPE_WHITESPACE);
    }
622
623
    /**
     * Parses a comment.
     *
     * Three styles are recognised, in this order: Bash style (`#` up to the
     * end of the line), C style (including the opening part of MySQL-specific
     * `!` commands and a stray closing marker) and SQL style (`--` up to the
     * end of the line). The line feed that ends a Bash or SQL style comment is
     * not part of the token. When no comment starts at the current position,
     * `$this->last` is restored.
     *
     * @return Token|null the comment token, or `null` if no comment starts here
     */
    public function parseComment()
    {
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // `Context::isComment()` yields the comment flags or `null`, hence the
        // strict comparisons with `null` below.

        // Bash style comments. (#comment\n)
        if (Context::isComment($token) !== null) {
            while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                $token .= $this->str[$this->last];
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*\/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token) !== null) {
                // There might be a conflict with "*" operator here, when string is "*/*".
                // This can occurs in the following statements:
                // - "SELECT */* comment */ FROM ..."
                // - "SELECT 2*/* comment */3 AS `six`;"
                // Only the "*/" token can be part of that conflict. An opening
                // "/*" that is followed by another star (as in "/** comment */"
                // or "/**/") is a regular comment and must not be rejected.
                $next = $this->last + 1;
                if ($token === '*/' && ($next < $this->len) && $this->str[$next] === '*') {
                    // Conflict in "*/*": first "*" was not for ending a comment.
                    // Stop here and let other parsing method define the true behavior of that first star.
                    $this->last = $iBak;

                    return null;
                }

                $flags = Token::FLAG_COMMENT_C;

                // This comment already ended. It may be a part of a
                // previous MySQL specific command.
                if ($token === '*/') {
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len && $this->str[$this->last + 1] === '!') {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // The digits that directly follow the `!` belong to the
                    // opening part of the command.
                    while (
                        ++$this->last < $this->len
                        && $this->str[$this->last] >= '0'
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }

                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Parsing the comment.
                while (
                    ++$this->last < $this->len
                    && (
                        $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending.
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            $end = false;
        } else {
            // The input ends here; `isComment()` is told so.
            --$this->last;
            $end = true;
        }

        if (Context::isComment($token, $end) !== null) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                    $token .= $this->str[$this->last];
                }
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }

        $this->last = $iBak;

        return null;
    }
741
742
    /**
     * Parses a boolean literal (`TRUE` or `FALSE`) starting at the current
     * position.
     *
     * When a literal is recognized the cursor is left on its last character;
     * otherwise the cursor is put back where it was on entry.
     *
     * @return Token|null the boolean token, or null if none starts here
     */
    public function parseBool()
    {
        // The shortest literal (`TRUE`) needs four characters, so give up
        // early when fewer than that remain.
        if ($this->last + 3 >= $this->len) {
            return null;
        }

        $start = $this->last;

        // Read the first four characters: either a complete `TRUE` or the
        // `FALS` prefix of `FALSE`.
        $candidate = $this->str[$this->last];
        for ($read = 1; $read < 4; ++$read) {
            ++$this->last;
            $candidate .= $this->str[$this->last];
        }

        if (Context::isBool($candidate)) {
            return new Token($candidate, Token::TYPE_BOOL);
        }

        // Not a four-character literal; extend by one character and retry so
        // that `FALSE` can match.
        ++$this->last;
        if ($this->last < $this->len) {
            $candidate .= $this->str[$this->last];
            if (Context::isBool($candidate)) {
                return new Token($candidate, Token::TYPE_BOOL, 1);
            }
        }

        // Nothing matched: rewind so the next parser starts from the same spot.
        $this->last = $start;

        return null;
    }
774
775
    /**
     * Parses a number.
     *
     * Handles optionally signed integers, decimals, numbers in approximate
     * (exponent) form, hexadecimal literals (`0x…`) and bit literals
     * (`b'…'`).
     *
     * On success the cursor is left on the last character of the number; on
     * failure the cursor is restored to where it was on entry.
     *
     * @return Token|null the number token, or null if no valid number starts here
     */
    public function parseNumber()
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machine and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 -------------------[ 0x or 0X ]------------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        //
        // Valid final states are: 2, 3, 4 (unless the token is a lone `.`),
        // 6 and 9. Any parsing that finished in a state other than these is
        // invalid.
        // A state is negated when a letter directly follows the number; a
        // negative state can never become valid again, so scanning stops
        // right there instead of consuming the rest of the query.
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            if ($state === 1) {
                if ($this->str[$this->last] === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif (
                    $this->last + 1 < $this->len
                    && $this->str[$this->last] === '0'
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // Consume the `0` here; the `x` is appended by the common
                    // append at the bottom of the loop.
                    $token .= $this->str[$this->last++];
                    $state = 2;
                } elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
                    $state = 3;
                } elseif ($this->str[$this->last] === '.') {
                    $state = 4;
                } elseif ($this->str[$this->last] === 'b') {
                    $state = 7;
                } elseif ($this->str[$this->last] !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                if (
                    ! (
                        ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                        || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'F')
                        || ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'f')
                    )
                ) {
                    break;
                }
            } elseif ($state === 3) {
                if ($this->str[$this->last] === '.') {
                    $state = 4;
                } elseif ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                    $state = 5;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter.
                    // The outcome is already decided, so stop scanning.
                    $state = -$state;
                    break;
                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                    $state = 5;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter
                    // (this is also the path taken by the `.` in `t.col`).
                    // The outcome is already decided, so stop scanning.
                    $state = -$state;
                    break;
                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if (
                    $this->str[$this->last] === '+' || $this->str[$this->last] === '-'
                    || ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                ) {
                    $state = 6;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter.
                    // The outcome is already decided, so stop scanning.
                    $state = -$state;
                    break;
                } else {
                    break;
                }
            } elseif ($state === 6) {
                if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($this->str[$this->last] !== '\'') {
                    break;
                }

                $state = 8;
            } elseif ($state === 8) {
                if ($this->str[$this->last] === '\'') {
                    $state = 9;
                } elseif ($this->str[$this->last] !== '0' && $this->str[$this->last] !== '1') {
                    break;
                }
            } elseif ($state === 9) {
                break;
            }

            $token .= $this->str[$this->last];
        }

        if ($state === 2 || $state === 3 || ($token !== '.' && $state === 4) || $state === 6 || $state === 9) {
            // The loop stopped one position past the number; step back onto
            // its last character.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }

        // Not a number: rewind so the next parser starts from the same spot.
        $this->last = $iBak;

        return null;
    }
938
939
    /**
     * Parses a quoted string starting at the current position.
     *
     * The character under the cursor is the opening quote. It is accepted when
     * `Context::isString()` recognizes it or when it equals `$quote`. A doubled
     * quote is kept as part of the string, and so is a backslash together with
     * the character after it, except inside backtick-quoted strings.
     *
     * If the closing quote is missing, an error is reported through
     * `error()` and the token built so far is still returned.
     *
     * @param string $quote additional starting symbol
     *
     * @return Token|null the string token, or null if no string starts here
     *
     * @throws LexerException
     */
    public function parseString($quote = '')
    {
        $opening = $this->str[$this->last];
        $flags = Context::isString($opening);

        if (! $flags && $opening !== $quote) {
            return null;
        }

        $token = $opening;

        // Backslash escapes are not honoured inside backtick quotes.
        $backslashEscapes = $opening !== '`';

        while (++$this->last < $this->len) {
            $current = $this->str[$this->last];

            // Two-character sequences (doubled quote, backslash escape) are
            // only possible when another character follows.
            if (
                $this->last + 1 < $this->len
                && (
                    ($current === $opening && $this->str[$this->last + 1] === $opening)
                    || ($current === '\\' && $backslashEscapes)
                )
            ) {
                $token .= $current;
                ++$this->last;
                $token .= $this->str[$this->last];
                continue;
            }

            if ($current === $opening) {
                // Closing quote found; it is appended after the loop.
                break;
            }

            $token .= $current;
        }

        if ($this->last < $this->len && $this->str[$this->last] === $opening) {
            $token .= $this->str[$this->last];
        } else {
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $opening
                ),
                '',
                $this->last
            );
        }

        return new Token($token, Token::TYPE_STRING, $flags);
    }
992
993
    /**
     * Parses a symbol.
     *
     * The character under the cursor is classified by `Context::isSymbol()`:
     *
     * - variable sigil (`@`), optionally doubled for system variables
     *   (e.g. `@@hostname`);
     * - parameter sigil: a positional `?`, or a sigil that is followed by a
     *   name;
     * - anything else recognized as a symbol, in which case the sigil itself
     *   is dropped from the token and only the parsed name is kept.
     *
     * The name is read with `parseString('`')` and, failing that, with
     * `parseUnknown()`. A bare `?` placeholder has no name, so no name is
     * parsed and no error is reported for it.
     *
     * @return Token|null the symbol token, or null if no symbol starts here
     *
     * @throws LexerException
     */
    public function parseSymbol()
    {
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);

        if (! $flags) {
            return null;
        }

        // Whether a name is expected after the sigil.
        $expectsName = true;

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            // Note the pre-increment: whenever another character exists the
            // cursor moves past the `@`, whether or not a second `@` follows.
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                // This is a system variable (e.g. `@@hostname`).
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
            if ($token === '?') {
                // Positional placeholder: the cursor stays on the `?`, so
                // running the name parsers here would only re-examine the `?`
                // itself and end up reporting a bogus error or duplicating it
                // in the token.
                $expectsName = false;
            } elseif ($this->last + 1 < $this->len) {
                // Named parameter: move past the sigil onto the name.
                ++$this->last;
            }
        } else {
            // The sigil is not part of the resulting token.
            $token = '';
        }

        $str = null;

        if ($expectsName && $this->last < $this->len) {
            $str = $this->parseString('`');

            if ($str === null) {
                $str = $this->parseUnknown();

                if ($str === null) {
                    $this->error('Variable name was expected.', $this->str[$this->last], $this->last);
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }
1043
1044
    /**
     * Parses unknown parts of the query.
     *
     * Collects characters from the current position up to, but not including,
     * the next separator (as decided by `Context::isSeparator()`). If the
     * collected text ends with the current delimiter, the delimiter is cut
     * off and the cursor is rewound so that the delimiter is lexed separately.
     *
     * @return Token|null the token, or null if the current character is a separator
     */
    public function parseUnknown()
    {
        $first = $this->str[$this->last];
        if (Context::isSeparator($first)) {
            return null;
        }

        $token = $first;

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];
            if (Context::isSeparator($char)) {
                break;
            }

            $token .= $char;

            if (! str_ends_with($token, $this->delimiter)) {
                continue;
            }

            // The token ran into the current delimiter: drop it from the
            // token and move the cursor back to just after the token.
            $token = substr($token, 0, -$this->delimiterLen);
            $this->last -= $this->delimiterLen - 1;
            break;
        }

        // Step back onto the last character that belongs to the token.
        --$this->last;

        return new Token($token);
    }
1071
1072
    /**
     * Parses the delimiter of the query.
     *
     * The delimiter is recognized only when every one of its characters is
     * present at the current position. In particular, a prefix of a
     * multi-character delimiter at the very end of the input is not a match.
     *
     * On success the cursor is left on the last character of the delimiter;
     * on failure it is not moved.
     *
     * @return Token|null the delimiter token, or null if the delimiter does not start here
     */
    public function parseDelimiter()
    {
        $idx = 0;

        while ($idx < $this->delimiterLen && $this->last + $idx < $this->len) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }

            ++$idx;
        }

        if ($idx < $this->delimiterLen) {
            // The input ended before the whole delimiter could be compared,
            // so only a prefix of it matched.
            return null;
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
1093
}
1094