Passed
Pull Request — master (#471)
by
unknown
09:17
created

Lexer::solveAmbiguityOnStarOperator()   B

Complexity

Conditions 7
Paths 4

Size

Total Lines 22
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 11
nc 4
nop 0
dl 0
loc 22
ccs 5
cts 5
cp 1
crap 7
rs 8.8333
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser;
6
7
use PhpMyAdmin\SqlParser\Exceptions\LexerException;
8
9
use function in_array;
10
use function mb_strlen;
11
use function sprintf;
12
use function str_ends_with;
13
use function strlen;
14
use function substr;
15
16
/**
17
 * Defines the lexer of the library.
18 4
 *
19
 * This is one of the most important components, along with the parser.
20
 *
21
 * Depends on context to extract lexemes.
22
 *
23
 * Performs lexical analysis over a SQL statement and splits it in multiple tokens.
24
 *
25
 * The output of the lexer is affected by the context of the SQL statement.
26
 *
27
 * @see Context
28
 */
29
class Lexer extends Core
30 4
{
31
    /**
32
     * A list of methods that are used in lexing the SQL query.
33
     *
34
     * @var string[]
35
     */
36
    public static $parserMethods = [
37
        // It is best to put the parsers in order of their complexity
38
        // (ascending) and their occurrence rate (descending).
39
        //
40
        // Conflicts:
41
        //
42
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
43
        // They fight over delimiter. The delimiter may be a keyword, a
44
        // number or almost any character which makes the delimiter one of
45
        // the first tokens that must be parsed.
46
        //
47
        // 1. `parseNumber` and `parseOperator`
48
        // They fight over `+` and `-`.
49
        //
50
        // 2. `parseComment` and `parseOperator`
51
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
52
        //
53
        // 3. `parseBool` and `parseKeyword`
54
        // They fight over `TRUE` and `FALSE`.
55
        //
56
        // 4. `parseKeyword` and `parseUnknown`
57
        // They fight over words. `parseUnknown` does not know about
58
        // keywords.
59
60
        'parseDelimiter',
61
        'parseWhitespace',
62
        'parseNumber',
63
        'parseComment',
64
        'parseOperator',
65
        'parseBool',
66
        'parseString',
67
        'parseSymbol',
68
        'parseKeyword',
69
        'parseLabel',
70
        'parseUnknown',
71
    ];
72
73
74
    /**
75
     * A list of keywords that indicate that the function keyword
76
     * is not used as a function
77
     *
78
     * @var string[]
79
     */
80
    public $keywordNameIndicators = [
81
        'FROM',
82
        'SET',
83
        'WHERE',
84
    ];
85
86
    /**
87
     * A list of operators that indicate that the function keyword
88
     * is not used as a function
89
     *
90
     * @var string[]
91
     */
92
    public $operatorNameIndicators = [
93
        ',',
94
        '.',
95
    ];
96
97
    /**
98
     * The string to be parsed.
99
     *
100
     * @var string|UtfString
101
     */
102
    public $str = '';
103
104
    /**
105
     * The length of `$str`.
106
     *
107
     * By storing its length, a lot of time is saved, because parsing methods
108
     * would call `strlen` every time.
109
     *
110
     * @var int
111
     */
112
    public $len = 0;
113
114
    /**
115
     * The index of the last parsed character.
116
     *
117
     * @var int
118
     */
119
    public $last = 0;
120
121
    /**
122
     * Tokens extracted from given strings.
123
     *
124
     * @var TokensList
125
     */
126
    public $list;
127
128
    /**
129
     * The default delimiter. This is used, by default, in all new instances.
130
     *
131
     * @var string
132
     */
133
    public static $defaultDelimiter = ';';
134
135
    /**
136
     * Statements delimiter.
137
     * This may change during lexing.
138
     *
139
     * @var string
140
     */
141
    public $delimiter;
142
143
    /**
144
     * The length of the delimiter.
145
     *
146
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
147
     * calls to `strlen`, which might affect performance when the delimiter is
148
     * big.
149
     *
150
     * @var int
151
     */
152
    public $delimiterLen;
153
154
    /**
155
     * Gets the tokens list parsed by a new instance of a lexer.
156
     *
157
     * @param string|UtfString $str       the query to be lexed
158
     * @param bool             $strict    whether strict mode should be
159
     *                                    enabled or not
160
     * @param string           $delimiter the delimiter to be used
161
     *
162
     * @return TokensList
163
     */
164
    public static function getTokens($str, $strict = false, $delimiter = null)
165
    {
166
        $lexer = new self($str, $strict, $delimiter);
167
168
        return $lexer->list;
169
    }
170
171
    /**
     * Stores the query, selects the delimiter and immediately lexes the query.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string|null      $delimiter the delimiter to be used; `null` or an
     *                                    empty string selects
     *                                    `static::$defaultDelimiter`
     */
    public function __construct($str, $strict = false, $delimiter = null)
    {
        parent::__construct();

        // `strlen` is used instead of `mb_strlen` because the lexer needs to
        // parse each byte of the input.
        $len = $str instanceof UtfString ? $str->length() : strlen($str);

        // For multi-byte strings, a new instance of `UtfString` is initialized.
        // A byte length that differs from the UTF-8 character length is what
        // identifies such a string.
        if (! $str instanceof UtfString && $len !== mb_strlen($str, 'UTF-8')) {
            $str = new UtfString($str);
        }

        $this->str = $str;
        $this->len = $str instanceof UtfString ? $str->length() : $len;

        $this->strict = $strict;

        // Setting the delimiter.
        // `empty('0')` is true in PHP, so the one-character delimiter '0' is
        // accepted explicitly instead of being silently replaced by the default.
        $useGivenDelimiter = $delimiter === '0' || ! empty($delimiter);
        $this->setDelimiter($useGivenDelimiter ? $delimiter : static::$defaultDelimiter);

        $this->lex();
    }
200 1404
201
    /**
     * Replaces the statement delimiter and refreshes its cached byte length.
     *
     * @param string $delimiter the new delimiter
     *
     * @return void
     */
    public function setDelimiter($delimiter)
    {
        // The length is cached alongside the delimiter so that both
        // properties always describe the same string.
        $this->delimiterLen = strlen($delimiter);
        $this->delimiter = $delimiter;
    }
213
214 1404
    /**
     * Parses the string and extracts lexemes.
     *
     * For every position the methods listed in `static::$parserMethods` are tried
     * in order until one returns a token. The resulting tokens are collected in a
     * new `TokensList`, which receives a closing delimiter token (with a `null`
     * token string) and is stored in `$this->list`. Afterwards the ambiguities on
     * the "*" operator and on function keywords are resolved.
     *
     * Special cases handled while collecting tokens:
     * - a character that no parser accepts becomes a plain token and is reported
     *   through `error()`;
     * - a variable symbol directly after a string or a backtick symbol is merged
     *   into the previous token as a user symbol (`'user'@'%'`);
     * - a keyword directly after the `.` operator is demoted to a plain token;
     * - a plain `DELIMITER` token switches the statement delimiter to the
     *   characters that follow it.
     *
     * @return void
     */
    public function lex()
    {
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $list = new TokensList();

        /**
         * Last processed token.
         *
         * @var Token|null
         */
        $lastToken = null;

        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
            /**
             * The new token.
             *
             * @var Token|null
             */
            $token = null;

            // The first parser that recognises the input at this position wins.
            foreach (static::$parserMethods as $method) {
                $token = $this->$method();

                if ($token) {
                    break;
                }
            }

            if ($token === null) {
                // @assert($this->last === $lastIdx);
                $token = new Token($this->str[$this->last]);
                $this->error('Unexpected character.', $this->str[$this->last], $this->last);
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_SYMBOL
                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
                && (
                    $lastToken->type === Token::TYPE_STRING
                    || (
                        $lastToken->type === Token::TYPE_SYMBOL
                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                    )
                )
            ) {
                // Handles ```... FROM 'user'@'%' ...```.
                // The current token is folded into the previous one, so nothing
                // new is appended to the list and `$lastToken` stays as it is.
                $lastToken->token .= $token->token;
                $lastToken->type = Token::TYPE_SYMBOL;
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
                $lastToken->value .= '@' . $token->value;
                continue;
            } elseif (
                $lastToken !== null
                && $token->type === Token::TYPE_KEYWORD
                && $lastToken->type === Token::TYPE_OPERATOR
                && $lastToken->value === '.'
            ) {
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
                // a reserved word.
                $token->type = Token::TYPE_NONE;
                $token->flags = 0;
                $token->value = $token->token;
            }

            $token->position = $lastIdx;

            $list->tokens[$list->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected whitespace(s) before delimiter.', '', $this->last + 1);
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $pos = ++$this->last;
                $token = $this->parseWhitespace();

                if ($token !== null) {
                    $token->position = $pos;
                    $list->tokens[$list->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected delimiter.', '', $this->last + 1);
                    continue;
                }

                $pos = $this->last + 1;

                // Parsing the delimiter: characters are consumed up to the next
                // whitespace, the end of the input, or 15 characters.
                // The accumulator starts as an empty string (not `null`) so the
                // property keeps its documented `string` type throughout.
                $this->delimiter = '';
                $delimiterLen = 0;
                while (
                    ++$this->last < $this->len
                    && ! Context::isWhitespace($this->str[$this->last])
                    && $delimiterLen < 15
                ) {
                    $this->delimiter .= $this->str[$this->last];
                    ++$delimiterLen;
                }

                // Strict comparison on purpose: `empty()` would also reject the
                // valid one-character delimiter '0'.
                if ($this->delimiter === '') {
                    $this->error('Expected delimiter.', '', $this->last);
                    $this->delimiter = ';';
                }

                // The loop above stopped one position past the delimiter; step
                // back because the enclosing `for` advances `$this->last` again.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            $lastToken = $token;
        }

        // Adding a final delimiter to mark the ending.
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $list;

        $this->solveAmbiguityOnStarOperator();
        $this->solveAmbiguityOnFunctionKeywords();
    }
357 32
358 32
    /**
     * Resolves the ambiguity when dealing with the "*" operator.
     *
     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
     * SELECT a.* FROM ...). The decision is taken by looking at the token that follows the "*" in the list, as
     * returned by `getNext()`. The "*" is an SQL wildcard when that token is any of:
     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
     * - "," (a comma separator like in "SELECT *, field FROM...");
     * - ")" (a closing parenthesis like in "COUNT(*)").
     * This method changes the flags of the "*" tokens for which one of the conditions above holds. In every other
     * case, including when no token follows, the flags are left untouched.
     *
     * The position of the list cursor is saved on entry and restored before returning.
     */
    private function solveAmbiguityOnStarOperator(): void
    {
        $savedIdx = $this->list->idx;

        while (true) {
            $star = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*');
            if ($star === null) {
                break;
            }

            // getNext() is relied upon to skip whitespaces and comments.
            $follower = $this->list->getNext();
            if ($follower === null) {
                continue;
            }

            $followedByClause = $follower->type === Token::TYPE_KEYWORD
                && in_array($follower->value, ['FROM', 'USING'], true);
            $followedBySeparator = $follower->type === Token::TYPE_OPERATOR
                && in_array($follower->value, [',', ')'], true);

            if ($followedByClause || $followedBySeparator) {
                $star->flags = Token::FLAG_OPERATOR_SQL;
            }
        }

        $this->list->idx = $savedIdx;
    }
394 198
395
    /**
     * Resolves the ambiguity when dealing with the functions keywords.
     *
     * In SQL statements, the function keywords might be used as table names or columns names.
     * To solve this ambiguity, the token that follows the function keyword in the list (as
     * returned by `getNext()`) is inspected. The function keyword is treated as a column name
     * or table name if that token is any of:
     *
     * - one of `$keywordNameIndicators`, e.g. "FROM" (like in "SELECT Country x, AverageSalary avg FROM..."),
     *   "WHERE" (like in "DELETE FROM emp x WHERE x.salary = 20") or
     *   "SET" (like in "UPDATE Country x, City y set x.Name=x.Name");
     * - one of `$operatorNameIndicators`, e.g. "," (like 'x,' in "UPDATE Country x, City y set x.Name=x.Name")
     *   or "." (like in "x.asset_id FROM (SELECT evt.asset_id FROM evt)");
     * - a token whose value is `null`, which is how `lex()` builds the closing token of the list
     *   (like the trailing alias in "avg.col FROM (SELECT ev.col FROM ev) avg");
     * - missing altogether, i.e. `getNext()` returned `null`.
     *
     * This method changes the type and flags of the function keyword tokens when any of the
     * conditions above is true. Otherwise, the default flag (function keyword) is kept.
     *
     * The position of the list cursor is saved on entry and restored before returning.
     */
    private function solveAmbiguityOnFunctionKeywords(): void
    {
        $iBak = $this->list->idx;
        $keywordFunction = Token::TYPE_KEYWORD | Token::FLAG_KEYWORD_FUNCTION;
        while (($keywordToken = $this->list->getNextOfTypeAndFlag(Token::TYPE_KEYWORD, $keywordFunction)) !== null) {
            $next = $this->list->getNext();

            // `$next` may be null (solveAmbiguityOnStarOperator() guards against
            // it too). A missing follower is handled like the end of the list
            // instead of being dereferenced.
            if (
                $next !== null
                && ($next->type !== Token::TYPE_KEYWORD
                    || ! in_array($next->value, $this->keywordNameIndicators, true)
                )
                && ($next->type !== Token::TYPE_OPERATOR
                    || ! in_array($next->value, $this->operatorNameIndicators, true)
                )
                && ($next->value !== null)
            ) {
                // Nothing indicates a name: the keyword stays a function keyword.
                continue;
            }

            $keywordToken->type = Token::TYPE_NONE;
            // Flags are cleared with a plain 0, exactly as lex() does when it
            // demotes a keyword, rather than with a token *type* constant.
            $keywordToken->flags = 0;
            $keywordToken->keyword = $keywordToken->value;
        }

        $this->list->idx = $iBak;
    }
439 210
440 210
    /**
     * Records a lexing error.
     *
     * The message is passed through `Translator::gettext()` and wrapped, together
     * with the other arguments, in a `LexerException` that is handed to the
     * parent's `error()` method.
     *
     * @param string $msg  the error message
     * @param string $str  the character that produced the error
     * @param int    $pos  the position of the character
     * @param int    $code the code of the error
     *
     * @return void
     *
     * @throws LexerException throws the exception, if strict mode is enabled.
     */
    public function error($msg, $str = '', $pos = 0, $code = 0)
    {
        parent::error(new LexerException(Translator::gettext($msg), $str, $pos, $code));
    }
462
463
    /**
     * Parses a keyword.
     *
     * Starting at `$this->last`, characters are accumulated and the longest
     * accumulated text that `Context::isKeyword()` accepts, and that is followed
     * by a separator or by the end of the input, is returned. On success
     * `$this->last` points to the last character of the keyword; otherwise it is
     * left where it was.
     *
     * @return Token|null
     */
    public function parseKeyword()
    {
        // Text accumulated so far.
        $text = '';

        // Best match found so far; stays null when nothing matches.
        $keyword = null;

        // Value of `$this->last` at the end of the best match.
        $end = $this->last;

        // Whether the previously inspected character was a whitespace.
        $previousWasSpace = false;

        for ($size = 1; $size < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
            $char = $this->str[$this->last];
            $isSpace = Context::isWhitespace($char);

            // Composed keywords shouldn't have more than one whitespace between
            // keywords: additional whitespaces are skipped without being
            // appended and without counting towards the size.
            if ($isSpace && $previousWasSpace) {
                --$size;
                continue;
            }

            $previousWasSpace = (bool) $isSpace;

            $text .= $char;
            $flags = Context::isKeyword($text);

            $following = $this->last + 1;
            $atBoundary = $following === $this->len || Context::isSeparator($this->str[$following]);

            if (! $atBoundary || ! $flags) {
                continue;
            }

            // The loop goes on after a match so that the longest keyword wins.
            // For example, `OR` and `ORDER` have a common prefix `OR`; stopping
            // at `OR` would make the parsing invalid.
            $keyword = new Token($text, Token::TYPE_KEYWORD, $flags);
            $end = $this->last;
        }

        $this->last = $end;

        return $keyword;
    }
524 1376
525
    /**
     * Parses a label.
     *
     * Characters are accumulated from `$this->last` until a `:` is found, which
     * completes the label. The scan gives up at any other separator or when the
     * size limit is reached. On success `$this->last` points to the `:`;
     * otherwise it is left where it was.
     *
     * @return Token|null
     */
    public function parseLabel()
    {
        // Text accumulated so far.
        $text = '';

        // The label token; stays null unless a terminating `:` is reached.
        $label = null;

        // Value of `$this->last` to restore when leaving the method.
        $end = $this->last;

        for ($size = 1; $size < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
            $char = $this->str[$this->last];

            if ($size > 1 && $char === ':') {
                // End of label
                $text .= $char;
                $label = new Token($text, Token::TYPE_LABEL);
                $end = $this->last;
                break;
            }

            if ($size > 1 && Context::isWhitespace($char)) {
                // Whitespace between label and `:` does not count towards the
                // size, but it is still appended to the text below.
                --$size;
            } elseif (Context::isSeparator($char)) {
                // Any other separator
                break;
            }

            $text .= $char;
        }

        $this->last = $end;

        return $label;
    }
570
571
    /**
     * Parses an operator.
     *
     * Characters are accumulated from `$this->last` and the longest accumulated
     * text for which `Context::isOperator()` returns a truthy value is returned.
     * On success `$this->last` points to the last character of the operator;
     * otherwise it is left where it was.
     *
     * @return Token|null
     */
    public function parseOperator()
    {
        // Text accumulated so far.
        $text = '';

        // Best match found so far; stays null when nothing matches.
        $operator = null;

        // Value of `$this->last` at the end of the best match.
        $end = $this->last;

        for ($size = 1; $size < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
            $text .= $this->str[$this->last];
            $flags = Context::isOperator($text);

            if ($flags) {
                // Keep scanning: a longer operator may share this prefix.
                $operator = new Token($text, Token::TYPE_OPERATOR, $flags);
                $end = $this->last;
            }
        }

        $this->last = $end;

        return $operator;
    }
608 1394
609
    /**
     * Parses a whitespace.
     *
     * Returns `null` when the character at `$this->last` is not a whitespace.
     * Otherwise the whole run of consecutive whitespaces becomes one token and
     * `$this->last` is left on the last character of that run.
     *
     * @return Token|null
     */
    public function parseWhitespace()
    {
        $first = $this->str[$this->last];

        if (! Context::isWhitespace($first)) {
            return null;
        }

        $run = $first;

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];

            if (! Context::isWhitespace($char)) {
                break;
            }

            $run .= $char;
        }

        // The scan stopped one position past the run; step back onto it.
        --$this->last;

        return new Token($run, Token::TYPE_WHITESPACE);
    }
630
631
    /**
     * Parses a comment.
     *
     * Three families are tried in this order, each recognised through
     * `Context::isComment()`:
     * - Bash style (`#comment`), tested on the first character;
     * - C style (`/*comment*\/`), tested on the first two characters; a
     *   MySQL-specific command (`/*!12345`) is split and only its opening part
     *   is returned here, and a lone `*\/` is returned as a comment of its own;
     * - SQL style (`-- comment`), tested on up to three characters.
     *
     * For Bash and SQL style comments the terminating line feed is not consumed
     * by this method. When nothing matches, `$this->last` is restored and `null`
     * is returned.
     *
     * @return Token|null
     */
    public function parseComment()
    {
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // Bash style comments. (#comment\n)
        if (Context::isComment($token)) {
            while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                $token .= $this->str[$this->last];
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*\/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token)) {
                if ($token === '*/') {
                    // There might be a conflict with "*" operator here, when string is "*/*".
                    // This can occur in the following statements:
                    // - "SELECT */* comment */ FROM ..."
                    // - "SELECT 2*/* comment */3 AS `six`;"
                    //
                    // The check is limited to the closing marker: applying it to
                    // the opening marker as well would reject every comment that
                    // starts with "/**" (for example "/** doc */" or "/**/").
                    $next = $this->last + 1;
                    if (($next < $this->len) && $this->str[$next] === '*') {
                        // Conflict in "*/*": first "*" was not for ending a comment.
                        // Stop here and let other parsing method define the true behavior of that first star.
                        $this->last = $iBak;

                        return null;
                    }

                    // This comment already ended. It may be a part of a
                    // previous MySQL specific command.
                    return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_C);
                }

                $flags = Token::FLAG_COMMENT_C;

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len && $this->str[$this->last + 1] === '!') {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // Consuming the digits that follow the `!`.
                    while (
                        ++$this->last < $this->len
                        && $this->str[$this->last] >= '0'
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }

                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Parsing the comment: everything up to, but not including,
                // the `/` of the first `*/` pair.
                while (
                    ++$this->last < $this->len
                    && (
                        $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending. Nothing is added when the input ended
                // before the comment was closed.
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            $end = false;
        } else {
            // No further character is available; `isComment()` is told that
            // the candidate sits at the end of the input.
            --$this->last;
            $end = true;
        }

        if (Context::isComment($token, $end)) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                    $token .= $this->str[$this->last];
                }
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }

        $this->last = $iBak;

        return null;
    }
749 64
750 64
    /**
     * Parses a boolean literal starting at the current position.
     *
     * The first four characters are tested on their own; if they are not
     * recognised as a boolean, a fifth character is appended and the test is
     * repeated. On failure the position is restored.
     *
     * @return Token|null the boolean token, or null if none starts here
     */
    public function parseBool()
    {
        $start = $this->last;
        $fourth = $start + 3;

        // At least `min(strlen('TRUE'), strlen('FALSE'))` characters must be
        // available from the current position.
        if ($fourth >= $this->len) {
            return null;
        }

        // Collect the four-character candidate (_TRUE_ or _FALS_e).
        $word = '';
        for ($pos = $start; $pos <= $fourth; ++$pos) {
            $word .= $this->str[$pos];
        }

        $this->last = $fourth;

        if (Context::isBool($word)) {
            return new Token($word, Token::TYPE_BOOL);
        }

        // Try again with one more character (fals_E_).
        $this->last = $fourth + 1;
        if ($this->last < $this->len) {
            $word .= $this->str[$this->last];
            if (Context::isBool($word)) {
                return new Token($word, Token::TYPE_BOOL, 1);
            }
        }

        // Not a boolean: rewind to where we started.
        $this->last = $start;

        return null;
    }
782 1378
783 1378
    /**
     * Parses a number (decimal, float, approximate, hexadecimal or bit value).
     *
     * On success the position is left on the last character of the number.
     * On failure the position is restored and null is returned.
     *
     * @return Token|null the number token, or null if no valid number starts here
     */
    public function parseNumber()
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machines and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 -------------------[ 0x or 0X ]------------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        //
        // Valid final states are: 2, 3, 4 (unless the token is a lone `.`),
        // 6 and 9. Any parsing that finished in a state other than these is
        // invalid.
        // Also, negative states are invalid states: they mark a number that
        // was directly followed by a letter.
        //
        // NOTE(review): a sign alone moves 5 -> 6, so input such as `1e+`
        // finishes in a valid final state; confirm whether that is intended.
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            if ($state === 1) {
                if ($this->str[$this->last] === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif (
                    $this->last + 1 < $this->len
                    && $this->str[$this->last] === '0'
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // Consume the `0` here; the `x` is appended at the end of
                    // this iteration.
                    $token .= $this->str[$this->last++];
                    $state = 2;
                } elseif ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9') {
                    $state = 3;
                } elseif ($this->str[$this->last] === '.') {
                    $state = 4;
                } elseif ($this->str[$this->last] === 'b') {
                    $state = 7;
                } elseif ($this->str[$this->last] !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                if (
                    ! (
                        ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                        || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'F')
                        || ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'f')
                    )
                ) {
                    break;
                }
            } elseif ($state === 3) {
                if ($this->str[$this->last] === '.') {
                    $state = 4;
                } elseif ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                    $state = 5;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter. The
                    // result is already decided, so stop scanning instead of
                    // walking through the rest of the input.
                    $state = -$state;
                    break;
                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($this->str[$this->last] === 'e' || $this->str[$this->last] === 'E') {
                    $state = 5;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } elseif ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if (
                    $this->str[$this->last] === '+' || $this->str[$this->last] === '-'
                    || ($this->str[$this->last] >= '0' && $this->str[$this->last] <= '9')
                ) {
                    $state = 6;
                } elseif (
                    ($this->str[$this->last] >= 'a' && $this->str[$this->last] <= 'z')
                    || ($this->str[$this->last] >= 'A' && $this->str[$this->last] <= 'Z')
                ) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } else {
                    break;
                }
            } elseif ($state === 6) {
                if ($this->str[$this->last] < '0' || $this->str[$this->last] > '9') {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($this->str[$this->last] !== '\'') {
                    break;
                }

                $state = 8;
            } elseif ($state === 8) {
                if ($this->str[$this->last] === '\'') {
                    $state = 9;
                } elseif ($this->str[$this->last] !== '0' && $this->str[$this->last] !== '1') {
                    break;
                }
            } elseif ($state === 9) {
                // The closing quote was consumed; the bit value is complete.
                break;
            }

            $token .= $this->str[$this->last];
        }

        if ($state === 2 || $state === 3 || ($token !== '.' && $state === 4) || $state === 6 || $state === 9) {
            // The loop stopped one character past the number.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }

        $this->last = $iBak;

        return null;
    }
946
947 178
    /**
     * Parses a quoted string starting at the current position.
     *
     * A string starts with a character recognised by `Context::isString()`
     * or with the additional `$quote` symbol, and ends at the next occurrence
     * of that same character. A doubled quote character is kept as part of
     * the string, and so is a backslash together with the character after it
     * (except inside backtick-quoted strings).
     *
     * @param string $quote additional starting symbol
     *
     * @return Token|null the string token, or null if no string starts here
     *
     * @throws LexerException
     */
    public function parseString($quote = '')
    {
        $token = $this->str[$this->last];
        $flags = Context::isString($token);

        // Static analysis reports `isString()` as `int|null`. Compare
        // against null explicitly so that "not a string" is never confused
        // with an integer flag value.
        if ($flags === null && $token !== $quote) {
            return null;
        }

        // From here on the opening character is the quote to look for.
        $quote = $token;

        while (++$this->last < $this->len) {
            if (
                $this->last + 1 < $this->len
                && (
                    ($this->str[$this->last] === $quote && $this->str[$this->last + 1] === $quote)
                    || ($this->str[$this->last] === '\\' && $quote !== '`')
                )
            ) {
                // Doubled quote or backslash escape: consume both characters.
                $token .= $this->str[$this->last] . $this->str[++$this->last];
            } else {
                if ($this->str[$this->last] === $quote) {
                    break;
                }

                $token .= $this->str[$this->last];
            }
        }

        if ($this->last >= $this->len || $this->str[$this->last] !== $quote) {
            // The input ended before the closing quote was found.
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $quote
                ),
                '',
                $this->last
            );
        } else {
            $token .= $this->str[$this->last];
        }

        return new Token($token, Token::TYPE_STRING, $flags);
    }
1000
1001
    /**
     * Parses a symbol starting at the current position.
     *
     * The first character is classified by `Context::isSymbol()`. Depending
     * on the resulting flags the prefix (`@`, `@@` or a parameter marker) is
     * kept in the token, and the name that follows is read either as a
     * backtick-quoted string or as an unknown token.
     *
     * @return Token|null the symbol token, or null if no symbol starts here
     *
     * @throws LexerException
     */
    public function parseSymbol()
    {
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);

        // Static analysis reports `isSymbol()` as `int|null`. Compare
        // against null explicitly so that "not a symbol" is never confused
        // with an integer flag value.
        if ($flags === null) {
            return null;
        }

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                // This is a system variable (e.g. `@@hostname`).
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
            // A bare `?` has no name after it; any other parameter marker is
            // followed by its name.
            if ($token !== '?' && $this->last + 1 < $this->len) {
                ++$this->last;
            }
        } else {
            // The opening character is re-read by `parseString()` below, so
            // it must not be kept twice.
            $token = '';
        }

        $str = null;

        if ($this->last < $this->len) {
            $str = $this->parseString('`');

            if ($str === null) {
                $str = $this->parseUnknown();

                if ($str === null) {
                    $this->error('Variable name was expected.', $this->str[$this->last], $this->last);
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }
1051 440
1052 440
    /**
     * Parses unknown parts of the query.
     *
     * Collects characters up to the next separator, the end of the input,
     * or the current delimiter, whichever comes first.
     *
     * @return Token|null the collected token, or null if the current
     *                    character is a separator
     */
    public function parseUnknown()
    {
        $token = $this->str[$this->last];
        if (Context::isSeparator($token)) {
            return null;
        }

        for (;;) {
            ++$this->last;
            if ($this->last >= $this->len) {
                break;
            }

            $char = $this->str[$this->last];
            if (Context::isSeparator($char)) {
                break;
            }

            $token .= $char;

            if (! str_ends_with($token, $this->delimiter)) {
                continue;
            }

            // The token now ends with the current delimiter: cut it off and
            // step back so that the delimiter is lexed on its own.
            $token = substr($token, 0, -$this->delimiterLen);
            $this->last -= $this->delimiterLen - 1;
            break;
        }

        // Leave the position on the last character that belongs to the token.
        --$this->last;

        return new Token($token);
    }
1079 10
1080
    /**
     * Parses the delimiter of the query.
     *
     * The token is produced only when the complete delimiter is present at
     * the current position. On success the position is left on the last
     * character of the delimiter.
     *
     * @return Token|null the delimiter token, or null if the delimiter does
     *                    not start here
     */
    public function parseDelimiter()
    {
        // The whole delimiter has to fit into the remaining input. Without
        // this check a mere prefix of the delimiter at the very end of the
        // input (e.g. a trailing `/` while the delimiter is `//`) would be
        // reported as a delimiter and the position pushed past the end.
        if ($this->last + $this->delimiterLen > $this->len) {
            return null;
        }

        for ($idx = 0; $idx < $this->delimiterLen; ++$idx) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
1101
}
1102