Passed
Push — master ( d42b40...87af0c )
by Maurício
06:31 queued 03:28
created

Lexer::parseBool()   A

Complexity

Conditions 5
Paths 5

Size

Total Lines 26
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 5

Importance

Changes 0
Metric Value
cc 5
eloc 13
nc 5
nop 0
dl 0
loc 26
ccs 14
cts 14
cp 1
crap 5
rs 9.5222
c 0
b 0
f 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace PhpMyAdmin\SqlParser;
6
7
use PhpMyAdmin\SqlParser\Exceptions\LexerException;
8
9
use function in_array;
10
use function mb_strlen;
11
use function sprintf;
12
use function str_ends_with;
13
use function strlen;
14
use function substr;
15
16
/**
17
 * Defines the lexer of the library.
18
 *
19
 * This is one of the most important components, along with the parser.
20
 *
21
 * Depends on context to extract lexemes.
22
 *
23
 * Performs lexical analysis over a SQL statement and splits it in multiple tokens.
24
 *
25
 * The output of the lexer is affected by the context of the SQL statement.
26
 *
27
 * @see Context
28
 */
29
class Lexer extends Core
30
{
31
    /**
32
     * A list of methods that are used in lexing the SQL query.
33
     *
34
     * @var string[]
35
     */
36
    public static $parserMethods = [
37
        // It is best to put the parsers in order of their complexity
38
        // (ascending) and their occurrence rate (descending).
39
        //
40
        // Conflicts:
41
        //
42
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
43
        // They fight over delimiter. The delimiter may be a keyword, a
44
        // number or almost any character which makes the delimiter one of
45
        // the first tokens that must be parsed.
46
        //
47
        // 2. `parseNumber` and `parseOperator`
48
        // They fight over `+` and `-`.
49
        //
50
        // 3. `parseComment` and `parseOperator`
51
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
52
        //
53
        // 4. `parseBool` and `parseKeyword`
54
        // They fight over `TRUE` and `FALSE`.
55
        //
56
        // 5. `parseKeyword` and `parseUnknown`
57
        // They fight over words. `parseUnknown` does not know about
58
        // keywords.
59
60
        'parseDelimiter',
61
        'parseWhitespace',
62
        'parseNumber',
63
        'parseComment',
64
        'parseOperator',
65
        'parseBool',
66
        'parseString',
67
        'parseSymbol',
68
        'parseKeyword',
69
        'parseLabel',
70
        'parseUnknown',
71
    ];
72
73
74
    /**
75
     * A list of keywords that indicate that the function keyword
76
     * is not used as a function
77
     *
78
     * @var string[]
79
     */
80
    public $keywordNameIndicators = [
81
        'FROM',
82
        'SET',
83
        'WHERE',
84
    ];
85
86
    /**
87
     * A list of operators that indicate that the function keyword
88
     * is not used as a function
89
     *
90
     * @var string[]
91
     */
92
    public $operatorNameIndicators = [
93
        ',',
94
        '.',
95
    ];
96
97
    /**
98
     * The string to be parsed.
99
     *
100
     * @var string|UtfString
101
     */
102
    public $str = '';
103
104
    /**
105
     * The length of `$str`.
106
     *
107
     * By storing its length, a lot of time is saved, because parsing methods
108
     * would call `strlen` every time.
109
     *
110
     * @var int
111
     */
112
    public $len = 0;
113
114
    /**
115
     * The index of the last parsed character.
116
     *
117
     * @var int
118
     */
119
    public $last = 0;
120
121
    /**
122
     * Tokens extracted from given strings.
123
     *
124
     * @var TokensList
125
     */
126
    public $list;
127
128
    /**
129
     * The default delimiter. This is used, by default, in all new instances.
130
     *
131
     * @var string
132
     */
133
    public static $defaultDelimiter = ';';
134
135
    /**
136
     * Statements delimiter.
137
     * This may change during lexing.
138
     *
139
     * @var string
140
     */
141
    public $delimiter;
142
143
    /**
144
     * The length of the delimiter.
145
     *
146
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
147
     * calls to `strlen`, which might affect performance when the delimiter is
148
     * big.
149
     *
150
     * @var int
151
     */
152
    public $delimiterLen;
153
154
    /**
155
     * Gets the tokens list parsed by a new instance of a lexer.
156
     *
157
     * @param string|UtfString $str       the query to be lexed
158
     * @param bool             $strict    whether strict mode should be
159
     *                                    enabled or not
160
     * @param string           $delimiter the delimiter to be used
161
     *
162
     * @return TokensList
163
     */
164 2
    public static function getTokens($str, $strict = false, $delimiter = null)
165
    {
166 2
        $lexer = new self($str, $strict, $delimiter);
167
168 2
        return $lexer->list;
169
    }
170
171
    /**
     * Stores the query, picks the delimiter and immediately lexes the query.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string           $delimiter the delimiter to be used
     */
    public function __construct($str, $strict = false, $delimiter = null)
    {
        parent::__construct();

        if ($str instanceof UtfString) {
            $length = $str->length();
        } else {
            // `strlen` is used instead of `mb_strlen` because the lexer needs
            // to parse each byte of the input.
            $length = strlen($str);

            // A byte count that differs from the character count means the
            // input is multi-byte, so it gets wrapped in a `UtfString`.
            if ($length !== mb_strlen($str, 'UTF-8')) {
                $str = new UtfString($str);
                $length = $str->length();
            }
        }

        $this->str = $str;
        $this->len = $length;

        $this->strict = $strict;

        // Falls back to the default delimiter when none (or an empty one) was given.
        $this->setDelimiter(empty($delimiter) ? static::$defaultDelimiter : $delimiter);

        $this->lex();
    }
200
201
    /**
     * Sets the delimiter and caches its byte length alongside it.
     *
     * @param string $delimiter the new delimiter
     */
    public function setDelimiter($delimiter): void
    {
        $this->delimiter = $delimiter;

        // Cached so that `parseDelimiter` does not have to call `strlen` itself.
        $this->delimiterLen = strlen($this->delimiter);
    }
211
212
    /**
     * Parses the string and extracts lexemes.
     *
     * At every position the methods listed in `static::$parserMethods` are
     * tried in order and the first token produced is kept. The collected
     * tokens are stored in `$this->list`, followed by a final delimiter token,
     * and the two ambiguity-solving passes are run on the result.
     */
    public function lex(): void
    {
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $tokens = new TokensList();

        /**
         * Token handled by the previous iteration, if any.
         *
         * @var Token|null
         */
        $previous = null;

        for ($this->last = 0, $start = 0; $this->last < $this->len; $start = ++$this->last) {
            /**
             * Token found at the current position.
             *
             * @var Token|null
             */
            $token = null;

            foreach (static::$parserMethods as $method) {
                $token = $this->$method();

                if ($token) {
                    break;
                }
            }

            if ($token === null) {
                // No parser recognised the character: keep it as a raw token
                // and report it.
                // @assert($this->last === $start);
                $token = new Token($this->str[$this->last]);
                $this->error('Unexpected character.', $this->str[$this->last], $this->last);
            } elseif ($previous !== null) {
                if ($token->type === Token::TYPE_SYMBOL && ($token->flags & Token::FLAG_SYMBOL_VARIABLE)) {
                    $afterString = $previous->type === Token::TYPE_STRING;
                    $afterBacktick = $previous->type === Token::TYPE_SYMBOL
                        && ($previous->flags & Token::FLAG_SYMBOL_BACKTICK);

                    if ($afterString || $afterBacktick) {
                        // Handles ```... FROM 'user'@'%' ...```: the variable
                        // is folded into the previous token, which becomes a
                        // user symbol.
                        $previous->token .= $token->token;
                        $previous->type = Token::TYPE_SYMBOL;
                        $previous->flags = Token::FLAG_SYMBOL_USER;
                        $previous->value .= '@' . $token->value;
                        continue;
                    }
                }

                if (
                    $token->type === Token::TYPE_KEYWORD
                    && $previous->type === Token::TYPE_OPERATOR
                    && $previous->value === '.'
                ) {
                    // Handles ```... tbl.FROM ...```. In this case, FROM is not
                    // a reserved word.
                    $token->type = Token::TYPE_NONE;
                    $token->flags = 0;
                    $token->value = $token->token;
                }
            }

            $token->position = $start;
            $tokens->tokens[$tokens->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected whitespace(s) before delimiter.', '', $this->last + 1);
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $offset = ++$this->last;
                $token = $this->parseWhitespace();

                if ($token !== null) {
                    $token->position = $offset;
                    $tokens->tokens[$tokens->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error('Expected delimiter.', '', $this->last + 1);
                    continue;
                }

                $offset = $this->last + 1;

                // Parsing the delimiter: everything up to the next whitespace,
                // capped at 15 characters.
                $this->delimiter = null;
                for (
                    $taken = 0;
                    ++$this->last < $this->len
                    && ! Context::isWhitespace($this->str[$this->last])
                    && $taken < 15;
                    ++$taken
                ) {
                    $this->delimiter .= $this->str[$this->last];
                }

                if (empty($this->delimiter)) {
                    $this->error('Expected delimiter.', '', $this->last);
                    $this->delimiter = ';';
                }

                // The scan stopped one character past the delimiter.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $offset;
                $tokens->tokens[$tokens->count++] = $token;
            }

            $previous = $token;
        }

        // Adding a final delimiter to mark the ending.
        $tokens->tokens[$tokens->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $tokens;

        $this->solveAmbiguityOnStarOperator();
        $this->solveAmbiguityOnFunctionKeywords();
    }
353
354
    /**
     * Resolves the ambiguity when dealing with the "*" operator.
     *
     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
     * SELECT a.* FROM ...). To solve this ambiguity, the token that follows the "*" (as returned by the list's
     * `getNext()`) is inspected. The "*" is treated as an SQL wildcard if that token is any of:
     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
     * - "," (a comma separator like in "SELECT *, field FROM...");
     * - ")" (a closing parenthesis like in "COUNT(*)").
     * This method changes the flag of the "*" token when any of the conditions above is true. Otherwise, the
     * flag set by the lexer is kept. The list's cursor is restored before returning.
     */
    private function solveAmbiguityOnStarOperator(): void
    {
        $savedIdx = $this->list->idx;

        while (($star = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*')) !== null) {
            // getNext() already gets rid of whitespaces and comments.
            $follower = $this->list->getNext();

            if ($follower === null) {
                continue;
            }

            $beforeKeyword = $follower->type === Token::TYPE_KEYWORD
                && in_array($follower->value, ['FROM', 'USING'], true);
            $beforeOperator = $follower->type === Token::TYPE_OPERATOR
                && in_array($follower->value, [',', ')'], true);

            if ($beforeKeyword || $beforeOperator) {
                $star->flags = Token::FLAG_OPERATOR_SQL;
            }
        }

        $this->list->idx = $savedIdx;
    }
390
391
    /**
     * Resolves the ambiguity when dealing with the functions keywords.
     *
     * In SQL statements, the function keywords might be used as table names or columns names.
     * To solve this ambiguity, the token that follows the function keyword (as returned by the
     * list's `getNext()`) is inspected. The function keyword is treated as a column name or
     * table name if that token is any of:
     *
     * - a keyword listed in `$keywordNameIndicators`
     *   (e.g. "FROM" like in "SELECT Country x, AverageSalary avg FROM...");
     * - an operator listed in `$operatorNameIndicators`
     *   (e.g. "," like 'x,' in "UPDATE Country x, City y set x.Name=x.Name");
     * - a token whose value is null
     *   (when used as a table alias like in "avg.col FROM (SELECT ev.col FROM ev) avg").
     *
     * This method resets the type and flags of the function keyword token when any of those
     * conditions is true. Otherwise, the token is left untouched. The list's cursor is
     * restored before returning.
     */
    private function solveAmbiguityOnFunctionKeywords(): void
    {
        $savedIdx = $this->list->idx;
        $functionFlag = Token::TYPE_KEYWORD | Token::FLAG_KEYWORD_FUNCTION;

        while (($keyword = $this->list->getNextOfTypeAndFlag(Token::TYPE_KEYWORD, $functionFlag)) !== null) {
            $follower = $this->list->getNext();

            $beforeKeyword = $follower->type === Token::TYPE_KEYWORD
                && in_array($follower->value, $this->keywordNameIndicators, true);
            $beforeOperator = $follower->type === Token::TYPE_OPERATOR
                && in_array($follower->value, $this->operatorNameIndicators, true);

            if (! $beforeKeyword && ! $beforeOperator && $follower->value !== null) {
                // Nothing suggests the keyword is used as a name.
                continue;
            }

            $keyword->type = Token::TYPE_NONE;
            $keyword->flags = Token::TYPE_NONE;
            $keyword->keyword = $keyword->value;
        }

        $this->list->idx = $savedIdx;
    }
435
436
    /**
     * Creates a new error log.
     *
     * The message is passed through `Translator::gettext()`, wrapped in a
     * `LexerException` and handed to the parent's `error()`.
     *
     * @param string $msg  the error message
     * @param string $str  the character that produced the error
     * @param int    $pos  the position of the character
     * @param int    $code the code of the error
     *
     * @throws LexerException throws the exception, if strict mode is enabled.
     */
    public function error($msg, $str = '', $pos = 0, $code = 0): void
    {
        $translated = Translator::gettext($msg);

        parent::error(new LexerException($translated, $str, $pos, $code));
    }
456
457
    /**
     * Parses a keyword.
     *
     * Characters are accumulated from the current position and every prefix
     * is checked with `Context::isKeyword()`. The longest prefix that is a
     * keyword and ends at a separator (or at the end of the input) wins;
     * `$this->last` is left on its final character, or unchanged when there
     * is no match.
     *
     * @return Token|null
     */
    public function parseKeyword()
    {
        $candidate = '';

        /**
         * Longest keyword found so far.
         *
         * @var Token|null
         */
        $match = null;

        /**
         * The value of `$this->last` where `$match` ends in `$this->str`.
         */
        $matchEnd = $this->last;

        /**
         * Whether the previously read character was a whitespace.
         */
        $afterSpace = false;

        for ($size = 1; $size < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
            $char = $this->str[$this->last];
            $isSpace = Context::isWhitespace($char);

            // Composed keywords shouldn't have more than one whitespace between
            // keywords.
            if ($isSpace && $afterSpace) {
                --$size; // The size of the keyword didn't increase.
                continue;
            }

            $afterSpace = $isSpace;

            $candidate .= $char;
            $flags = Context::isKeyword($candidate);

            // The candidate only counts when it ends the input or is followed
            // by a separator.
            $atBoundary = $this->last + 1 === $this->len
                || Context::isSeparator($this->str[$this->last + 1]);

            if ($atBoundary && $flags) {
                // We don't break so we find longest keyword.
                // For example, `OR` and `ORDER` have a common prefix `OR`.
                // If we stopped at `OR`, the parsing would be invalid.
                $match = new Token($candidate, Token::TYPE_KEYWORD, $flags);
                $matchEnd = $this->last;
            }
        }

        $this->last = $matchEnd;

        return $match;
    }
518
519
    /**
     * Parses a label.
     *
     * Characters are collected until a `:` is met (the label is complete) or
     * a non-whitespace separator is met (there is no label here).
     * `$this->last` is left on the `:`, or unchanged when there is no label.
     *
     * @return Token|null
     */
    public function parseLabel()
    {
        $text = '';

        /**
         * Value to be returned.
         *
         * @var Token|null
         */
        $label = null;

        /**
         * The value of `$this->last` where `$text` ends in `$this->str`.
         */
        $labelEnd = $this->last;

        for ($size = 1; $size < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
            $char = $this->str[$this->last];
            $started = $size > 1;

            if ($char === ':' && $started) {
                // End of label
                $text .= $char;
                $label = new Token($text, Token::TYPE_LABEL);
                $labelEnd = $this->last;
                break;
            }

            if (Context::isWhitespace($char) && $started) {
                // Whitespace between label and :
                // The size of the keyword didn't increase.
                --$size;
            } elseif (Context::isSeparator($char)) {
                // Any other separator
                break;
            }

            $text .= $char;
        }

        $this->last = $labelEnd;

        return $label;
    }
564
565
    /**
     * Parses an operator.
     *
     * Every prefix starting at the current position is checked with
     * `Context::isOperator()`; the longest one recognised wins.
     * `$this->last` is left on its final character, or unchanged when there
     * is no match.
     *
     * @return Token|null
     */
    public function parseOperator()
    {
        $candidate = '';

        /**
         * Longest operator found so far.
         *
         * @var Token|null
         */
        $match = null;

        /**
         * The value of `$this->last` where `$match` ends in `$this->str`.
         */
        $matchEnd = $this->last;

        for ($size = 1; $size < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$size, ++$this->last) {
            $candidate .= $this->str[$this->last];
            $flags = Context::isOperator($candidate);

            if ($flags) {
                $match = new Token($candidate, Token::TYPE_OPERATOR, $flags);
                $matchEnd = $this->last;
            }
        }

        $this->last = $matchEnd;

        return $match;
    }
602
603
    /**
     * Parses a whitespace.
     *
     * Consumes the whole run of consecutive whitespace characters that starts
     * at the current position and leaves `$this->last` on the last of them.
     *
     * @return Token|null
     */
    public function parseWhitespace()
    {
        $run = $this->str[$this->last];

        if (! Context::isWhitespace($run)) {
            return null;
        }

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];

            if (! Context::isWhitespace($char)) {
                break;
            }

            $run .= $char;
        }

        // The scan stopped one character past the run.
        --$this->last;

        return new Token($run, Token::TYPE_WHITESPACE);
    }
624
625
    /**
     * Parses a comment.
     *
     * Three syntaxes are tried in turn at the current position:
     * - Bash style (`#comment`), up to but excluding the end of the line;
     * - C style (slash-star ... star-slash), including the `!` form used for
     *   MySQL-specific commands, of which only the opening part is returned,
     *   and a lone closing marker left over from such a command;
     * - SQL style (`-- comment`), up to but excluding the end of the line.
     *
     * When nothing matches, `$this->last` is restored and null is returned.
     *
     * @return Token|null
     */
    public function parseComment()
    {
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // Bash style comments. (#comment\n)
        if (Context::isComment($token)) {
            while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                $token .= $this->str[$this->last];
            }

            // Step back so that the trailing \n is lexed as a whitespace token.
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token)) {
                // There might be a conflict with "*" operator here, when string is "*/*".
                // This can occurs in the following statements:
                // - "SELECT */* comment */ FROM ..."
                // - "SELECT 2*/* comment */3 AS `six`;"
                // The conflict only exists for the closing marker "*/": an
                // opening "/*" followed by "*" (as in "/** doc */" or "/**/")
                // is a regular comment and must not be rejected here.
                $next = $this->last + 1;
                if ($token === '*/' && $next < $this->len && $this->str[$next] === '*') {
                    // Conflict in "*/*": first "*" was not for ending a comment.
                    // Stop here and let other parsing method define the true behavior of that first star.
                    $this->last = $iBak;

                    return null;
                }

                $flags = Token::FLAG_COMMENT_C;

                // This comment already ended. It may be a part of a
                // previous MySQL specific command.
                if ($token === '*/') {
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len && $this->str[$this->last + 1] === '!') {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // The digits that directly follow the "!" belong to the opener.
                    while (
                        ++$this->last < $this->len
                        && $this->str[$this->last] >= '0'
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }

                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Parsing the comment, up to the closing "*/" or the end of
                // the input.
                while (
                    ++$this->last < $this->len
                    && (
                        $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending.
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            $end = false;
        } else {
            // No third character is available; `$end` tells `isComment` that
            // the input stops here.
            --$this->last;
            $end = true;
        }

        if (Context::isComment($token, $end)) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                while (++$this->last < $this->len && $this->str[$this->last] !== "\n") {
                    $token .= $this->str[$this->last];
                }
            }

            // Step back so that the trailing \n is lexed as a whitespace token.
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }

        $this->last = $iBak;

        return null;
    }
743
744
    /**
     * Tries to read a boolean literal starting at the current position.
     *
     * A four-character candidate is checked first and, failing that, a
     * five-character one; `Context::isBool()` decides whether a candidate
     * is a boolean. On success the cursor is left on the last character of
     * the literal. On failure the cursor is restored.
     *
     * @return Token|null the boolean token, or null if none starts here
     */
    public function parseBool()
    {
        // The shortest candidate is four characters long (`TRUE`), so there
        // is nothing to do when fewer than four characters remain.
        if ($this->len <= $this->last + 3) {
            return null;
        }

        $start = $this->last;

        // Collect the first four characters and leave the cursor on the
        // fourth one.
        $candidate = '';
        for ($offset = 0; $offset < 4; ++$offset) {
            $candidate .= $this->str[$start + $offset];
        }

        $this->last = $start + 3;

        if (Context::isBool($candidate)) {
            return new Token($candidate, Token::TYPE_BOOL);
        }

        // Extend the candidate by one character (`FALS` -> `FALSE`).
        ++$this->last;
        if ($this->last < $this->len) {
            $candidate .= $this->str[$this->last];
            if (Context::isBool($candidate)) {
                return new Token($candidate, Token::TYPE_BOOL, 1);
            }
        }

        // Not a boolean: rewind to where parsing started.
        $this->last = $start;

        return null;
    }
776
777
    /**
     * Parses a number.
     *
     * Recognises optionally signed integers, decimals, numbers in
     * approximate (exponent) form, hexadecimal numbers (`0x...`) and bit
     * values (`b'...'`). On success the cursor is left on the last character
     * of the number; otherwise the cursor is restored to where it started.
     *
     * @return Token|null the number token, or null if no number starts here
     */
    public function parseNumber()
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machines and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 -------------------[ 0x or 0X ]------------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        //
        // Valid final states are: 2, 3, 4, 6 and 9. Any parsing that finished
        // in a state other than these is invalid.
        // Also, negative states are invalid states: they are entered when a
        // number is directly followed by a letter, and parsing stops at once
        // because nothing consumed afterwards could make the token valid.
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];

            if ($state === 1) {
                if ($char === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif (
                    $this->last + 1 < $this->len
                    && $char === '0'
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // Consume the `0` now; the `x` the cursor moves onto is
                    // appended at the bottom of this iteration.
                    $token .= $char;
                    ++$this->last;
                    $state = 2;
                } elseif ($char >= '0' && $char <= '9') {
                    $state = 3;
                } elseif ($char === '.') {
                    $state = 4;
                } elseif ($char === 'b') {
                    $state = 7;
                } elseif ($char !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                if (
                    ! (
                        ($char >= '0' && $char <= '9')
                        || ($char >= 'A' && $char <= 'F')
                        || ($char >= 'a' && $char <= 'f')
                    )
                ) {
                    break;
                }
            } elseif ($state === 3) {
                if ($char === '.') {
                    $state = 4;
                } elseif ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif (($char >= 'a' && $char <= 'z') || ($char >= 'A' && $char <= 'Z')) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } elseif ($char < '0' || $char > '9') {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($char === 'e' || $char === 'E') {
                    $state = 5;
                } elseif (($char >= 'a' && $char <= 'z') || ($char >= 'A' && $char <= 'Z')) {
                    // A number can't be directly followed by a letter.
                    $state = -$state;
                    break;
                } elseif ($char < '0' || $char > '9') {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if ($char === '+' || $char === '-' || ($char >= '0' && $char <= '9')) {
                    $state = 6;
                } else {
                    // Neither a sign nor a digit follows the exponent marker.
                    // State 5 is not a final state, so stopping here rejects
                    // the token.
                    break;
                }
            } elseif ($state === 6) {
                if ($char < '0' || $char > '9') {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($char !== '\'') {
                    break;
                }

                $state = 8;
            } elseif ($state === 8) {
                if ($char === '\'') {
                    $state = 9;
                } elseif ($char !== '0' && $char !== '1') {
                    break;
                }
            } elseif ($state === 9) {
                // The closing quote was consumed by the previous iteration.
                break;
            }

            // Re-read the character: the hex prefix branch moved the cursor.
            $token .= $this->str[$this->last];
        }

        // A lone `.` reaches state 4 but is not a number.
        if ($state === 2 || $state === 3 || ($token !== '.' && $state === 4) || $state === 6 || $state === 9) {
            // The loop stopped one character past the number.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }

        $this->last = $iBak;

        return null;
    }
940
941
    /**
     * Reads a quoted string starting at the current position.
     *
     * The current character must either be recognised by
     * `Context::isString()` or be equal to `$quote`; it then serves as the
     * quote character that terminates the string. A doubled quote character
     * is kept in the token as a pair, and so is a backslash together with
     * the character after it (except inside backtick-quoted strings).
     *
     * If the closing quote is missing, an error is reported through
     * `$this->error()` and the token read so far is still returned.
     *
     * @param string $quote additional starting symbol
     *
     * @return Token|null the string token, or null if no string starts here
     *
     * @throws LexerException
     */
    public function parseString($quote = '')
    {
        $opening = $this->str[$this->last];
        $flags = Context::isString($opening);

        if ($opening !== $quote && ! $flags) {
            return null;
        }

        // From here on the opening character is the expected closing quote.
        $quote = $opening;
        $token = $opening;

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];

            // Two-character sequences (a doubled quote or a backslash escape)
            // can only occur when at least one more character follows.
            $isPair = $this->last + 1 < $this->len
                && (
                    ($char === $quote && $this->str[$this->last + 1] === $quote)
                    || ($char === '\\' && $quote !== '`')
                );

            if ($isPair) {
                $token .= $char . $this->str[++$this->last];
                continue;
            }

            if ($char === $quote) {
                // Closing quote reached; the cursor stays on it.
                break;
            }

            $token .= $char;
        }

        if ($this->last < $this->len && $this->str[$this->last] === $quote) {
            $token .= $this->str[$this->last];
        } else {
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $quote
                ),
                '',
                $this->last
            );
        }

        return new Token($token, Token::TYPE_STRING, $flags);
    }
994
995
    /**
     * Parses a symbol.
     *
     * The kind of symbol is determined by `Context::isSymbol()` from the
     * current character:
     *
     * - a variable (`@name`), upgraded to a system variable when a second
     *   `@` follows (e.g. `@@hostname`);
     * - a parameter, either named (the marker followed by a name) or the
     *   positional `?` placeholder, which carries no name;
     * - any other symbol kind, for which the leading character is dropped
     *   from the token and only the name that follows is kept.
     *
     * The name is read with `parseString('`')` and, failing that, with
     * `parseUnknown()`. A missing name is reported as an error for every
     * symbol except the positional `?` placeholder, for which no name is
     * expected.
     *
     * @return Token|null the symbol token, or null if no symbol starts here
     *
     * @throws LexerException
     */
    public function parseSymbol()
    {
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);

        if (! $flags) {
            return null;
        }

        // Whether this is the positional `?` placeholder, which has no name.
        $isPositional = false;

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            // Note that the cursor moves past the first `@` whenever another
            // character follows, whether or not that character is `@`.
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                // This is a system variable (e.g. `@@hostname`).
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
            if ($token === '?') {
                $isPositional = true;
            } elseif ($this->last + 1 < $this->len) {
                // Skip the marker so that the name can be read below.
                ++$this->last;
            }
        } else {
            $token = '';
        }

        $str = null;

        if ($this->last < $this->len) {
            $str = $this->parseString('`');

            if ($str === null) {
                $str = $this->parseUnknown();

                // A bare `?` is complete on its own; only the other symbols
                // require a name.
                if ($str === null && ! $isPositional) {
                    $this->error('Variable name was expected.', $this->str[$this->last], $this->last);
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }
1045
1046
    /**
     * Reads a run of characters that no other parser recognised.
     *
     * Characters are collected until a separator (as decided by
     * `Context::isSeparator()`) or the current delimiter is met. The
     * delimiter itself is never part of the returned token. The cursor is
     * left on the last character that belongs to the token.
     *
     * @return Token|null the token, or null if the current character is a
     *                    separator
     */
    public function parseUnknown()
    {
        $token = $this->str[$this->last];
        if (Context::isSeparator($token)) {
            return null;
        }

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];
            if (Context::isSeparator($char)) {
                break;
            }

            $token .= $char;

            if (! str_ends_with($token, $this->delimiter)) {
                continue;
            }

            // The token now ends with the current delimiter: strip it and
            // move the cursor back onto the delimiter's first character.
            $token = substr($token, 0, -$this->delimiterLen);
            $this->last -= $this->delimiterLen - 1;
            break;
        }

        // Step back onto the last character that belongs to the token.
        --$this->last;

        return new Token($token);
    }
1073
1074
    /**
     * Parses the delimiter of the query.
     *
     * The delimiter is matched character by character against the input at
     * the current position. It only matches when it is present in full: a
     * prefix of the delimiter cut short by the end of the input is rejected.
     * On success the cursor is left on the last character of the delimiter;
     * on failure the cursor is not moved.
     *
     * @return Token|null the delimiter token, or null if the delimiter does
     *                    not start here
     */
    public function parseDelimiter()
    {
        // Not enough input left to hold the whole delimiter.
        if ($this->last + $this->delimiterLen > $this->len) {
            return null;
        }

        for ($idx = 0; $idx < $this->delimiterLen; ++$idx) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
1095
}
1096