Passed
Push — master ( c6dd2c...0efc01 )
by Maurício
02:43
created

Lexer::__construct()   B

Complexity

Conditions 7
Paths 8

Size

Total Lines 23
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 7

Importance

Changes 0
Metric Value
cc 7
eloc 9
nc 8
nop 3
dl 0
loc 23
ccs 10
cts 10
cp 1
crap 7
rs 8.8333
c 0
b 0
f 0
1
<?php
2
/**
3
 * Defines the lexer of the library.
4
 *
5
 * This is one of the most important components, along with the parser.
6
 *
7
 * Depends on context to extract lexemes.
8
 */
9
10
declare(strict_types=1);
11
12
namespace PhpMyAdmin\SqlParser;
13
14
use PhpMyAdmin\SqlParser\Exceptions\LexerException;
15
use function define;
16
use function defined;
17
use function in_array;
18
use function mb_strlen;
19
use function sprintf;
20
use function strlen;
21
use function substr;
22
23 7
if (defined('USE_UTF_STRINGS') === false) {
    // NOTE: In previous versions of PHP (5.5 and older) the default
    // internal encoding is "ISO-8859-1".
    // All `mb_` functions must specify the correct encoding, which is
    // 'UTF-8' in order to work properly.

    /*
     * Boolean switch: when true, multibyte input is wrapped in `UtfString`.
     * `UtfString` may be slower, but it gives better results.
     *
     * The constant is only defined here when the host application has not
     * already defined it, so a caller can opt out by defining it first.
     *
     * @var bool
     */
    define('USE_UTF_STRINGS', true);
}
37
38
/**
39
 * Performs lexical analysis over a SQL statement and splits it in multiple
40
 * tokens.
41
 *
42
 * The output of the lexer is affected by the context of the SQL statement.
43
 *
44
 * @see      Context
45
 */
46
class Lexer extends Core
47
{
48
    /**
49
     * A list of methods that are used in lexing the SQL query.
50
     *
51
     * @var array
52
     */
53
    public static $PARSER_METHODS = [
54
        // It is best to put the parsers in order of their complexity
55
        // (ascending) and their occurrence rate (descending).
56
        //
57
        // Conflicts:
58
        //
59
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
60
        // They fight over delimiter. The delimiter may be a keyword, a
61
        // number or almost any character which makes the delimiter one of
62
        // the first tokens that must be parsed.
63
        //
64
        // 2. `parseNumber` and `parseOperator`
65
        // They fight over `+` and `-`.
66
        //
67
        // 3. `parseComment` and `parseOperator`
68
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
69
        //
70
        // 4. `parseBool` and `parseKeyword`
71
        // They fight over `TRUE` and `FALSE`.
72
        //
73
        // 5. `parseKeyword` and `parseUnknown`
74
        // They fight over words. `parseUnknown` does not know about
75
        // keywords.
76
77
        'parseDelimiter',
78
        'parseWhitespace',
79
        'parseNumber',
80
        'parseComment',
81
        'parseOperator',
82
        'parseBool',
83
        'parseString',
84
        'parseSymbol',
85
        'parseKeyword',
86
        'parseLabel',
87
        'parseUnknown',
88
    ];
89
90
    /**
91
     * The string to be parsed.
92
     *
93
     * @var string|UtfString
94
     */
95
    public $str = '';
96
97
    /**
98
     * The length of `$str`.
99
     *
100
     * By storing its length, a lot of time is saved, because parsing methods
101
     * would call `strlen` every time.
102
     *
103
     * @var int
104
     */
105
    public $len = 0;
106
107
    /**
108
     * The index of the last parsed character.
109
     *
110
     * @var int
111
     */
112
    public $last = 0;
113
114
    /**
115
     * Tokens extracted from given strings.
116
     *
117
     * @var TokensList
118
     */
119
    public $list;
120
121
    /**
122
     * The default delimiter. This is used, by default, in all new instances.
123
     *
124
     * @var string
125
     */
126
    public static $DEFAULT_DELIMITER = ';';
127
128
    /**
129
     * Statements delimiter.
130
     * This may change during lexing.
131
     *
132
     * @var string
133
     */
134
    public $delimiter;
135
136
    /**
137
     * The length of the delimiter.
138
     *
139
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
140
     * calls to `strlen`, which might affect performance when the delimiter is
141
     * big.
142
     *
143
     * @var int
144
     */
145
    public $delimiterLen;
146
147
    /**
148
     * Gets the tokens list parsed by a new instance of a lexer.
149
     *
150
     * @param string|UtfString $str       the query to be lexed
151
     * @param bool             $strict    whether strict mode should be
152
     *                                    enabled or not
153
     * @param string           $delimiter the delimiter to be used
154
     *
155
     * @return TokensList
156
     */
157 4
    public static function getTokens($str, $strict = false, $delimiter = null)
    {
        // Constructing a lexer runs the lexing immediately, so the token
        // list is already available on the fresh instance.
        return (new self($str, $strict, $delimiter))->list;
    }
163
164
    /**
165
     * @param string|UtfString $str       the query to be lexed
166
     * @param bool             $strict    whether strict mode should be
167
     *                                    enabled or not
168
     * @param string           $delimiter the delimiter to be used
169
     */
170 1964
    public function __construct($str, $strict = false, $delimiter = null)
    {
        // `strlen` is used instead of `mb_strlen` because the lexer needs to
        // parse each byte of the input.
        $len = $str instanceof UtfString ? $str->length() : strlen($str);

        // For multi-byte strings, a new instance of `UtfString` is
        // initialized (only if `UtfString` usage is forced).
        if (! $str instanceof UtfString && USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
            $str = new UtfString($str);
        }

        $this->str = $str;
        $this->len = $str instanceof UtfString ? $str->length() : $len;

        $this->strict = $strict;

        // Setting the delimiter.
        // `empty()` alone also rejects the string '0', which would silently
        // replace an explicitly requested delimiter with the default one.
        // Every other "empty" value (null, '', false, 0, ...) still falls
        // back to the default, exactly as before.
        $useDefault = empty($delimiter) && $delimiter !== '0';
        $this->setDelimiter($useDefault ? static::$DEFAULT_DELIMITER : $delimiter);

        // The whole input is lexed eagerly; the result ends up in `$this->list`.
        $this->lex();
    }
194
195
    /**
196
     * Sets the delimiter.
197
     *
198
     * @param string $delimiter the new delimiter
199
     */
200 1964
    public function setDelimiter($delimiter)
    {
        // Store the delimiter together with its byte length so that the
        // length does not have to be recomputed on every comparison.
        $this->delimiter = $delimiter;
        $this->delimiterLen = strlen($this->delimiter);
    }
205
206
    /**
207
     * Parses the string and extracts lexemes.
208
     */
209 1964
    public function lex()
    {
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $list = new TokensList();

        /**
         * Last processed token.
         *
         * @var Token|null
         */
        $lastToken = null;

        // `$lastIdx` remembers where the current token starts, because the
        // parse* methods advance `$this->last` to the token's final character.
        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
            /**
             * The new token.
             *
             * @var Token|null
             */
            $token = null;

            // The first parser that recognizes something at the current
            // position wins; the order of `$PARSER_METHODS` resolves conflicts.
            foreach (static::$PARSER_METHODS as $method) {
                $token = $this->$method();

                if ($token) {
                    break;
                }
            }

            if ($token === null) {
                // @assert($this->last === $lastIdx);
                $token = new Token($this->str[$this->last]);
                $this->error(
                    'Unexpected character.',
                    $this->str[$this->last],
                    $this->last
                );
            } elseif ($lastToken !== null
                && $token->type === Token::TYPE_SYMBOL
                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
                && (
                    $lastToken->type === Token::TYPE_STRING
                    || (
                        $lastToken->type === Token::TYPE_SYMBOL
                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                    )
                )
            ) {
                // Handles ```... FROM 'user'@'%' ...```.
                // The variable-looking token is merged into the previous
                // token, so no new token is appended to the list.
                $lastToken->token .= $token->token;
                $lastToken->type = Token::TYPE_SYMBOL;
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
                $lastToken->value .= '@' . $token->value;
                continue;
            } elseif ($lastToken !== null
                && $token->type === Token::TYPE_KEYWORD
                && $lastToken->type === Token::TYPE_OPERATOR
                && $lastToken->value === '.'
            ) {
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
                // a reserved word.
                $token->type = Token::TYPE_NONE;
                $token->flags = 0;
                $token->value = $token->token;
            }

            $token->position = $lastIdx;

            $list->tokens[$list->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error(
                        'Expected whitespace(s) before delimiter.',
                        '',
                        $this->last + 1
                    );
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $pos = ++$this->last;
                $token = $this->parseWhitespace();

                if ($token !== null) {
                    $token->position = $pos;
                    $list->tokens[$list->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error(
                        'Expected delimiter.',
                        '',
                        $this->last + 1
                    );
                    continue;
                }

                $pos = $this->last + 1;

                // Parsing the delimiter: everything up to the next whitespace,
                // capped at 15 characters.
                $this->delimiter = null;
                $delimiterLen = 0;
                while (++$this->last < $this->len
                    && ! Context::isWhitespace($this->str[$this->last])
                    && $delimiterLen < 15
                ) {
                    $this->delimiter .= $this->str[$this->last];
                    ++$delimiterLen;
                }

                // An explicit null / empty-string check is used instead of
                // `empty()`, which would also reject the delimiter '0' and
                // replace it with ';' after reporting a bogus error.
                if ($this->delimiter === null || $this->delimiter === '') {
                    $this->error(
                        'Expected delimiter.',
                        '',
                        $this->last
                    );
                    $this->delimiter = ';';
                }

                // Step back onto the delimiter's last character; the `for`
                // increment moves past it.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            $lastToken = $token;
        }

        // Adding a final delimiter to mark the ending.
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $list;

        $this->solveAmbiguityOnStarOperator();
    }
359
360
    /**
361
     * Resolves the ambiguity when dealing with the "*" operator.
362
     *
363
     * In SQL statements, the "*" operator can be an arithmetic operator (like in 2*3) or an SQL wildcard (like in
364
     * SELECT a.* FROM ...). To solve this ambiguity, the solution is to find the next token, excluding whitespaces and
365
     * comments, right after the "*" position. The "*" is for sure an SQL wildcard if the next token found is any of:
366
     * - "FROM" (the FROM keyword like in "SELECT * FROM...");
367
     * - "USING" (the USING keyword like in "DELETE table_name.* USING...");
368
     * - "," (a comma separator like in "SELECT *, field FROM...");
369
     * - ")" (a closing parenthesis like in "COUNT(*)").
370
     * This method changes the flag of the "*" tokens when any of the conditions above is true. Otherwise, the
371
     * default flag (arithmetic) will be kept.
372
     *
373
     * @return void
374
     */
375 1964
    private function solveAmbiguityOnStarOperator()
    {
        // The list cursor is moved while scanning, so it is saved here and
        // restored at the end.
        $savedIdx = $this->list->idx;

        while (true) {
            $star = $this->list->getNextOfTypeAndValue(Token::TYPE_OPERATOR, '*');
            if ($star === null) {
                break;
            }

            // getNext() already gets rid of whitespaces and comments.
            $follower = $this->list->getNext();
            if ($follower === null) {
                continue;
            }

            $beforeKeyword = $follower->type === Token::TYPE_KEYWORD
                && in_array($follower->value, ['FROM', 'USING'], true);
            $beforeSeparator = $follower->type === Token::TYPE_OPERATOR
                && in_array($follower->value, [',', ')'], true);

            // Only a "*" followed by one of the tokens above is a wildcard;
            // every other "*" keeps its default flag.
            if ($beforeKeyword || $beforeSeparator) {
                $star->flags = Token::FLAG_OPERATOR_SQL;
            }
        }

        $this->list->idx = $savedIdx;
    }
396
397
    /**
398
     * Creates a new error log.
399
     *
400
     * @param string $msg  the error message
401
     * @param string $str  the character that produced the error
402
     * @param int    $pos  the position of the character
403
     * @param int    $code the code of the error
404
     *
405
     * @throws LexerException throws the exception, if strict mode is enabled.
406
     */
407 64
    public function error($msg, $str = '', $pos = 0, $code = 0)
    {
        // The message is passed through the translator before being wrapped
        // in a lexer-specific exception.
        $translated = Translator::gettext($msg);

        // The parent implementation receives the ready-made exception object.
        parent::error(new LexerException($translated, $str, $pos, $code));
    }
417
418
    /**
419
     * Parses a keyword.
420
     *
421
     * @return Token|null
422
     */
423 1904
    public function parseKeyword()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token|null
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         *
         * @var int
         */
        $iEnd = $this->last;

        /**
         * Whether last parsed character is a whitespace.
         *
         * @var bool
         */
        $lastSpace = false;

        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            // Composed keywords shouldn't have more than one whitespace between
            // keywords.
            if (Context::isWhitespace($this->str[$this->last])) {
                if ($lastSpace) {
                    --$j; // The size of the keyword didn't increase.
                    continue;
                }

                $lastSpace = true;
            } else {
                $lastSpace = false;
            }

            $token .= $this->str[$this->last];
            $flags = Context::isKeyword($token);

            // Keep scanning when the candidate is not followed by a separator
            // (or the end of the input), or when it is not a keyword at all.
            // `isKeyword` yields int|null, so "not a keyword" is tested with a
            // strict null check instead of relying on integer truthiness.
            if (($this->last + 1 !== $this->len && ! Context::isSeparator($this->str[$this->last + 1]))
                || $flags === null
            ) {
                continue;
            }

            $ret = new Token($token, Token::TYPE_KEYWORD, $flags);
            $iEnd = $this->last;

            // We don't break so we find longest keyword.
            // For example, `OR` and `ORDER` have a common prefix `OR`.
            // If we stopped at `OR`, the parsing would be invalid.
        }

        // Rewind to the end of the longest keyword found, or to the starting
        // position when nothing matched.
        $this->last = $iEnd;

        return $ret;
    }
481
482
    /**
483
     * Parses a label.
484
     *
485
     * @return Token|null
486
     */
487 1400
    public function parseLabel()
    {
        // Position to restore when no label is found.
        $startPos = $this->last;

        // Characters collected so far for the candidate label.
        $text = '';

        for ($count = 1; $count < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$count, ++$this->last) {
            $char = $this->str[$this->last];

            if ($char === ':' && $count > 1) {
                // End of label: `$this->last` already points at the colon,
                // which is the final character of the token.
                return new Token($text . $char, Token::TYPE_LABEL);
            }

            if (Context::isWhitespace($char) && $count > 1) {
                // Whitespace between label and `:`
                // The size of the label didn't increase.
                --$count;
            } elseif (Context::isSeparator($char)) {
                // Any other separator means this is not a label.
                break;
            }

            $text .= $char;
        }

        // No label here: undo any progress made while scanning.
        $this->last = $startPos;

        return null;
    }
529
530
    /**
531
     * Parses an operator.
532
     *
533
     * @return Token|null
534
     */
535 1940
    public function parseOperator()
    {
        $token = '';

        /**
         * Value to be returned.
         *
         * @var Token|null
         */
        $ret = null;

        /**
         * The value of `$this->last` where `$token` ends in `$this->str`.
         *
         * @var int
         */
        $iEnd = $this->last;

        // The loop does not stop at the first match, so the longest operator
        // starting at the current position is the one returned.
        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            $token .= $this->str[$this->last];
            $flags = Context::isOperator($token);

            // `isOperator` yields int|null; a strict null check avoids
            // relying on the truthiness of an integer flag value.
            if ($flags === null) {
                continue;
            }

            $ret = new Token($token, Token::TYPE_OPERATOR, $flags);
            $iEnd = $this->last;
        }

        // Rewind to the end of the longest operator found, or to the starting
        // position when nothing matched.
        $this->last = $iEnd;

        return $ret;
    }
569
570
    /**
571
     * Parses a whitespace.
572
     *
573
     * @return Token|null
574
     */
575 1940
    public function parseWhitespace()
    {
        $blank = $this->str[$this->last];

        if (! Context::isWhitespace($blank)) {
            return null;
        }

        // Swallow every whitespace character that directly follows.
        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];
            if (! Context::isWhitespace($char)) {
                break;
            }

            $blank .= $char;
        }

        // The scan stopped one position past the run; step back onto its
        // last character.
        --$this->last;

        return new Token($blank, Token::TYPE_WHITESPACE);
    }
591
592
    /**
593
     * Parses a comment.
594
     *
595
     * @return Token|null
596
     */
597 1940
    public function parseComment()
    {
        // Starting position, restored whenever no comment is recognized.
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // `Context::isComment` yields int|null, so every check below uses a
        // strict comparison with null instead of integer truthiness.

        // Bash style comments. (#comment\n)
        if (Context::isComment($token) !== null) {
            while (++$this->last < $this->len
                && $this->str[$this->last] !== "\n"
            ) {
                $token .= $this->str[$this->last];
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*\/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token) !== null) {
                // There might be a conflict with "*" operator here, when string is "*/*".
                // This can occur in the following statements:
                // - "SELECT */* comment */ FROM ..."
                // - "SELECT 2*/* comment */3 AS `six`;"
                $next = $this->last + 1;
                if (($next < $this->len) && $this->str[$next] === '*') {
                    // Conflict in "*/*": first "*" was not for ending a comment.
                    // Stop here and let other parsing method define the true behavior of that first star.
                    $this->last = $iBak;

                    return null;
                }

                $flags = Token::FLAG_COMMENT_C;

                // This comment already ended. It may be a part of a
                // previous MySQL specific command.
                if ($token === '*/') {
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len
                    && $this->str[$this->last + 1] === '!'
                ) {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // Digits that directly follow the `!` are part of the
                    // opening token.
                    while (++$this->last < $this->len
                        && $this->str[$this->last] >= '0'
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }

                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Parsing the comment.
                while (++$this->last < $this->len
                    && (
                        $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending.
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            $end = false;
        } else {
            // The input ended before a third character could be read.
            --$this->last;
            $end = true;
        }

        if (Context::isComment($token, $end) !== null) {
            // Checking if this comment did not end already (```--\n```).
            if ($this->str[$this->last] !== "\n") {
                while (++$this->last < $this->len
                    && $this->str[$this->last] !== "\n"
                ) {
                    $token .= $this->str[$this->last];
                }
            }

            // Include trailing \n as whitespace token
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
        }

        // Not a comment: undo any progress made while probing.
        $this->last = $iBak;

        return null;
    }
714
715
    /**
     * Parses a boolean literal starting at the current position.
     *
     * Four characters are read and tested first; when they are not
     * recognised a fifth one is appended and the test is repeated. On success
     * the cursor is left on the last character of the literal, otherwise it
     * is restored to where it was on entry.
     *
     * @return Token|null the boolean token, or null if no boolean starts here
     */
    public function parseBool()
    {
        $start = $this->last;

        // At least four characters (the shorter of the two candidates read
        // below) must be available.
        if ($this->len <= $start + 3) {
            return null;
        }

        // First candidate: the next four characters.
        $candidate = '';
        for ($offset = 0; $offset < 4; ++$offset) {
            $candidate .= $this->str[$start + $offset];
        }

        $this->last = $start + 3;

        if (Context::isBool($candidate)) {
            return new Token($candidate, Token::TYPE_BOOL);
        }

        // Second candidate: the same four characters plus one more.
        $this->last = $start + 4;
        if ($this->last < $this->len) {
            $candidate .= $this->str[$this->last];
            if (Context::isBool($candidate)) {
                // NOTE(review): the flag value 1 marks the five-character
                // variant; its exact meaning is defined by Token - confirm.
                return new Token($candidate, Token::TYPE_BOOL, 1);
            }
        }

        // Not a boolean: rewind.
        $this->last = $start;

        return null;
    }
747
748
    /**
     * Parses a number.
     *
     * Recognises optionally signed integers, decimals, numbers in approximate
     * (exponent) form, `0x`-prefixed hexadecimal numbers and `b'...'` bit
     * values. On success the cursor is left on the last character of the
     * number; on failure it is restored to its position on entry.
     *
     * @return Token|null the number token, or null if no number starts here
     */
    public function parseNumber()
    {
        // A rudimentary state machine is being used to parse numbers due to
        // the various forms of their notation.
        //
        // Below are the states of the machine and the conditions to change
        // the state.
        //
        //      1 --------------------[ + or - ]-------------------> 1
        //      1 -------------------[ 0x or 0X ]------------------> 2
        //      1 --------------------[ 0 to 9 ]-------------------> 3
        //      1 -----------------------[ . ]---------------------> 4
        //      1 -----------------------[ b ]---------------------> 7
        //
        //      2 --------------------[ 0 to F ]-------------------> 2
        //
        //      3 --------------------[ 0 to 9 ]-------------------> 3
        //      3 -----------------------[ . ]---------------------> 4
        //      3 --------------------[ e or E ]-------------------> 5
        //      3 ------------------[ other letter ]---------------> -3
        //
        //      4 --------------------[ 0 to 9 ]-------------------> 4
        //      4 --------------------[ e or E ]-------------------> 5
        //      4 ------------------[ other letter ]---------------> -4
        //
        //      5 ---------------[ + or - or 0 to 9 ]--------------> 6
        //      5 ---------------------[ letter ]------------------> -5
        //
        //      6 --------------------[ 0 to 9 ]-------------------> 6
        //
        //      7 -----------------------[ ' ]---------------------> 8
        //
        //      8 --------------------[ 0 or 1 ]-------------------> 8
        //      8 -----------------------[ ' ]---------------------> 9
        //
        // State 1 may be reached by negative numbers.
        // State 2 is reached only by hex numbers.
        // State 4 is reached only by float numbers.
        // State 5 is reached only by numbers in approximate form.
        // State 7 is reached only by numbers in bit representation.
        //
        // Valid final states are: 2, 3, 4 (unless the token is a lone `.`),
        // 6 and 9. Any parsing that finished in a state other than these is
        // invalid.
        // Also, negative states are invalid states; parsing stops as soon as
        // one is entered.
        $iBak = $this->last;
        $token = '';
        $flags = 0;
        $state = 1;
        for (; $this->last < $this->len; ++$this->last) {
            $chr = $this->str[$this->last];

            if ($state === 1) {
                if ($chr === '-') {
                    $flags |= Token::FLAG_NUMBER_NEGATIVE;
                } elseif ($chr === '0'
                    && $this->last + 1 < $this->len
                    && (
                        $this->str[$this->last + 1] === 'x'
                        || $this->str[$this->last + 1] === 'X'
                    )
                ) {
                    // Consume the `0` here; the `x` that follows is appended
                    // at the bottom of the loop.
                    $token .= $chr;
                    ++$this->last;
                    $state = 2;
                } elseif ($chr >= '0' && $chr <= '9') {
                    $state = 3;
                } elseif ($chr === '.') {
                    $state = 4;
                } elseif ($chr === 'b') {
                    $state = 7;
                } elseif ($chr !== '+') {
                    // `+` is a valid character in a number.
                    break;
                }
            } elseif ($state === 2) {
                $flags |= Token::FLAG_NUMBER_HEX;
                if (! (
                        ($chr >= '0' && $chr <= '9')
                        || ($chr >= 'A' && $chr <= 'F')
                        || ($chr >= 'a' && $chr <= 'f')
                    )
                ) {
                    break;
                }
            } elseif ($state === 3) {
                if ($chr === '.') {
                    $state = 4;
                } elseif ($chr === 'e' || $chr === 'E') {
                    $state = 5;
                } elseif (($chr >= 'a' && $chr <= 'z') || ($chr >= 'A' && $chr <= 'Z')) {
                    // A number can't be directly followed by a letter.
                    // Stop right away: no branch handles a negative state, so
                    // continuing would only copy the rest of the input into
                    // a token that is rejected below anyway.
                    $state = -$state;
                    break;
                } elseif ($chr < '0' || $chr > '9') {
                    // Just digits and `.`, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 4) {
                $flags |= Token::FLAG_NUMBER_FLOAT;
                if ($chr === 'e' || $chr === 'E') {
                    $state = 5;
                } elseif (($chr >= 'a' && $chr <= 'z') || ($chr >= 'A' && $chr <= 'Z')) {
                    // A number can't be directly followed by a letter
                    // (see state 3 for why parsing stops immediately).
                    $state = -$state;
                    break;
                } elseif ($chr < '0' || $chr > '9') {
                    // Just digits, `e` and `E` are valid characters.
                    break;
                }
            } elseif ($state === 5) {
                $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                if ($chr === '+' || $chr === '-' || ($chr >= '0' && $chr <= '9')) {
                    $state = 6;
                } elseif (($chr >= 'a' && $chr <= 'z') || ($chr >= 'A' && $chr <= 'Z')) {
                    // A number can't be directly followed by a letter
                    // (see state 3 for why parsing stops immediately).
                    $state = -$state;
                    break;
                } else {
                    break;
                }
            } elseif ($state === 6) {
                if ($chr < '0' || $chr > '9') {
                    // Just digits are valid characters.
                    break;
                }
            } elseif ($state === 7) {
                $flags |= Token::FLAG_NUMBER_BINARY;
                if ($chr !== '\'') {
                    break;
                }

                $state = 8;
            } elseif ($state === 8) {
                if ($chr === '\'') {
                    $state = 9;
                } elseif ($chr !== '0' && $chr !== '1') {
                    break;
                }
            } elseif ($state === 9) {
                // The closing quote was consumed on the previous iteration.
                break;
            }

            // Re-read the character: the hex prefix branch above moves the
            // cursor forward by one.
            $token .= $this->str[$this->last];
        }

        if ($state === 2 || $state === 3
            || ($token !== '.' && $state === 4)
            || $state === 6 || $state === 9
        ) {
            // The loop stopped one character past the number.
            --$this->last;

            return new Token($token, Token::TYPE_NUMBER, $flags);
        }

        $this->last = $iBak;

        return null;
    }
907
908
    /**
     * Parses a quoted string starting at the current position.
     *
     * The character under the cursor must either be recognised by
     * `Context::isString()` or be equal to `$quote`. That character becomes
     * the quote that terminates the string. A doubled quote is kept as part
     * of the string, and so is a backslash together with the character that
     * follows it (except inside backtick-quoted strings).
     *
     * If the input ends before the closing quote, an error is reported and
     * the token built so far is returned without a closing quote.
     *
     * @param string $quote additional starting symbol
     *
     * @return Token|null the string token, or null if no string starts here
     *
     * @throws LexerException
     */
    public function parseString($quote = '')
    {
        $token = $this->str[$this->last];
        $flags = Context::isString($token);

        // `isString()` yields an integer or null; compare against null
        // explicitly instead of relying on truthiness.
        if ($flags === null && $token !== $quote) {
            return null;
        }

        $quote = $token;

        while (++$this->last < $this->len) {
            if ($this->last + 1 < $this->len
                && (
                    ($this->str[$this->last] === $quote && $this->str[$this->last + 1] === $quote)
                    || ($this->str[$this->last] === '\\' && $quote !== '`')
                )
            ) {
                // Doubled quote or backslash escape: keep both characters and
                // skip over the second one.
                $token .= $this->str[$this->last] . $this->str[++$this->last];
            } else {
                if ($this->str[$this->last] === $quote) {
                    break;
                }

                $token .= $this->str[$this->last];
            }
        }

        if ($this->last >= $this->len || $this->str[$this->last] !== $quote) {
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $quote
                ),
                '',
                $this->last
            );
        } else {
            $token .= $this->str[$this->last];
        }

        // When the string was opened by the additional `$quote` symbol there
        // are no string flags; pass 0 rather than null, matching the integer
        // flags used by the other Token constructions in this class.
        return new Token($token, Token::TYPE_STRING, $flags ?? 0);
    }
960
961
    /**
     * Parses a symbol starting at the current position.
     *
     * The character under the cursor is classified by `Context::isSymbol()`:
     *
     * - variable prefix: the prefix is kept in the token; a second `@` marks
     *   a system variable (e.g. `@@hostname`);
     * - parameter prefix: the prefix is kept in the token; a bare `?` is not
     *   advanced past and is complete without a name;
     * - anything else recognised as a symbol: the token consists only of the
     *   name parsed below.
     *
     * The name is then read either as a string (backticks allowed as quotes)
     * or, failing that, as an unknown token.
     *
     * @return Token|null the symbol token, or null if no symbol starts here
     *
     * @throws LexerException
     */
    public function parseSymbol()
    {
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);

        // `isSymbol()` yields an integer or null; compare against null
        // explicitly instead of relying on truthiness.
        if ($flags === null) {
            return null;
        }

        // A bare `?` placeholder carries no name.
        $isPositionalParameter = false;

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            // The cursor is moved past the prefix even when the next
            // character is not a second `@`.
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                // This is a system variable (e.g. `@@hostname`).
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } elseif ($flags & Token::FLAG_SYMBOL_PARAMETER) {
            if ($token === '?') {
                $isPositionalParameter = true;
            } elseif ($this->last + 1 < $this->len) {
                ++$this->last;
            }
        } else {
            $token = '';
        }

        $str = null;

        if ($this->last < $this->len) {
            $str = $this->parseString('`');

            if ($str === null) {
                $str = $this->parseUnknown();

                // A missing name is only an error when one is required; the
                // positional `?` placeholder is valid on its own.
                if ($str === null && ! $isPositionalParameter) {
                    $this->error(
                        'Variable name was expected.',
                        $this->str[$this->last],
                        $this->last
                    );
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }
1015
1016
    /**
     * Parses unknown parts of the query.
     *
     * Characters are collected until a separator, the current delimiter or
     * the end of the input is reached. The cursor is left on the last
     * character that belongs to the returned token.
     *
     * @return Token|null the collected token, or null if the current
     *                    character is itself a separator
     */
    public function parseUnknown()
    {
        $text = $this->str[$this->last];
        if (Context::isSeparator($text)) {
            return null;
        }

        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $chr = $this->str[$this->last];
            if (Context::isSeparator($chr)) {
                break;
            }

            $text .= $chr;

            // Keep collecting unless the text now ends with the delimiter.
            if (substr($text, -$this->delimiterLen) !== $this->delimiter) {
                continue;
            }

            // Strip the delimiter from the text and move the cursor back to
            // its first character.
            $text = substr($text, 0, -$this->delimiterLen);
            $this->last -= $this->delimiterLen - 1;
            break;
        }

        // Step back onto the last character of the token.
        --$this->last;

        return new Token($text);
    }
1043
1044
    /**
     * Parses the delimiter of the query.
     *
     * The delimiter is matched only when all of its characters are present
     * at the current position. On success the cursor is left on the last
     * character of the delimiter; otherwise it is not moved.
     *
     * @return Token|null the delimiter token, or null if the delimiter does
     *                    not start here
     */
    public function parseDelimiter()
    {
        // The remaining input must be long enough to hold the whole
        // delimiter; a truncated delimiter at the end of the input is not a
        // match and must not push the cursor past the end.
        if ($this->last + $this->delimiterLen > $this->len) {
            return null;
        }

        for ($idx = 0; $idx < $this->delimiterLen; ++$idx) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }
        }

        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
1065
}
1066