Passed
Push — master ( 505166...4f85b6 )
by Michal
04:41
created

Lexer::parseUnknown()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 14
Code Lines 8

Duplication

Lines 14
Ratio 100 %

Code Coverage

Tests 8
CRAP Score 4

Importance

Changes 0
Metric Value
dl 14
loc 14
ccs 8
cts 8
cp 1
rs 9.2
c 0
b 0
f 0
cc 4
eloc 8
nc 3
nop 0
crap 4
1
<?php
2
3
/**
4
 * Defines the lexer of the library.
5
 *
6
 * This is one of the most important components, along with the parser.
7
 *
8
 * Depends on context to extract lexemes.
9
 */
10
11
namespace PhpMyAdmin\SqlParser;
12
13
use PhpMyAdmin\SqlParser\Exceptions\LexerException;
14
15 1
// NOTE: In previous versions of PHP (5.5 and older) the default internal
// encoding is "ISO-8859-1". All `mb_` functions must therefore be given the
// correct encoding ('UTF-8') explicitly in order to work properly.
//
// USE_UTF_STRINGS (bool) forces usage of `UtfString` if the string is
// multibyte. `UtfString` may be slower, but it gives better results.
// The constant is only defined here when the application has not already
// defined it, so a previously chosen value always wins.
defined('USE_UTF_STRINGS') || define('USE_UTF_STRINGS', true);
29
30
/**
31
 * Performs lexical analysis over a SQL statement and splits it in multiple
32
 * tokens.
33
 *
34
 * The output of the lexer is affected by the context of the SQL statement.
35
 *
36
 * @category Lexer
37
 *
38
 * @license  https://www.gnu.org/licenses/gpl-2.0.txt GPL-2.0+
39
 *
40
 * @see      Context
41
 */
42
class Lexer extends Core
43
{
44
    /**
45
     * A list of methods that are used in lexing the SQL query.
46
     *
47
     * @var array
48
     */
49
    public static $PARSER_METHODS = array(
50
        // It is best to put the parsers in order of their complexity
51
        // (ascending) and their occurrence rate (descending).
52
        //
53
        // Conflicts:
54
        //
55
        // 1. `parseDelimiter`, `parseUnknown`, `parseKeyword`, `parseNumber`
56
        // They fight over delimiter. The delimiter may be a keyword, a
57
        // number or almost any character which makes the delimiter one of
58
        // the first tokens that must be parsed.
59
        //
60
        // 2. `parseNumber` and `parseOperator`
61
        // They fight over `+` and `-`.
62
        //
63
        // 3. `parseComment` and `parseOperator`
64
        // They fight over `/` (as in ```/*comment*/``` or ```a / b```)
65
        //
66
        // 4. `parseBool` and `parseKeyword`
67
        // They fight over `TRUE` and `FALSE`.
68
        //
69
        // 5. `parseKeyword` and `parseUnknown`
70
        // They fight over words. `parseUnknown` does not know about
71
        // keywords.
72
73
        'parseDelimiter', 'parseWhitespace', 'parseNumber',
74
        'parseComment', 'parseOperator', 'parseBool', 'parseString',
75
        'parseSymbol', 'parseKeyword', 'parseLabel', 'parseUnknown',
76
    );
77
78
    /**
79
     * The string to be parsed.
80
     *
81
     * @var string|UtfString
82
     */
83
    public $str = '';
84
85
    /**
86
     * The length of `$str`.
87
     *
88
     * By storing its length, a lot of time is saved, because parsing methods
89
     * would call `strlen` every time.
90
     *
91
     * @var int
92
     */
93
    public $len = 0;
94
95
    /**
96
     * The index of the last parsed character.
97
     *
98
     * @var int
99
     */
100
    public $last = 0;
101
102
    /**
103
     * Tokens extracted from given strings.
104
     *
105
     * @var TokensList
106
     */
107
    public $list;
108
109
    /**
110
     * The default delimiter. This is used, by default, in all new instances.
111
     *
112
     * @var string
113
     */
114
    public static $DEFAULT_DELIMITER = ';';
115
116
    /**
117
     * Statements delimiter.
118
     * This may change during lexing.
119
     *
120
     * @var string
121
     */
122
    public $delimiter;
123
124
    /**
125
     * The length of the delimiter.
126
     *
127
     * Because `parseDelimiter` can be called a lot, it would perform a lot of
128
     * calls to `strlen`, which might affect performance when the delimiter is
129
     * big.
130
     *
131
     * @var int
132
     */
133
    public $delimiterLen;
134
135
    /**
     * Lexes a query with a freshly created lexer and returns its tokens.
     *
     * Convenience wrapper: the lexer instance itself is discarded, only the
     * list it produced is handed back.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string|null      $delimiter the delimiter to be used
     *
     * @return TokensList
     */
    public static function getTokens($str, $strict = false, $delimiter = null)
    {
        // The constructor performs the whole lexing pass.
        $instance = new self($str, $strict, $delimiter);
        $tokens = $instance->list;

        return $tokens;
    }
151
152
    /**
     * Constructor.
     *
     * Stores the query (wrapping it in a `UtfString` when it contains
     * multi-byte characters and `USE_UTF_STRINGS` is enabled), selects the
     * delimiter and immediately runs the lexer.
     *
     * @param string|UtfString $str       the query to be lexed
     * @param bool             $strict    whether strict mode should be
     *                                    enabled or not
     * @param string|null      $delimiter the delimiter to be used; `null` or
     *                                    an empty value selects
     *                                    `static::$DEFAULT_DELIMITER`
     */
    public function __construct($str, $strict = false, $delimiter = null)
    {
        if ($str instanceof UtfString) {
            $len = $str->length();
        } else {
            // `strlen` is used instead of `mb_strlen` because the lexer needs
            // to parse each byte of the input.
            $len = strlen($str);

            // For multi-byte strings, a new instance of `UtfString` is
            // initialized (only if `UtfString` usage is forced). A string is
            // multi-byte when its byte count differs from its character count.
            if (USE_UTF_STRINGS && $len !== mb_strlen($str, 'UTF-8')) {
                $str = new UtfString($str);
                $len = $str->length();
            }
        }

        $this->str = $str;
        $this->len = $len;

        $this->strict = $strict;

        // Setting the delimiter.
        // `empty('0')` is true in PHP, so the string '0' has to be accepted
        // explicitly; otherwise it would silently be replaced by the default.
        $hasDelimiter = $delimiter === '0' || !empty($delimiter);
        $this->setDelimiter(
            $hasDelimiter ? $delimiter : static::$DEFAULT_DELIMITER
        );

        $this->lex();
    }
184
185
    /**
     * Sets the delimiter.
     *
     * The byte length is cached alongside the delimiter so that it does not
     * have to be recomputed on every lookup.
     *
     * @param string $delimiter the new delimiter
     */
    public function setDelimiter($delimiter)
    {
        $this->delimiter = $delimiter;
        $this->delimiterLen = strlen($this->delimiter);
    }
195
196
    /**
     * Parses the string and extracts lexemes.
     *
     * Walks `$this->str` from the first to the last character and, at every
     * position, asks the methods listed in `static::$PARSER_METHODS` (in that
     * order) for a token. The tokens are collected in a `TokensList` that is
     * stored in `$this->list` and terminated by an empty delimiter token.
     *
     * On top of the plain tokenization this method:
     *  - reports 'Unexpected character.' when no parser produces a token;
     *  - merges a string / backtick symbol that is directly followed by a
     *    variable symbol into one user symbol (```'user'@'%'```);
     *  - turns a keyword that directly follows the `.` operator into a plain
     *    token (```tbl.FROM```);
     *  - handles the `DELIMITER` command and switches the active delimiter.
     */
    public function lex()
    {
        // TODO: Sometimes, static::parse* functions make unnecessary calls to
        // is* functions. For a better performance, some rules can be deduced
        // from context.
        // For example, in `parseBool` there is no need to compare the token
        // every time with `true` and `false`. The first step would be to
        // compare with 'true' only and just after that add another letter from
        // context and compare again with `false`.
        // Another example is `parseComment`.

        $list = new TokensList();

        /**
         * Last processed token.
         *
         * @var Token
         */
        $lastToken = null;

        // `$lastIdx` remembers where the current token started; the parsers
        // leave `$this->last` on the last character they consumed.
        for ($this->last = 0, $lastIdx = 0; $this->last < $this->len; $lastIdx = ++$this->last) {
            /**
             * The new token.
             *
             * @var Token
             */
            $token = null;

            // The first parser that yields a token wins.
            foreach (static::$PARSER_METHODS as $method) {
                if ($token = $this->$method()) {
                    break;
                }
            }

            if ($token === null) {
                // @assert($this->last === $lastIdx);
                $token = new Token($this->str[$this->last]);
                $this->error(
                    'Unexpected character.',
                    $this->str[$this->last],
                    $this->last
                );
            } elseif ($lastToken !== null
                && $token->type === Token::TYPE_SYMBOL
                && $token->flags & Token::FLAG_SYMBOL_VARIABLE
                && (
                    $lastToken->type === Token::TYPE_STRING
                    || (
                        $lastToken->type === Token::TYPE_SYMBOL
                        && $lastToken->flags & Token::FLAG_SYMBOL_BACKTICK
                    )
                )
            ) {
                // Handles ```... FROM 'user'@'%' ...```.
                // The new token is folded into the previous one, so nothing
                // is appended to the list and `$lastToken` stays as it is.
                $lastToken->token .= $token->token;
                $lastToken->type = Token::TYPE_SYMBOL;
                $lastToken->flags = Token::FLAG_SYMBOL_USER;
                $lastToken->value .= '@' . $token->value;
                continue;
            } elseif ($lastToken !== null
                && $token->type === Token::TYPE_KEYWORD
                && $lastToken->type === Token::TYPE_OPERATOR
                && $lastToken->value === '.'
            ) {
                // Handles ```... tbl.FROM ...```. In this case, FROM is not
                // a reserved word.
                $token->type = Token::TYPE_NONE;
                $token->flags = 0;
                $token->value = $token->token;
            }

            $token->position = $lastIdx;

            $list->tokens[$list->count++] = $token;

            // Handling delimiters.
            if ($token->type === Token::TYPE_NONE && $token->value === 'DELIMITER') {
                if ($this->last + 1 >= $this->len) {
                    $this->error(
                        'Expected whitespace(s) before delimiter.',
                        '',
                        $this->last + 1
                    );
                    continue;
                }

                // Skipping last R (from `delimiteR`) and whitespaces between
                // the keyword `DELIMITER` and the actual delimiter.
                $pos = ++$this->last;
                if (($token = $this->parseWhitespace()) !== null) {
                    $token->position = $pos;
                    $list->tokens[$list->count++] = $token;
                }

                // Preparing the token that holds the new delimiter.
                if ($this->last + 1 >= $this->len) {
                    $this->error(
                        'Expected delimiter.',
                        '',
                        $this->last + 1
                    );
                    continue;
                }
                $pos = $this->last + 1;

                // Parsing the delimiter: everything up to the next whitespace,
                // capped at 15 characters.
                $this->delimiter = null;
                $delimiterLen = 0;
                while (++$this->last < $this->len
                    && !Context::isWhitespace($this->str[$this->last])
                    && $delimiterLen < 15
                ) {
                    $this->delimiter .= $this->str[$this->last];
                    ++$delimiterLen;
                }

                // Strict checks instead of `empty()`: `empty('0')` is true,
                // which would wrongly reject `DELIMITER 0`.
                if ($this->delimiter === null || $this->delimiter === '') {
                    $this->error(
                        'Expected delimiter.',
                        '',
                        $this->last
                    );
                    $this->delimiter = ';';
                }

                // The loop above stopped one character past the delimiter.
                --$this->last;

                // Saving the delimiter and its token.
                $this->delimiterLen = strlen($this->delimiter);
                $token = new Token($this->delimiter, Token::TYPE_DELIMITER);
                $token->position = $pos;
                $list->tokens[$list->count++] = $token;
            }

            $lastToken = $token;
        }

        // Adding a final delimiter to mark the ending.
        $list->tokens[$list->count++] = new Token(null, Token::TYPE_DELIMITER);

        // Saving the tokens list.
        $this->list = $list;
    }
339
340
    /**
     * Creates a new error log.
     *
     * The message is passed through the translator, wrapped in a
     * `LexerException` and handed to the parent error handler.
     *
     * @param string $msg  the error message
     * @param string $str  the character that produced the error
     * @param int    $pos  the position of the character
     * @param int    $code the code of the error
     *
     * @throws LexerException throws the exception, if strict mode is enabled
     */
    public function error($msg, $str = '', $pos = 0, $code = 0)
    {
        $translated = Translator::gettext($msg);

        parent::error(new LexerException($translated, $str, $pos, $code));
    }
358
359
    /**
     * Parses a keyword.
     *
     * Scans forward from `$this->last`, collapsing runs of whitespace into a
     * single character, and remembers the longest prefix that is a keyword
     * and ends at a separator (or at the end of the input). On success
     * `$this->last` is left on the last character of that keyword; otherwise
     * it is left where it was.
     *
     * @return null|Token
     */
    public function parseKeyword()
    {
        // Text collected so far (with whitespace runs collapsed).
        $candidate = '';

        // Longest keyword found so far and the index where it ends.
        $match = null;
        $matchEnd = $this->last;

        // Whether the previously visited character was a whitespace.
        $prevWasSpace = false;

        for ($j = 1; $j < Context::KEYWORD_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            $char = $this->str[$this->last];
            $isSpace = (bool) Context::isWhitespace($char);

            // Composed keywords shouldn't have more than one whitespace
            // between keywords: extra whitespace is skipped and does not
            // count towards the size of the keyword.
            if ($isSpace && $prevWasSpace) {
                --$j;
                continue;
            }
            $prevWasSpace = $isSpace;

            $candidate .= $char;

            // A keyword must be followed by a separator or the end of input.
            $atBoundary = $this->last + 1 === $this->len
                || Context::isSeparator($this->str[$this->last + 1]);
            if (!$atBoundary) {
                continue;
            }

            $flags = Context::isKeyword($candidate);
            if ($flags) {
                // We don't stop here so that the longest keyword is found.
                // For example, `OR` and `ORDER` have a common prefix `OR`.
                $match = new Token($candidate, Token::TYPE_KEYWORD, $flags);
                $matchEnd = $this->last;
            }
        }

        $this->last = $matchEnd;

        return $match;
    }
419
420
    /**
     * Parses a label.
     *
     * A label is a run of non-separator characters (optionally followed by
     * whitespace) that ends with a colon. On success `$this->last` is left
     * on the colon; otherwise it is restored to its initial value.
     *
     * @return null|Token
     */
    public function parseLabel()
    {
        $start = $this->last;
        $text = '';

        for ($j = 1; $j < Context::LABEL_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            $char = $this->str[$this->last];

            if ($char === ':' && $j > 1) {
                // End of label; `$this->last` already points at the colon.
                return new Token($text . $char, Token::TYPE_LABEL);
            }

            if (Context::isWhitespace($char) && $j > 1) {
                // Whitespace between label and `:` is kept in the token but
                // does not count towards the size of the label.
                --$j;
            } elseif (Context::isSeparator($char)) {
                // Any other separator means this is not a label.
                break;
            }

            $text .= $char;
        }

        $this->last = $start;

        return null;
    }
464
465
    /**
     * Parses an operator.
     *
     * Grows a candidate one character at a time and keeps the longest one
     * that `Context::isOperator` accepts. On success `$this->last` is left on
     * the last character of the operator; otherwise it is left unchanged.
     *
     * @return null|Token
     */
    public function parseOperator()
    {
        $candidate = '';

        // Longest operator found so far and the index where it ends.
        $match = null;
        $matchEnd = $this->last;

        for ($j = 1; $j < Context::OPERATOR_MAX_LENGTH && $this->last < $this->len; ++$j, ++$this->last) {
            $candidate .= $this->str[$this->last];

            $flags = Context::isOperator($candidate);
            if (!$flags) {
                continue;
            }

            $match = new Token($candidate, Token::TYPE_OPERATOR, $flags);
            $matchEnd = $this->last;
        }

        $this->last = $matchEnd;

        return $match;
    }
500
501
    /**
     * Parses a whitespace.
     *
     * Consumes the whole run of whitespace characters that starts at
     * `$this->last` and leaves `$this->last` on the last character of the
     * run. Nothing is consumed when the current character is not whitespace.
     *
     * @return null|Token
     */
    public function parseWhitespace()
    {
        $run = $this->str[$this->last];

        if (!Context::isWhitespace($run)) {
            return null;
        }

        // Look ahead until the first non-whitespace character (or the end).
        for ($next = $this->last + 1; $next < $this->len; ++$next) {
            $char = $this->str[$next];
            if (!Context::isWhitespace($char)) {
                break;
            }
            $run .= $char;
        }

        $this->last = $next - 1;

        return new Token($run, Token::TYPE_WHITESPACE);
    }
522
523
    /**
     * Parses a comment.
     *
     * Three forms are tried in turn, growing the candidate by one character
     * each time:
     *  1. Bash style (```#comment```), decided on the first character;
     *  2. C style (```/*comment*\/```), decided on the first two characters,
     *     including the lone ```*\/``` terminator and the opening part of a
     *     MySQL-specific command (```/*!12345```);
     *  3. SQL style (```-- comment```), decided on the first three characters.
     *
     * Line comments stop before the terminating "\n", which is left in the
     * input. If nothing matches, `$this->last` is restored.
     *
     * @return null|Token
     */
    public function parseComment()
    {
        $iBak = $this->last;
        $token = $this->str[$this->last];

        // Bash style comments. (#comment\n)
        if (Context::isComment($token)) {
            while (++$this->last < $this->len
                && $this->str[$this->last] !== "\n"
            ) {
                $token .= $this->str[$this->last];
            }

            // Step back so that the trailing \n is lexed as a whitespace
            // token; nothing to step back from at the end of the input.
            if ($this->last < $this->len) {
                --$this->last;
            }

            return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_BASH);
        }

        // C style comments. (/*comment*\/)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token)) {
                $flags = Token::FLAG_COMMENT_C;

                // This comment already ended. It may be a part of a
                // previous MySQL specific command.
                if ($token === '*/') {
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Checking if this is a MySQL-specific command.
                if ($this->last + 1 < $this->len
                    && $this->str[$this->last + 1] === '!'
                ) {
                    $flags |= Token::FLAG_COMMENT_MYSQL_CMD;
                    $token .= $this->str[++$this->last];

                    // Optional version number right after `/*!`.
                    while (++$this->last < $this->len
                        && '0' <= $this->str[$this->last]
                        && $this->str[$this->last] <= '9'
                    ) {
                        $token .= $this->str[$this->last];
                    }
                    --$this->last;

                    // We split this comment and parse only its beginning
                    // here.
                    return new Token($token, Token::TYPE_COMMENT, $flags);
                }

                // Index of the second character of the opener (the `*` of
                // `/*`). It must not be reused as the `*` of the closing
                // `*/`, otherwise `/*/` would be taken for a whole comment.
                $openerEnd = $this->last;

                // Parsing the comment.
                while (++$this->last < $this->len
                    && (
                        $this->last - 1 === $openerEnd
                        || $this->str[$this->last - 1] !== '*'
                        || $this->str[$this->last] !== '/'
                    )
                ) {
                    $token .= $this->str[$this->last];
                }

                // Adding the ending (absent when the comment is unterminated).
                if ($this->last < $this->len) {
                    $token .= $this->str[$this->last];
                }

                return new Token($token, Token::TYPE_COMMENT, $flags);
            }
        }

        // SQL style comments. (-- comment\n)
        if (++$this->last < $this->len) {
            $token .= $this->str[$this->last];
            if (Context::isComment($token)) {
                // Checking if this comment did not end already (```--\n```).
                // NOTE(review): in that case the "\n" is already part of
                // `$token` and, because of the step back below, it is lexed
                // again as whitespace - confirm whether this is intended.
                if ($this->str[$this->last] !== "\n") {
                    while (++$this->last < $this->len
                        && $this->str[$this->last] !== "\n"
                    ) {
                        $token .= $this->str[$this->last];
                    }
                }

                // Step back so that the trailing \n is lexed as a whitespace
                // token.
                if ($this->last < $this->len) {
                    --$this->last;
                }

                return new Token($token, Token::TYPE_COMMENT, Token::FLAG_COMMENT_SQL);
            }
        }

        $this->last = $iBak;

        return null;
    }
628
629
    /**
     * Parses a boolean.
     *
     * Tries the next four characters (`TRUE`) and, failing that, the next
     * five (`FALSE`) against `Context::isBool`. The candidate must be followed
     * by a separator or by the end of the input, so that a longer word that
     * merely starts with a boolean (e.g. `trueValue`) is not split in two.
     * On success `$this->last` is left on the last character of the boolean;
     * otherwise it is left unchanged.
     *
     * @return null|Token
     */
    public function parseBool()
    {
        $start = $this->last;

        if ($start + 3 >= $this->len) {
            // At least `min(strlen('TRUE'), strlen('FALSE'))` characters are
            // required.
            return null;
        }

        // _TRUE_ or _FALS_e
        $end = $start + 3;
        $word = $this->str[$start] . $this->str[$start + 1]
            . $this->str[$start + 2] . $this->str[$end];

        $isShort = (bool) Context::isBool($word);
        if (!$isShort) {
            if ($end + 1 >= $this->len) {
                return null;
            }

            $word .= $this->str[++$end]; // fals_E_
            if (!Context::isBool($word)) {
                return null;
            }
        }

        // Same word-boundary rule as in `parseKeyword`.
        if ($end + 1 < $this->len && !Context::isSeparator($this->str[$end + 1])) {
            return null;
        }

        $this->last = $end;

        if ($isShort) {
            return new Token($word, Token::TYPE_BOOL);
        }

        return new Token($word, Token::TYPE_BOOL, 1);
    }
659
660
    /**
     * Parses a number.
     *
     * A rudimentary state machine is being used to parse numbers due to the
     * various forms of their notation. Its transitions are:
     *
     *      1 --------------------[ + or - ]-------------------> 1
     *      1 -------------------[ 0x or 0X ]------------------> 2
     *      1 --------------------[ 0 to 9 ]-------------------> 3
     *      1 -----------------------[ . ]---------------------> 4
     *      1 -----------------------[ b ]---------------------> 7
     *
     *      2 --------------------[ 0 to F ]-------------------> 2
     *
     *      3 --------------------[ 0 to 9 ]-------------------> 3
     *      3 -----------------------[ . ]---------------------> 4
     *      3 --------------------[ e or E ]-------------------> 5
     *
     *      4 --------------------[ 0 to 9 ]-------------------> 4
     *      4 --------------------[ e or E ]-------------------> 5
     *
     *      5 ---------------[ + or - or 0 to 9 ]--------------> 6
     *
     *      6 --------------------[ 0 to 9 ]-------------------> 6
     *
     *      7 -----------------------[ ' ]---------------------> 8
     *
     *      8 --------------------[ 0 or 1 ]-------------------> 8
     *      8 -----------------------[ ' ]---------------------> 9
     *
     * State 1 may be reached by negative numbers.
     * State 2 is reached only by hex numbers.
     * State 4 is reached only by float numbers.
     * State 5 is reached only by numbers in approximate form.
     * State 7 is reached only by numbers in bit representation.
     *
     * Valid final states are: 2, 3, 4 (unless the text is a lone `.`), 6
     * and 9. Any parsing that finished in a state other than these is
     * invalid; `$this->last` is then restored. On success `$this->last` is
     * left on the last character of the number.
     *
     * @return null|Token
     */
    public function parseNumber()
    {
        $start = $this->last;
        $text = '';
        $flags = 0;
        $state = 1;

        // `break 2` leaves the scanning loop without consuming the current
        // character; a plain `break` only leaves the `switch`, after which
        // the character is appended to the number.
        for (; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];

            switch ($state) {
                case 1:
                    if ($char === '-') {
                        $flags |= Token::FLAG_NUMBER_NEGATIVE;
                    } elseif ($this->last + 1 < $this->len
                        && $char === '0'
                        && (
                            $this->str[$this->last + 1] === 'x'
                            || $this->str[$this->last + 1] === 'X'
                        )
                    ) {
                        // Consume the `0` here; the `x` is appended below.
                        $text .= $char;
                        $char = $this->str[++$this->last];
                        $state = 2;
                    } elseif ($char >= '0' && $char <= '9') {
                        $state = 3;
                    } elseif ($char === '.') {
                        $state = 4;
                    } elseif ($char === 'b') {
                        $state = 7;
                    } elseif ($char !== '+') {
                        // `+` is a valid character in a number.
                        break 2;
                    }
                    break;

                case 2:
                    $flags |= Token::FLAG_NUMBER_HEX;
                    $isHexDigit = ($char >= '0' && $char <= '9')
                        || ($char >= 'A' && $char <= 'F')
                        || ($char >= 'a' && $char <= 'f');
                    if (!$isHexDigit) {
                        break 2;
                    }
                    break;

                case 3:
                    if ($char === '.') {
                        $state = 4;
                    } elseif ($char === 'e' || $char === 'E') {
                        $state = 5;
                    } elseif ($char < '0' || $char > '9') {
                        // Just digits and `.`, `e` and `E` are valid
                        // characters.
                        break 2;
                    }
                    break;

                case 4:
                    $flags |= Token::FLAG_NUMBER_FLOAT;
                    if ($char === 'e' || $char === 'E') {
                        $state = 5;
                    } elseif ($char < '0' || $char > '9') {
                        // Just digits, `e` and `E` are valid characters.
                        break 2;
                    }
                    break;

                case 5:
                    $flags |= Token::FLAG_NUMBER_APPROXIMATE;
                    if ($char === '+' || $char === '-'
                        || ($char >= '0' && $char <= '9')
                    ) {
                        $state = 6;
                    } else {
                        break 2;
                    }
                    break;

                case 6:
                    if ($char < '0' || $char > '9') {
                        // Just digits are valid characters.
                        break 2;
                    }
                    break;

                case 7:
                    $flags |= Token::FLAG_NUMBER_BINARY;
                    if ($char === '\'') {
                        $state = 8;
                    } else {
                        break 2;
                    }
                    break;

                case 8:
                    if ($char === '\'') {
                        $state = 9;
                    } elseif ($char !== '0' && $char !== '1') {
                        break 2;
                    }
                    break;

                case 9:
                    // The closing quote was consumed; the literal is complete.
                    break 2;
            }

            $text .= $char;
        }

        $accepted = $state === 2 || $state === 3
            || ($text !== '.' && $state === 4)
            || $state === 6 || $state === 9;

        if (!$accepted) {
            $this->last = $start;

            return null;
        }

        // The loop stopped one character past the number.
        --$this->last;

        return new Token($text, Token::TYPE_NUMBER, $flags);
    }
804
805
    /**
     * Parses a string.
     *
     * The current character must either be accepted by `Context::isString`
     * or be equal to `$quote`; it then acts as the quote for the whole
     * string. A doubled quote and (except inside backticks) a backslash both
     * escape the character that follows them. A missing closing quote is
     * reported through `error()`, but a token is still returned.
     *
     * @param string $quote additional starting symbol
     *
     * @return null|Token
     */
    public function parseString($quote = '')
    {
        $opener = $this->str[$this->last];
        $flags = Context::isString($opener);
        if (!$flags && $opener !== $quote) {
            return null;
        }

        $quote = $opener;
        $text = $opener;

        // Set once the closing quote has been reached.
        $terminated = false;

        while (++$this->last < $this->len) {
            $char = $this->str[$this->last];

            // Two-character sequences: doubled quote or backslash escape.
            $isPair = $this->last + 1 < $this->len
                && (
                    ($char === $quote && $this->str[$this->last + 1] === $quote)
                    || ($char === '\\' && $quote !== '`')
                );

            if ($isPair) {
                $text .= $char . $this->str[++$this->last];
                continue;
            }

            if ($char === $quote) {
                $terminated = true;
                break;
            }

            $text .= $char;
        }

        if ($terminated) {
            $text .= $this->str[$this->last];
        } else {
            // NOTE(review): the message is translated here and once more
            // inside `error()`.
            $this->error(
                sprintf(
                    Translator::gettext('Ending quote %1$s was expected.'),
                    $quote
                ),
                '',
                $this->last
            );
        }

        return new Token($text, Token::TYPE_STRING, $flags);
    }
851
852
    /**
853
     * Parses a symbol.
854
     *
855
     * @return null|Token
856
     */
857 359
    public function parseSymbol()
    {
        // Only characters the context recognises as symbol starters are handled.
        $token = $this->str[$this->last];
        $flags = Context::isSymbol($token);
        if (!$flags) {
            return null;
        }

        if ($flags & Token::FLAG_SYMBOL_VARIABLE) {
            // When a next character exists the cursor always steps onto it;
            // if it is a second `@`, it is consumed as well and the symbol is
            // marked as a system variable (e.g. `@@hostname`).
            if ($this->last + 1 < $this->len && $this->str[++$this->last] === '@') {
                $token .= $this->str[$this->last++];
                $flags |= Token::FLAG_SYMBOL_SYSTEM;
            }
        } else {
            // For any other symbol the cursor has not moved, so the opening
            // character is read again by the name parsers below; drop it here
            // to avoid duplicating it in the token.
            $token = '';
        }

        $str = null;

        if ($this->last < $this->len) {
            // The name is either a quoted string (backticks are accepted as an
            // additional quote) or a run of non-separator characters.
            $str = $this->parseString('`');

            if ($str === null) {
                // `parseUnknown` is an instance method, so call it on `$this`
                // rather than through static scope resolution.
                $str = $this->parseUnknown();

                if ($str === null) {
                    $this->error(
                        'Variable name was expected.',
                        $this->str[$this->last],
                        $this->last
                    );
                }
            }
        }

        if ($str !== null) {
            $token .= $str->token;
        }

        return new Token($token, Token::TYPE_SYMBOL, $flags);
    }
894
895
    /**
896
     * Parses unknown parts of the query.
897
     *
898
     * @return null|Token
899
     */
900 267 View Code Duplication
    public function parseUnknown()
    {
        // A separator under the cursor cannot start an unknown token.
        $first = $this->str[$this->last];
        if (Context::isSeparator($first)) {
            return null;
        }

        // Collect every following character up to (not including) the next
        // separator or the end of the input.
        $token = $first;
        for (++$this->last; $this->last < $this->len; ++$this->last) {
            $char = $this->str[$this->last];
            if (Context::isSeparator($char)) {
                break;
            }
            $token .= $char;
        }

        // The cursor stopped one position past the token; step back so it
        // rests on the token's last character.
        --$this->last;

        return new Token($token);
    }
914
915
    /**
916
     * Parses the delimiter of the query.
917
     *
918
     * @return null|Token
919
     */
920 368
    public function parseDelimiter()
    {
        // The whole delimiter must fit in the remaining input. Without this
        // check a proper prefix of the delimiter at the very end of the input
        // (e.g. a lone `$` when the delimiter is `$$`) would be reported as a
        // full delimiter and the cursor would be moved past the end.
        if ($this->last + $this->delimiterLen > $this->len) {
            return null;
        }

        // Compare the delimiter character by character with the input
        // starting at the cursor.
        for ($idx = 0; $idx < $this->delimiterLen; ++$idx) {
            if ($this->delimiter[$idx] !== $this->str[$this->last + $idx]) {
                return null;
            }
        }

        // Leave the cursor on the last character of the delimiter.
        $this->last += $this->delimiterLen - 1;

        return new Token($this->delimiter, Token::TYPE_DELIMITER);
    }
935
}
936