ExpressionLexer::nextToken()   F
last analyzed

Complexity

Conditions 33
Paths 240

Size

Total Lines 133
Code Lines 102

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 33
eloc 102
nc 240
nop 0
dl 0
loc 133
rs 2.2666
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace POData\UriProcessor\QueryProcessor\ExpressionParser;
6
7
use POData\Common\Messages;
8
use POData\Common\ODataConstants;
9
use POData\Common\ODataException;
10
use POData\Providers\Metadata\Type\Char;
11
12
/**
13
 * Class ExpressionLexer.
14
 *
15
 * Lexical analyzer for Astoria URI expression parsing
16
 * Literals        Representation
17
 * --------------------------------------------------------------------
18
 * Null            null
19
 * Boolean         true | false
20
 * Int32           (digit+)
21
 * Int64           (digit+)(L|l)
22
 * Decimal         (digit+ ['.' digit+])(M|m)
23
 * Float (Single)  (digit+ ['.' digit+][e|E [+|-] digit+])(f|F)
24
 * Double          (digit+ ['.' digit+][e|E [+|-] digit+])
25
 * String          "'" .* "'"
26
 * DateTime        datetime"'"dddd-dd-dd[T|' ']dd:mm[ss[.fffffff]]"'"
27
 * Binary          (binary|X)'digit*'
28
 * GUID            guid'digit*'
29
 */
30
class ExpressionLexer
31
{
32
    /**
33
     * Suffix for single literals.
34
     *
35
     * @var string
36
     */
37
    const SINGLE_SUFFIX_LOWER = 'f';
38
39
    /**
40
     * Suffix for single literals.
41
     *
42
     * @var string
43
     */
44
    const SINGLE_SUFFIX_UPPER = 'F';
45
46
    /**
47
     * Text being parsed.
48
     *
49
     * @var string
50
     */
51
    private $text;
52
53
    /**
54
     * Length of text being parsed.
55
     *
56
     * @var int
57
     */
58
    private $textLen;
59
60
    /**
61
     * Position on text being parsed.
62
     *
63
     * @var int
64
     */
65
    private $textPos;
66
67
    /**
68
     * Character being processed.
69
     *
70
     * @var string
71
     */
72
    private $ch;
73
74
    /**
75
     * ExpressionToken being processed.
76
     *
77
     * @var ExpressionToken
78
     */
79
    private $token;
80
81
    /**
     * Creates a lexer over the given expression and immediately reads its first token.
     *
     * @param  string         $expression Expression to parse
     * @throws ODataException if reading the first token fails
     */
    public function __construct(string $expression)
    {
        $this->token   = new ExpressionToken();
        $this->text    = $expression;
        $this->textLen = strlen($expression);

        // Position on the first character, then prime the current token.
        $this->setTextPos(0);
        $this->nextToken();
    }
95
96
    /**
     * Moves the cursor to the given offset and loads the character found there.
     *
     * When the offset is at or beyond the end of the text, the current character becomes the
     * end-of-text sentinel. The sentinel is the single-quoted literal '\0', i.e. the two characters
     * backslash and zero (not a NUL byte), which is why the assertion tolerates a length of 2.
     *
     * @param int $pos Value to position
     */
    private function setTextPos(int $pos): void
    {
        $this->textPos = $pos;

        if ($pos < $this->textLen) {
            $current = $this->text[$pos];
        } else {
            $current = '\0';
        }

        assert(2 >= strlen($current));
        $this->ch = $current;
    }
108
109
    /**
     * Reads the next token, skipping whitespace as necessary.
     *
     * The recognised token is written into the current token object (id, text and position).
     * Identifier tokens are then post-processed: a type prefix directly followed by a quote is
     * folded into a datetime/guid/binary literal, and the INF/NaN, boolean and null keywords are
     * promoted to their literal token kinds.
     *
     * @throws ODataException on an unterminated string literal, a missing digit or an invalid character
     */
    public function nextToken(): void
    {
        while (Char::isWhiteSpace($this->ch)) {
            $this->nextChar();
        }

        $tokenPos = $this->textPos;
        switch ($this->ch) {
            case '(':
                $this->nextChar();
                $id = ExpressionTokenId::OPENPARAM();
                break;
            case ')':
                $this->nextChar();
                $id = ExpressionTokenId::CLOSEPARAM();
                break;
            case ',':
                $this->nextChar();
                $id = ExpressionTokenId::COMMA();
                break;
            case '-':
                $id = $this->lexMinus($tokenPos);
                break;
            case '=':
                $this->nextChar();
                $id = ExpressionTokenId::EQUAL();
                break;
            case '/':
                $this->nextChar();
                $id = ExpressionTokenId::SLASH();
                break;
            case '?':
                $this->nextChar();
                $id = ExpressionTokenId::QUESTION();
                break;
            case '.':
                $this->nextChar();
                $id = ExpressionTokenId::DOT();
                break;
            case '\'':
                $id = $this->lexStringLiteral();
                break;
            case '*':
                $this->nextChar();
                $id = ExpressionTokenId::STAR();
                break;
            default:
                $id = $this->lexIdentifierNumberOrEnd();
        }

        $this->token->setId($id);
        $this->token->Text     = substr($this->text, $tokenPos, $this->textPos - $tokenPos);
        $this->token->Position = $tokenPos;

        // Handle type-prefixed literals such as binary, datetime or guid.
        $this->handleTypePrefixedLiterals();

        // Handle keywords.
        $this->promoteKeywordToken();
    }

    /**
     * Lexes a token starting with '-': a negative numeric literal, a negative infinity literal,
     * or a plain minus sign.
     *
     * @param int $tokenPos Offset of the '-' character (the current position on entry)
     *
     * @throws ODataException if a numeric literal is malformed
     * @return ExpressionTokenId The kind of token recognized
     */
    private function lexMinus(int $tokenPos): ExpressionTokenId
    {
        $lookaheadPos = $tokenPos + 1;
        if ($lookaheadPos < $this->textLen) {
            $lookahead = $this->text[$lookaheadPos];
            if (Char::isDigit($lookahead)) {
                $this->nextChar();
                $id = $this->parseFromDigit();
                if (self::isNumeric($id)) {
                    return $id;
                }
                // Non-numeric result (e.g. a binary literal): rewind below and emit a plain '-'.
            } elseif ($lookahead === 'I') {
                $this->nextChar();
                $this->parseIdentifier();
                $currentIdentifier = substr($this->text, $lookaheadPos, $this->textPos - $tokenPos - 1);

                if (self::isInfinityLiteralDouble($currentIdentifier)) {
                    return ExpressionTokenId::DOUBLE_LITERAL();
                }
                if (self::isInfinityLiteralSingle($currentIdentifier)) {
                    return ExpressionTokenId::SINGLE_LITERAL();
                }
                // If it looked like '-INF' but wasn't we'll rewind and fall through to a simple '-' token.
            }
        }

        $this->setTextPos($tokenPos);
        $this->nextChar();

        return ExpressionTokenId::MINUS();
    }

    /**
     * Lexes a quoted string literal; a doubled quote inside the literal continues the string.
     *
     * @throws ODataException if the end of the text is reached before the closing quote
     * @return ExpressionTokenId The string literal token kind
     */
    private function lexStringLiteral(): ExpressionTokenId
    {
        $quote = $this->ch;
        do {
            $this->nextChar();
            while ($this->textPos < $this->textLen && $this->ch !== $quote) {
                $this->nextChar();
            }

            if ($this->textPos === $this->textLen) {
                $this->parseError(
                    Messages::expressionLexerUnterminatedStringLiteral(
                        $this->textPos,
                        $this->text
                    )
                );
            }

            $this->nextChar();
        } while ($this->ch === $quote); // a quote right after the closing quote is an escaped quote

        return ExpressionTokenId::STRING_LITERAL();
    }

    /**
     * Lexes whatever is not single-character punctuation: an identifier, a number, or end of input.
     *
     * @throws ODataException if the current character cannot start any token
     * @return ExpressionTokenId The kind of token recognized
     */
    private function lexIdentifierNumberOrEnd(): ExpressionTokenId
    {
        if (Char::isLetter($this->ch) || $this->ch === '_') {
            $this->parseIdentifier();

            return ExpressionTokenId::IDENTIFIER();
        }

        if (Char::isDigit($this->ch)) {
            return $this->parseFromDigit();
        }

        if ($this->textPos === $this->textLen) {
            return ExpressionTokenId::END();
        }

        // Thrown directly (same exception parseError() raises) so every path visibly returns or throws.
        throw ODataException::createSyntaxError(
            Messages::expressionLexerInvalidCharacter(
                $this->ch,
                $this->textPos
            )
        );
    }

    /**
     * Re-tags an identifier token whose text is an INF/NaN, boolean or null keyword
     * as the corresponding literal token; other tokens are left untouched.
     */
    private function promoteKeywordToken(): void
    {
        if ($this->token->getId() != ExpressionTokenId::IDENTIFIER()) {
            return;
        }

        $text = $this->token->Text;
        if (self::isInfinityOrNaNDouble($text)) {
            $this->token->setId(ExpressionTokenId::DOUBLE_LITERAL());
        } elseif (self::isInfinityOrNanSingle($text)) {
            $this->token->setId(ExpressionTokenId::SINGLE_LITERAL());
        } elseif ($text == ODataConstants::KEYWORD_TRUE
            || $text == ODataConstants::KEYWORD_FALSE
        ) {
            $this->token->setId(ExpressionTokenId::BOOLEAN_LITERAL());
        } elseif ($text == ODataConstants::KEYWORD_NULL) {
            $this->token->setId(ExpressionTokenId::NULL_LITERAL());
        }
    }
249
250
    /**
     * Advance to next character.
     *
     * The position moves forward by one unless it is already at (or past) the end of the text;
     * the current character is then reloaded for the resulting position.
     */
    private function nextChar(): void
    {
        $target = $this->textPos;
        if ($target < $this->textLen) {
            ++$target;
        }

        // setTextPos() stores the position and loads the character (or the end sentinel).
        $this->setTextPos($target);
    }
263
264
    /**
     * Parses a token that starts with a digit.
     *
     * Recognises a hexadecimal binary literal ("0x"/"0X" followed by hex digits) or a decimal
     * number with optional fraction, optional exponent and an optional type suffix
     * (M/m decimal, D/d double, L/l int64, F/f single). Without a fraction, exponent or suffix
     * the result is an integer literal.
     *
     * @throws ODataException if a digit is missing after the decimal point or the exponent marker
     * @return ExpressionTokenId The kind of token recognized
     */
    private function parseFromDigit(): ExpressionTokenId
    {
        $startChar = $this->ch;
        $this->nextChar();

        // The hex prefix needs BOTH a leading '0' and an 'x'/'X'. '&&' binds tighter than '||',
        // so without the parentheses any digit followed by 'X' was taken for a binary literal.
        if ($startChar == '0' && ($this->ch == 'x' || $this->ch == 'X')) {
            $result = ExpressionTokenId::BINARY_LITERAL();
            do {
                $this->nextChar();
            } while (ctype_xdigit($this->ch));

            return $result;
        }

        $result = ExpressionTokenId::INTEGER_LITERAL();
        while (Char::isDigit($this->ch)) {
            $this->nextChar();
        }

        // Optional fraction: '.' must be followed by at least one digit.
        if ($this->ch == '.') {
            $result = ExpressionTokenId::DOUBLE_LITERAL();
            $this->nextChar();
            $this->validateDigit();

            do {
                $this->nextChar();
            } while (Char::isDigit($this->ch));
        }

        // Optional exponent: 'e'/'E', optional sign, then at least one digit.
        if ($this->ch == 'E' || $this->ch == 'e') {
            $result = ExpressionTokenId::DOUBLE_LITERAL();
            $this->nextChar();
            if ($this->ch == '+' || $this->ch == '-') {
                $this->nextChar();
            }

            $this->validateDigit();
            do {
                $this->nextChar();
            } while (Char::isDigit($this->ch));
        }

        // Optional type suffix overrides the kind inferred so far.
        if ($this->ch == 'M' || $this->ch == 'm') {
            $result = ExpressionTokenId::DECIMAL_LITERAL();
            $this->nextChar();
        } elseif ($this->ch == 'd' || $this->ch == 'D') {
            $result = ExpressionTokenId::DOUBLE_LITERAL();
            $this->nextChar();
        } elseif ($this->ch == 'L' || $this->ch == 'l') {
            $result = ExpressionTokenId::INT64_LITERAL();
            $this->nextChar();
        } elseif ($this->ch == 'f' || $this->ch == 'F') {
            $result = ExpressionTokenId::SINGLE_LITERAL();
            $this->nextChar();
        }

        return $result;
    }
325
326
    /**
     * Ensures the character under the cursor is a digit.
     *
     * @throws ODataException if the current character is not a digit
     */
    private function validateDigit(): void
    {
        if (Char::isDigit($this->ch)) {
            return;
        }

        $this->parseError(Messages::expressionLexerDigitExpected($this->textPos));
    }
336
337
    /**
     * Raises a syntax error carrying the given message; never returns normally.
     *
     * @param string $message The error message
     *
     * @throws ODataException always
     */
    private function parseError(string $message): void
    {
        $syntaxError = ODataException::createSyntaxError($message);

        throw $syntaxError;
    }
348
349
    /**
     * Tells whether a token identifier denotes a numeric literal
     * (integer, decimal, double, int64 or single).
     *
     * @param ExpressionTokenId $id Token identifier to check
     *
     * @return bool true if it's a numeric literal; false otherwise
     */
    public static function isNumeric(ExpressionTokenId $id): bool
    {
        // Loose (==) comparison is kept on purpose; candidates are checked in the original order.
        if ($id == ExpressionTokenId::INTEGER_LITERAL()) {
            return true;
        }
        if ($id == ExpressionTokenId::DECIMAL_LITERAL()) {
            return true;
        }
        if ($id == ExpressionTokenId::DOUBLE_LITERAL()) {
            return true;
        }
        if ($id == ExpressionTokenId::INT64_LITERAL()) {
            return true;
        }

        return $id == ExpressionTokenId::SINGLE_LITERAL();
    }
365
366
    /**
     * Consumes an identifier: the current character unconditionally, then every following
     * letter, digit or underscore.
     */
    private function parseIdentifier(): void
    {
        $this->nextChar();
        while (Char::isLetterOrDigit($this->ch) || '_' === $this->ch) {
            $this->nextChar();
        }
    }
375
376
    /**
     * Tells whether the text is exactly the infinity literal (ODataConstants::XML_INFINITY_LITERAL).
     *
     * @param string $text Text to look in
     *
     * @return bool true if match found, false otherwise
     */
    private static function isInfinityLiteralDouble(string $text): bool
    {
        $comparison = strcmp($text, ODataConstants::XML_INFINITY_LITERAL);

        return 0 === $comparison;
    }
387
388
    /**
     * Tells whether the text is the single-precision infinity literal: four characters whose
     * first three match the infinity literal and whose last is the single suffix ('f' or 'F').
     *
     * @param string $text Text to look in
     *
     * @return bool true if the text matches using an ordinal comparison; false otherwise
     */
    private static function isInfinityLiteralSingle(string $text): bool
    {
        if (4 !== strlen($text)) {
            return false;
        }

        $suffix = $text[3];
        if (!in_array($suffix, [self::SINGLE_SUFFIX_LOWER, self::SINGLE_SUFFIX_UPPER], true)) {
            return false;
        }

        return 0 === strncmp($text, ODataConstants::XML_INFINITY_LITERAL, 3);
    }
402
403
    /**
     * Handles the literals that are prefixed by types.
     * This method modifies the token field as necessary.
     *
     * Applies only when the current token is an identifier immediately followed by a single
     * quote and the identifier is (case-insensitively) 'datetime', 'guid', 'binary' or 'X'.
     * The quoted part is then consumed and the token is re-tagged, with its text extended to
     * cover the prefix and the quoted value.
     *
     * @throws ODataException if the closing quote is missing
     */
    private function handleTypePrefixedLiterals(): void
    {
        if ($this->token->getId() != ExpressionTokenId::IDENTIFIER()) {
            return;
        }

        if ('\'' !== $this->ch) {
            return;
        }

        $prefix = $this->token->Text;
        if (0 === strcasecmp('datetime', $prefix)) {
            $literalId = ExpressionTokenId::DATETIME_LITERAL();
        } elseif (0 === strcasecmp('guid', $prefix)) {
            $literalId = ExpressionTokenId::GUID_LITERAL();
        } elseif (0 === strcasecmp('binary', $prefix) || 0 === strcasecmp('X', $prefix)) {
            // strcasecmp() ignores case, so a single check covers both 'X' and 'x'.
            $literalId = ExpressionTokenId::BINARY_LITERAL();
        } else {
            return;
        }

        $start = $this->token->Position;

        // Skip past the opening quote, then scan up to the closing quote or the end sentinel.
        do {
            $this->nextChar();
            $current = $this->ch;
        } while ('\0' !== $current && '\'' !== $current);

        if ('\0' === $this->ch) {
            $this->parseError(
                Messages::expressionLexerUnterminatedStringLiteral(
                    $this->textPos,
                    $this->text
                )
            );
        }

        // Step over the closing quote so it becomes part of the token text.
        $this->nextChar();
        $this->token->setId($literalId);
        $this->token->Text = substr($this->text, $start, $this->textPos - $start);
    }
456
457
    /**
     * Tells whether a three-character token is the infinity literal or the NaN literal.
     *
     * @param string $tokenText Text to look in
     *
     * @return bool true if match found, false otherwise
     */
    private static function isInfinityOrNaNDouble(string $tokenText): bool
    {
        if (3 !== strlen($tokenText)) {
            return false;
        }

        // The first character decides which literal the text is compared against.
        switch ($tokenText[0]) {
            case 'I':
                return self::isInfinityLiteralDouble($tokenText);
            case 'N':
                return 0 === strncmp($tokenText, ODataConstants::XML_NAN_LITERAL, 3);
            default:
                return false;
        }
    }
476
477
    /**
     * Tells whether a four-character token is the infinity or NaN literal followed by the
     * single suffix ('f' or 'F').
     *
     * @param string $tokenText Input token
     *
     * @return bool true if match found, false otherwise
     */
    private static function isInfinityOrNanSingle(string $tokenText): bool
    {
        if (4 !== strlen($tokenText)) {
            return false;
        }

        switch ($tokenText[0]) {
            case 'I':
                return self::isInfinityLiteralSingle($tokenText);
            case 'N':
                $suffix = $tokenText[3];
                if ($suffix !== self::SINGLE_SUFFIX_LOWER && $suffix !== self::SINGLE_SUFFIX_UPPER) {
                    return false;
                }

                return 0 === strncmp($tokenText, ODataConstants::XML_NAN_LITERAL, 3);
            default:
                return false;
        }
    }
498
499
    /**
     * Returns the token the lexer is currently positioned on.
     *
     * The lexer's own token object is returned, not a copy.
     *
     * @return ExpressionToken
     */
    public function getCurrentToken(): ExpressionToken
    {
        return $this->token;
    }
508
509
    /**
     * Replaces the lexer's current token with the given one.
     *
     * Only the token is swapped; the text position and current character are left as they are.
     *
     * @param ExpressionToken $token The expression token to set as current
     */
    public function setCurrentToken(ExpressionToken $token): void
    {
        $this->token = $token;
    }
518
519
    /**
     * Returns the complete expression text handed to the constructor.
     *
     * @return string
     */
    public function getExpressionText(): string
    {
        return $this->text;
    }
528
529
    /**
     * Returns the offset at which the current token starts in the text being parsed.
     *
     * @return int
     */
    public function getPosition(): int
    {
        $current = $this->token;

        return $current->Position;
    }
538
539
    /**
     * Returns the next token without advancing the lexer to next token.
     *
     * The lexer state (position, current character and current token contents) is restored
     * afterwards, including when reading the next token fails, so a failed peek no longer
     * leaves the lexer positioned past the current token.
     *
     * @throws ODataException if the next token cannot be read
     * @return ExpressionToken A copy of the token that follows the current one
     */
    public function peekNextToken(): ExpressionToken
    {
        $savedTextPos = $this->textPos;
        assert(2 >= strlen($this->ch));
        $savedChar  = $this->ch;
        $savedToken = clone $this->token;

        try {
            $this->nextToken();
            $result = clone $this->token;
        } finally {
            $this->textPos = $savedTextPos;
            $this->ch      = $savedChar;
            // Restore in place rather than swapping the object, so references to the
            // current token held elsewhere keep seeing the right contents.
            $this->token->setId($savedToken->getId());
            $this->token->Position = $savedToken->Position;
            $this->token->Text     = $savedToken->Text;
        }

        return $result;
    }
561
562
    /**
     * Reads a dotted name (identifier, then any number of ".identifier" parts) starting at the
     * current token and returns its text. On return the lexer is positioned on the first token
     * after the dotted name.
     *
     * @throws ODataException if the current token, or a token following a dot, is not an identifier
     * @return string         The dotted identifier starting at the current identifier
     */
    public function readDottedIdentifier(): string
    {
        $identifierId = ExpressionTokenId::IDENTIFIER();
        $this->validateToken($identifierId);

        $dotted = $this->token->Text;
        $this->nextToken();

        while ($this->token->getId() == ExpressionTokenId::DOT()) {
            // Step over the dot; an identifier must follow.
            $this->nextToken();
            $this->validateToken(ExpressionTokenId::IDENTIFIER());
            $dotted .= '.' . $this->token->Text;
            $this->nextToken();
        }

        return $dotted;
    }
583
584
    /**
     * Asserts that the current token has the expected kind.
     *
     * @param ExpressionTokenId $tokenId Expected token kind
     *
     * @throws ODataException if current token is not of the
     *                        specified kind
     */
    public function validateToken(ExpressionTokenId $tokenId): void
    {
        if ($this->token->getId() == $tokenId) {
            return;
        }

        $this->parseError(Messages::expressionLexerSyntaxError($this->textPos));
    }
598
}
599