Passed
Pull Request — master (#196)
by Christoffer
02:41
created

Lexer::lexNumber()   C

Complexity

Conditions 10
Paths 26

Size

Total Lines 55
Code Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 55
rs 6.8372
c 0
b 0
f 0
cc 10
eloc 31
nc 26
nop 4

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

- Extract Method

1
<?php
2
3
namespace Digia\GraphQL\Language;
4
5
use Digia\GraphQL\Error\SyntaxErrorException;
6
7
/**
 * Turns the body of a GraphQL Source into a linked chain of Token objects.
 *
 * All positions handled by this class (`$pos`, `$lineStart`, token start/end)
 * are character offsets into the source body, because characters are read with
 * `mb_substr()` in readCharCode().
 */
class Lexer implements LexerInterface
{
    protected const ENCODING = 'UTF-8';

    /**
     * A map between punctuation character code and the corresponding token kind.
     *
     * @var array
     */
    protected static $codeTokenKindMap = [
        33  => TokenKindEnum::BANG,
        36  => TokenKindEnum::DOLLAR,
        38  => TokenKindEnum::AMP,
        40  => TokenKindEnum::PAREN_L,
        41  => TokenKindEnum::PAREN_R,
        58  => TokenKindEnum::COLON,
        61  => TokenKindEnum::EQUALS,
        64  => TokenKindEnum::AT,
        91  => TokenKindEnum::BRACKET_L,
        93  => TokenKindEnum::BRACKET_R,
        123 => TokenKindEnum::BRACE_L,
        124 => TokenKindEnum::PIPE,
        125 => TokenKindEnum::BRACE_R,
    ];

    /**
     * The source file for this lexer.
     *
     * @var Source
     */
    protected $source;

    /**
     * The contents of the source file.
     *
     * @var string
     */
    protected $body;

    /**
     * The total number of characters in the source file.
     *
     * @var int
     */
    protected $bodyLength;

    /**
     * The options for this lexer.
     *
     * @var array
     */
    protected $options = [];

    /**
     * The previously focused non-ignored token.
     *
     * @var Token
     */
    protected $lastToken;

    /**
     * The currently focused non-ignored token.
     *
     * @var Token
     */
    protected $token;

    /**
     * The current position (character offset into the body).
     *
     * @var int
     */
    protected $pos;

    /**
     * The (1-indexed) line containing the current token.
     *
     * @var int
     */
    protected $line;

    /**
     * The character offset at which the current line begins.
     *
     * @var int
     */
    protected $lineStart;

    /**
     * Cache of character => code point, shared by all lexer instances.
     *
     * @var array
     */
    protected static $charCodeCache = [];

    /**
     * Lexer constructor.
     *
     * @param Source $source
     * @param array  $options
     */
    public function __construct(Source $source, array $options)
    {
        $startOfFileToken = $this->createStartOfFileToken();

        $this->lastToken  = $startOfFileToken;
        $this->token      = $startOfFileToken;
        // Start at the beginning so that createSyntaxErrorException() is usable
        // even before the first token has been read.
        $this->pos        = 0;
        $this->line       = 1;
        $this->lineStart  = 0;
        $this->body       = $source->getBody();
        // Positions are character offsets (see readCharCode()), so the length
        // must be counted in characters too; a byte count would overshoot for
        // multibyte input and the end of the file would never be detected.
        $this->bodyLength = \mb_strlen($this->body, self::ENCODING);
        $this->source     = $source;
        $this->options    = $options;
    }

    /**
     * Moves to the next non-comment token, remembering the current one as the last token.
     *
     * @inheritdoc
     * @throws SyntaxErrorException
     */
    public function advance(): Token
    {
        $this->lastToken = $this->token;
        return $this->token = $this->lookahead();
    }

    /**
     * Returns the next non-comment token after the current one without changing
     * the current token. Every token read on the way (comments included) is
     * linked to its predecessor through setNext(). When the current token is
     * already EOF, it is returned as is.
     *
     * @inheritdoc
     * @throws SyntaxErrorException
     */
    public function lookahead(): Token
    {
        $token = $this->token;

        if (TokenKindEnum::EOF !== $token->getKind()) {
            do {
                $next = $this->readToken($token);
                $token->setNext($next);
                $token = $next;
            } while (TokenKindEnum::COMMENT === $token->getKind());
        }

        return $token;
    }

    /**
     * Returns the option with the given name, or `$default` when it is not set.
     *
     * @inheritdoc
     */
    public function getOption(string $name, $default = null)
    {
        return $this->options[$name] ?? $default;
    }

    /**
     * Returns the kind of the current token.
     *
     * @inheritdoc
     */
    public function getTokenKind(): string
    {
        return $this->token->getKind();
    }

    /**
     * Returns the value of the current token.
     *
     * @inheritdoc
     */
    public function getTokenValue(): ?string
    {
        return $this->token->getValue();
    }

    /**
     * Returns the current token.
     *
     * @inheritdoc
     */
    public function getToken(): Token
    {
        return $this->token;
    }

    /**
     * Returns the source this lexer reads from.
     *
     * @inheritdoc
     */
    public function getSource(): Source
    {
        return $this->source;
    }

    /**
     * Returns the token that was current before the last call to advance().
     *
     * @inheritdoc
     */
    public function getLastToken(): Token
    {
        return $this->lastToken;
    }

    /**
     * Creates a syntax error located at the current position. When no
     * description is given, one is derived from the character at that position.
     *
     * @inheritdoc
     */
    public function createSyntaxErrorException(?string $description = null): SyntaxErrorException
    {
        return new SyntaxErrorException(
            $this->source,
            $this->pos,
            $description ?? $this->unexpectedCharacterMessage($this->readCharCode($this->pos))
        );
    }

    /**
     * Reads the token after the given token.
     *
     * Positions the lexer at the end of `$prev`, skips ignorable characters and
     * dispatches on the first significant character.
     *
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException
     */
    protected function readToken(Token $prev): Token
    {
        $this->pos = $prev->getEnd();

        $this->skipWhitespace();

        $line = $this->line;
        $col  = (1 + $this->pos) - $this->lineStart;

        if ($this->pos >= $this->bodyLength) {
            return $this->createEndOfFileToken($line, $col, $prev);
        }

        $code = $this->readCharCode($this->pos);

        // Punctuation: [!$&:=@|()\[\]{}]{1}
        if (isset(self::$codeTokenKindMap[$code])) {
            return $this->lexPunctuation($code, $line, $col, $prev);
        }

        // Comment: #[\u0009\u0020-\uFFFF]*
        if (35 === $code) {
            return $this->lexComment($line, $col, $prev);
        }

        // Int:   -?(0|[1-9][0-9]*)
        // Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
        if (45 === $code || isNumber($code)) {
            return $this->lexNumber($code, $line, $col, $prev);
        }

        // Name: [_A-Za-z][_0-9A-Za-z]*
        // (digits never reach this point, they are handled by the number branch above)
        if (isAlphaNumeric($code)) {
            return $this->lexName($line, $col, $prev);
        }

        // Spread: ...
        if ($this->bodyLength >= 3 && $this->isSpread($code)) {
            return $this->lexSpread($line, $col, $prev);
        }

        // String: "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
        if ($this->isString($code)) {
            return $this->lexString($line, $col, $prev);
        }

        // Block String: """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
        if ($this->bodyLength >= 3 && $this->isTripleQuote($code)) {
            return $this->lexBlockString($line, $col, $prev);
        }

        throw $this->createSyntaxErrorException();
    }

    /**
     * Creates the Start Of File (SOF) token.
     *
     * @return Token
     */
    protected function createStartOfFileToken(): Token
    {
        return new Token(TokenKindEnum::SOF);
    }

    /**
     * Creates an End Of File (EOF) token.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     */
    protected function createEndOfFileToken(int $line, int $col, Token $prev): Token
    {
        return new Token(TokenKindEnum::EOF, $this->bodyLength, $this->bodyLength, $line, $col, $prev);
    }

    /**
     * Reads a punctuation token from the source file.
     *
     * The return type is declared nullable, but this implementation always
     * returns a token or throws; the declaration is kept as is so that existing
     * overrides stay compatible.
     *
     * @param int   $code
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException When the code is not a known punctuation character.
     */
    protected function lexPunctuation(int $code, int $line, int $col, Token $prev): ?Token
    {
        if (!isset(self::$codeTokenKindMap[$code])) {
            throw $this->createSyntaxErrorException();
        }

        return new Token(self::$codeTokenKindMap[$code], $this->pos, $this->pos + 1, $line, $col, $prev);
    }

    /**
     * Reads a name token from the source file.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     */
    protected function lexName(int $line, int $col, Token $prev): Token
    {
        $start = $this->pos;

        // The first character was already validated by the caller.
        ++$this->pos;

        while ($this->pos < $this->bodyLength && isAlphaNumeric($this->readCharCode($this->pos))) {
            ++$this->pos;
        }

        $value = sliceString($this->body, $start, $this->pos);

        return new Token(TokenKindEnum::NAME, $start, $this->pos, $line, $col, $prev, $value);
    }

    /**
     * Reads a number (int or float) token from the source file.
     *
     * The token is a FLOAT as soon as a fraction part or an exponent part is
     * present, otherwise it is an INT.
     *
     * @param int   $code The code of the character at the current position.
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException
     */
    protected function lexNumber(int $code, int $line, int $col, Token $prev): Token
    {
        $start   = $this->pos;
        $isFloat = false;

        if (45 === $code) {
            // -
            $code = $this->readCharCode(++$this->pos);
        }

        if (48 === $code) {
            // 0 (a leading zero must not be followed by another digit)
            $code = $this->readCharCode(++$this->pos);

            if (isNumber($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid number, unexpected digit after 0: %s.', printCharCode($code))
                );
            }
        } else {
            $this->skipDigits($code);
            $code = $this->readCharCode($this->pos);
        }

        if (46 === $code) {
            // .
            $isFloat = true;

            $code = $this->readCharCode(++$this->pos);
            $this->skipDigits($code);
            $code = $this->readCharCode($this->pos);
        }

        if (69 === $code || 101 === $code) {
            // e or E
            $isFloat = true;

            $code = $this->readCharCode(++$this->pos);

            if (43 === $code || 45 === $code) {
                // + or -
                $code = $this->readCharCode(++$this->pos);
            }

            $this->skipDigits($code);
        }

        return new Token(
            $isFloat ? TokenKindEnum::FLOAT : TokenKindEnum::INT,
            $start,
            $this->pos,
            $line,
            $col,
            $prev,
            sliceString($this->body, $start, $this->pos)
        );
    }

    /**
     * Skips digits at the current position; at least one digit is required.
     *
     * @param int $code The code of the character at the current position.
     * @throws SyntaxErrorException When the current character is not a digit.
     */
    protected function skipDigits(int $code): void
    {
        if (isNumber($code)) {
            do {
                $code = $this->readCharCode(++$this->pos);
            } while (isNumber($code));

            return;
        }

        throw $this->createSyntaxErrorException(
            \sprintf('Invalid number, expected digit but got: %s.', printCharCode($code))
        );
    }

    /**
     * Reads a comment token from the source file.
     *
     * The token spans from the `#` up to (not including) the first character
     * that ends the comment; its value excludes the leading `#`.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     */
    protected function lexComment(int $line, int $col, Token $prev): Token
    {
        $start = $this->pos;

        // readCharCode() yields 0 past the end of the body, which ends the loop as well.
        do {
            $code = $this->readCharCode(++$this->pos);
        } while ($code > 0x001f || 0x0009 === $code); // SourceCharacter but not LineTerminator

        return new Token(
            TokenKindEnum::COMMENT,
            $start,
            $this->pos,
            $line,
            $col,
            $prev,
            sliceString($this->body, $start + 1, $this->pos)
        );
    }

    /**
     * Reads a spread token from the source.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     */
    protected function lexSpread(int $line, int $col, Token $prev): Token
    {
        return new Token(TokenKindEnum::SPREAD, $this->pos, $this->pos + 3, $line, $col, $prev);
    }

    /**
     * Reads a string token from the source.
     *
     * The value is assembled from chunks of raw text between escape sequences.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException On an invalid character, an invalid escape
     *                              sequence or a missing closing quote.
     */
    protected function lexString(int $line, int $col, Token $prev): Token
    {
        $start      = $this->pos;
        $chunkStart = ++$this->pos; // skip the quote
        $value      = '';

        while ($this->pos < $this->bodyLength &&
            !isLineTerminator($code = $this->readCharCode($this->pos))) {
            // Closing Quote (")
            if (34 === $code) {
                $value .= sliceString($this->body, $chunkStart, $this->pos);
                return new Token(TokenKindEnum::STRING, $start, $this->pos + 1, $line, $col, $prev, $value);
            }

            // NOTE(review): despite its name, isSourceCharacter() acts as the
            // rejection test here; presumably it matches disallowed control
            // characters. Confirm against its definition.
            if (isSourceCharacter($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid character within String: %s.', printCharCode($code))
                );
            }

            ++$this->pos;

            if (92 === $code) {
                // \
                $value .= sliceString($this->body, $chunkStart, $this->pos - 1);

                $code = $this->readCharCode($this->pos);

                // NOTE(review): the single-quoted literals below append the
                // two-character sequence (backslash + letter), not the control
                // character, and \uXXXX is kept in its escaped form. Left as is
                // because callers may rely on it; confirm whether decoding was
                // intended.
                switch ($code) {
                    case 34: // "
                        $value .= '"';
                        break;
                    case 47: // /
                        $value .= '/';
                        break;
                    case 92: // \
                        $value .= '\\';
                        break;
                    case 98: // b
                        $value .= '\b';
                        break;
                    case 102: // f
                        $value .= '\f';
                        break;
                    case 110: // n
                        $value .= '\n';
                        break;
                    case 114: // r
                        $value .= '\r';
                        break;
                    case 116: // t
                        $value .= '\t';
                        break;
                    case 117: // u
                        $unicodeString = sliceString($this->body, $this->pos + 1, $this->pos + 5);

                        if (!\preg_match('/[0-9A-Fa-f]{4}/', $unicodeString)) {
                            throw $this->createSyntaxErrorException(
                                \sprintf('Invalid character escape sequence: \\u%s.', $unicodeString)
                            );
                        }

                        $value     .= '\\u' . $unicodeString;
                        $this->pos += 4;

                        break;
                    default:
                        // mb_chr() rather than chr(): the code is a Unicode code
                        // point and chr() would wrap anything above 255.
                        throw $this->createSyntaxErrorException(
                            \sprintf('Invalid character escape sequence: \\%s.', \mb_chr($code, self::ENCODING))
                        );
                }

                ++$this->pos;

                $chunkStart = $this->pos;
            }
        }

        throw $this->createSyntaxErrorException('Unterminated string.');
    }

    /**
     * Reads a block string token from the source file.
     *
     * The raw text between the triple quotes is passed through
     * blockStringValue() to produce the token value.
     *
     * @param int   $line
     * @param int   $col
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException On an invalid character or a missing closing triple-quote.
     */
    protected function lexBlockString(int $line, int $col, Token $prev): Token
    {
        $start      = $this->pos;
        $this->pos  = $start + 3; // skip the triple-quote
        $chunkStart = $this->pos;
        $rawValue   = '';

        while ($this->pos < $this->bodyLength) {
            $code = $this->readCharCode($this->pos);

            // Closing Triple-Quote (""")
            if ($this->isTripleQuote($code)) {
                $rawValue .= sliceString($this->body, $chunkStart, $this->pos);
                return new Token(
                    TokenKindEnum::BLOCK_STRING,
                    $start,
                    $this->pos + 3,
                    $line,
                    $col,
                    $prev,
                    blockStringValue($rawValue)
                );
            }

            // Same rejection test as in lexString(), except that line
            // terminators are allowed inside block strings.
            if (isSourceCharacter($code) && !isLineTerminator($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid character within String: %s.', printCharCode($code))
                );
            }

            if ($this->isEscapedTripleQuote($code)) {
                // \""" contributes a plain """ to the raw value.
                $rawValue   .= sliceString($this->body, $chunkStart, $this->pos) . '"""';
                $this->pos  += 4;
                $chunkStart = $this->pos;
            } else {
                ++$this->pos;
            }
        }

        throw $this->createSyntaxErrorException('Unterminated string.');
    }

    /**
     * Skips whitespace at the current position.
     *
     * Tabs, spaces, commas and the BOM are skipped; line terminators are
     * skipped as well and update the line counter and the line start offset.
     */
    protected function skipWhitespace(): void
    {
        while ($this->pos < $this->bodyLength) {
            $code = $this->readCharCode($this->pos);

            if (9 === $code || 32 === $code || 44 === $code || 0xfeff === $code) {
                // tab | space | comma | BOM
                ++$this->pos;
            } elseif (10 === $code) {
                // new line (\n)
                ++$this->pos;
                ++$this->line;
                $this->lineStart = $this->pos;
            } elseif (13 === $code) {
                // carriage return (\r)
                if (10 === $this->readCharCode($this->pos + 1)) {
                    // carriage return and new line (\r\n)
                    $this->pos += 2;
                } else {
                    ++$this->pos;
                }
                ++$this->line;
                $this->lineStart = $this->pos;
            } else {
                break;
            }
        }
    }

    /**
     * Returns the code point of the character at the given character offset,
     * or 0 when there is no character at that offset.
     *
     * @param int $pos
     * @return int
     */
    protected function readCharCode(int $pos): int
    {
        $char = \mb_substr($this->body, $pos, 1, self::ENCODING);

        if ('' === $char) {
            return 0;
        }

        if (!isset(self::$charCodeCache[$char])) {
            self::$charCodeCache[$char] = \mb_ord($char, self::ENCODING);
        }

        return self::$charCodeCache[$char];
    }

    /**
     * Report a message that an unexpected character was encountered.
     *
     * @param int $code
     * @return string
     */
    protected function unexpectedCharacterMessage(int $code): string
    {
        if (isSourceCharacter($code) && !isLineTerminator($code)) {
            return \sprintf('Cannot contain the invalid character %s.', printCharCode($code));
        }

        if ($code === 39) {
            // '
            return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
        }

        return \sprintf('Cannot parse the unexpected character %s.', printCharCode($code));
    }

    /**
     * Tells whether a spread (`...`) starts at the current position.
     *
     * @param int $code The code of the character at the current position.
     * @return bool
     */
    protected function isSpread(int $code): bool
    {
        return 46 === $code &&
            $this->readCharCode($this->pos + 1) === 46 &&
            $this->readCharCode($this->pos + 2) === 46; // ...
    }

    /**
     * Tells whether a regular (single-quoted with `"`) string starts at the
     * current position, i.e. a double quote that does not open a block string.
     * This includes the empty string literal `""`.
     *
     * @param int $code The code of the character at the current position.
     * @return bool
     */
    protected function isString(int $code): bool
    {
        return 34 === $code && !$this->isTripleQuote($code);
    }

    /**
     * Tells whether a triple-quote (`"""`) starts at the current position.
     *
     * @param int $code The code of the character at the current position.
     * @return bool
     */
    protected function isTripleQuote(int $code): bool
    {
        return 34 === $code &&
            34 === $this->readCharCode($this->pos + 1) &&
            34 === $this->readCharCode($this->pos + 2); // """
    }

    /**
     * Tells whether an escaped triple-quote (`\"""`) starts at the current position.
     *
     * @param int $code The code of the character at the current position.
     * @return bool
     */
    protected function isEscapedTripleQuote(int $code): bool
    {
        return $code === 92 &&
            34 === $this->readCharCode($this->pos + 1) &&
            34 === $this->readCharCode($this->pos + 2) &&
            34 === $this->readCharCode($this->pos + 3); // \"""
    }
}
717