Passed
Push — master ( 16c004...87d406 )
by Christoffer
02:33
created

Lexer::lexSpread()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 3
rs 10
c 0
b 0
f 0
cc 1
eloc 1
nc 1
nop 3
1
<?php
2
3
namespace Digia\GraphQL\Language;
4
5
use Digia\GraphQL\Error\SyntaxErrorException;
6
7
class Lexer implements LexerInterface
8
{
9
    protected const ENCODING = 'UTF-8';
10
11
    /**
12
     * A map between punctuation character code and the corresponding token kind.
13
     *
14
     * @var array
15
     */
16
    protected static $codeTokenKindMap = [
17
        33  => TokenKindEnum::BANG,
18
        36  => TokenKindEnum::DOLLAR,
19
        38  => TokenKindEnum::AMP,
20
        40  => TokenKindEnum::PAREN_L,
21
        41  => TokenKindEnum::PAREN_R,
22
        58  => TokenKindEnum::COLON,
23
        61  => TokenKindEnum::EQUALS,
24
        64  => TokenKindEnum::AT,
25
        91  => TokenKindEnum::BRACKET_L,
26
        93  => TokenKindEnum::BRACKET_R,
27
        123 => TokenKindEnum::BRACE_L,
28
        124 => TokenKindEnum::PIPE,
29
        125 => TokenKindEnum::BRACE_R,
30
    ];
31
32
    /**
33
     * The source file for this lexer.
34
     *
35
     * @var Source
36
     */
37
    protected $source;
38
39
    /**
40
     * The contents of the source file.
41
     *
42
     * @var string
43
     */
44
    protected $body;
45
46
    /**
47
     * The total number of characters in the source file.
48
     *
49
     * @var int
50
     */
51
    protected $bodyLength;
52
53
    /**
54
     * The options for this lexer.
55
     *
56
     * @var array
57
     */
58
    protected $options = [];
59
60
    /**
61
     * The previously focused non-ignored token.
62
     *
63
     * @var Token
64
     */
65
    protected $lastToken;
66
67
    /**
68
     * The currently focused non-ignored token.
69
     *
70
     * @var Token
71
     */
72
    protected $token;
73
74
    /**
75
     * The current position.
76
     *
77
     * @var int
78
     */
79
    protected $position;
80
81
    /**
82
     * The (1-indexed) line containing the current token.
83
     *
84
     * @var int
85
     */
86
    protected $line;
87
88
    /**
89
     * The character offset at which the current line begins.
90
     *
91
     * @var int
92
     */
93
    protected $lineStart;
94
95
    /**
96
     * A key-value map over characters and their corresponding character codes.
97
     *
98
     * @var array
99
     */
100
    protected static $charCodeCache = [];
101
102
    /**
     * Lexer constructor.
     *
     * Starts both the current and the previous token at a Start Of File token
     * and places the lexer on the first line of the source body.
     *
     * @param Source $source  The source to tokenize.
     * @param array  $options Lexer options, readable through getOption().
     */
    public function __construct(Source $source, array $options)
    {
        $startOfFileToken = $this->createStartOfFileToken();

        $this->lastToken = $startOfFileToken;
        $this->token     = $startOfFileToken;
        // Initialised so that createSyntaxErrorException() can be called safely
        // before the first token has been read.
        $this->position  = 0;
        $this->line      = 1;
        $this->lineStart = 0;
        $this->source    = $source;
        $this->options   = $options;
        $this->body      = $source->getBody();
        // Positions are character offsets (readCharCode() uses mb_substr), so the
        // length has to be counted in characters too. A byte count would move the
        // end-of-file check past the real end of any multi-byte source.
        $this->bodyLength = \mb_strlen($this->body, self::ENCODING);
    }
120
121
    /**
     * Moves the lexer forward by one non-comment token.
     *
     * The token that was current becomes the last token, and the result of
     * lookahead() becomes the current token.
     *
     * @inheritdoc
     * @throws SyntaxErrorException
     */
    public function advance(): Token
    {
        // Remember the current token before it is replaced.
        $this->lastToken = $this->token;
        $this->token     = $this->lookahead();

        return $this->token;
    }
130
131
    /**
     * Returns the next non-comment token without changing the current token.
     *
     * Every token read along the way is linked to its predecessor through
     * setNext(). When the current token is already EOF it is returned as is.
     *
     * @inheritdoc
     * @throws SyntaxErrorException
     */
    public function lookahead(): Token
    {
        $current = $this->token;

        if (TokenKindEnum::EOF === $current->getKind()) {
            return $current;
        }

        // Keep reading until something other than a comment turns up.
        do {
            $following = $this->readToken($current);
            $current->setNext($following);
            $current = $following;
        } while (TokenKindEnum::COMMENT === $current->getKind());

        return $current;
    }
149
150
    /**
     * Returns the option stored under the given name, or the default when the
     * option is missing or null.
     *
     * @inheritdoc
     */
    public function getOption(string $name, $default = null)
    {
        if (isset($this->options[$name])) {
            return $this->options[$name];
        }

        return $default;
    }
157
158
    /**
     * Returns the source that was handed to the constructor.
     *
     * @inheritdoc
     */
    public function getSource(): Source
    {
        return $this->source;
    }
165
166
    /**
     * Returns the currently focused non-ignored token.
     *
     * @inheritdoc
     */
    public function getToken(): Token
    {
        return $this->token;
    }
173
174
    /**
     * Returns the previously focused non-ignored token.
     *
     * @inheritdoc
     */
    public function getLastToken(): Token
    {
        return $this->lastToken;
    }
181
182
    /**
     * Builds a syntax error located at the current position.
     *
     * When no description is given, the message is derived from the character
     * found at the current position.
     *
     * @inheritdoc
     */
    public function createSyntaxErrorException(?string $description = null): SyntaxErrorException
    {
        $message = $description;

        if (null === $message) {
            $message = $this->unexpectedCharacterMessage($this->readCharCode($this->position));
        }

        return new SyntaxErrorException($this->source, $this->position, $message);
    }
193
194
    /**
     * Reads the token that follows the given token.
     *
     * Scanning resumes at the end of the given token, skips ignorable
     * characters and then dispatches on the first significant character.
     *
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException
     */
    protected function readToken(Token $prev): Token
    {
        $this->position = $prev->getEnd();
        $this->skipWhitespace();

        // Capture the location only after whitespace has been skipped, so it
        // describes the start of the token itself.
        $line   = $this->line;
        $column = (1 + $this->position) - $this->lineStart;

        if ($this->position >= $this->bodyLength) {
            return $this->createEndOfFileToken($line, $column, $prev);
        }

        $code = $this->readCharCode($this->position);

        // Punctuation: [!$&:=@|()\[\]{}]{1}
        $punctuationCodes = [33, 36, 38, 58, 61, 64, 124, 40, 41, 91, 93, 123, 125];
        if (\in_array($code, $punctuationCodes, true)) {
            return $this->lexPunctuation($code, $line, $column, $prev);
        }

        // Comment: #[\u0009\u0020-\uFFFF]*
        if (35 === $code) {
            return $this->lexComment($line, $column, $prev);
        }

        // Int:   -?(0|[1-9][0-9]*)
        // Float: -?(0|[1-9][0-9]*)(\.[0-9]+)?((E|e)(+|-)?[0-9]+)?
        if (45 === $code || isNumber($code)) {
            return $this->lexNumber($code, $line, $column, $prev);
        }

        // Name: [_A-Za-z][_0-9A-Za-z]*
        // (digits never reach this point; the number branch above claims them)
        if (isAlphaNumeric($code)) {
            return $this->lexName($line, $column, $prev);
        }

        // Three-character tokens can only exist in a body of at least that size.
        $hasRoomForTriple = $this->bodyLength >= 3;

        // Spread: ...
        if ($hasRoomForTriple && $this->isSpread($code)) {
            return $this->lexSpread($line, $column, $prev);
        }

        // String: "([^"\\\u000A\u000D]|(\\(u[0-9a-fA-F]{4}|["\\/bfnrt])))*"
        if ($this->isString($code)) {
            return $this->lexString($line, $column, $prev);
        }

        // Block String: """("?"?(\\"""|\\(?!=""")|[^"\\]))*"""
        if ($hasRoomForTriple && $this->isTripleQuote($code)) {
            return $this->lexBlockString($line, $column, $prev);
        }

        throw $this->createSyntaxErrorException();
    }
255
256
    /**
     * Creates a Start Of File (SOF) token.
     *
     * @return Token
     */
    protected function createStartOfFileToken(): Token
    {
        $kind = TokenKindEnum::SOF;

        return new Token($kind);
    }
263
264
    /**
     * Creates an End Of File (EOF) token.
     *
     * The token is zero-width: it both starts and ends at the body length.
     *
     * @param int   $line
     * @param int   $column
     * @param Token $prev
     * @return Token
     */
    protected function createEndOfFileToken(int $line, int $column, Token $prev): Token
    {
        $end = $this->bodyLength;

        return new Token(TokenKindEnum::EOF, $end, $end, $line, $column, $prev);
    }
276
277
    /**
     * Reads a punctuation token from the source file.
     *
     * The token kind is looked up in the character-code map. A code without an
     * entry results in a syntax error.
     *
     * @param int   $code
     * @param int   $line
     * @param int   $column
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException
     */
    protected function lexPunctuation(int $code, int $line, int $column, Token $prev): ?Token
    {
        $kind = self::$codeTokenKindMap[$code] ?? null;

        if (null === $kind) {
            throw $this->createSyntaxErrorException();
        }

        // Punctuation tokens are always exactly one character wide.
        $start = $this->position;

        return new Token($kind, $start, $start + 1, $line, $column, $prev);
    }
295
296
    /**
     * Reads a name token from the source file.
     *
     * The character at the current position is taken as the first character of
     * the name; scanning then continues while alphanumeric characters follow.
     *
     * @param int   $line
     * @param int   $column
     * @param Token $prev
     * @return Token
     */
    protected function lexName(int $line, int $column, Token $prev): Token
    {
        $start = $this->position;

        // The first character was already accepted by the caller.
        for (++$this->position; $this->position !== $this->bodyLength; ++$this->position) {
            if (!isAlphaNumeric($this->readCharCode($this->position))) {
                break;
            }
        }

        $end  = $this->position;
        $name = sliceString($this->body, $start, $end);

        return new Token(TokenKindEnum::NAME, $start, $end, $line, $column, $prev, $name);
    }
320
321
    /**
     * Reads a number (int or float) token from the source file.
     *
     * A number is an optional minus sign, an integer part (a lone zero or a run
     * of digits), an optional fractional part and an optional exponent. The
     * presence of a fraction or an exponent makes the token a float.
     *
     * @param int   $code
     * @param int   $line
     * @param int   $column
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException
     */
    protected function lexNumber(int $code, int $line, int $column, Token $prev): Token
    {
        $start = $this->position;
        $kind  = TokenKindEnum::INT;

        // Optional leading minus sign (-).
        if (45 === $code) {
            $code = $this->readCharCode(++$this->position);
        }

        if (48 !== $code) {
            // Integer part made of one or more digits.
            $this->skipDigits($code);
            $code = $this->readCharCode($this->position);
        } else {
            // A leading zero must not be followed by another digit.
            $code = $this->readCharCode(++$this->position);

            if (isNumber($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid number, unexpected digit after 0: %s.', printCharCode($code))
                );
            }
        }

        // Fractional part, introduced by a dot (.).
        if (46 === $code) {
            $kind = TokenKindEnum::FLOAT;

            $this->skipDigits($this->readCharCode(++$this->position));
            $code = $this->readCharCode($this->position);
        }

        // Exponent, introduced by E or e and optionally signed.
        if (69 === $code || 101 === $code) {
            $kind = TokenKindEnum::FLOAT;
            $code = $this->readCharCode(++$this->position);

            if (43 === $code || 45 === $code) {
                $code = $this->readCharCode(++$this->position);
            }

            $this->skipDigits($code);
        }

        $end = $this->position;

        return new Token($kind, $start, $end, $line, $column, $prev, sliceString($this->body, $start, $end));
    }
388
389
    /**
     * Skips the run of digits that starts at the current position.
     *
     * At least one digit is required; on return the position points at the
     * first character that is not a digit.
     *
     * @param int $code The character code found at the current position.
     * @throws SyntaxErrorException When the given code is not a digit.
     */
    protected function skipDigits(int $code): void
    {
        if (!isNumber($code)) {
            throw $this->createSyntaxErrorException(
                \sprintf('Invalid number, expected digit but got: %s.', printCharCode($code))
            );
        }

        while (isNumber($this->readCharCode(++$this->position))) {
            // Advancing happens in the loop condition.
        }
    }
409
410
    /**
     * Reads a comment token from the source file.
     *
     * The comment runs from the current position up to, but not including, the
     * first character code at or below 0x1f other than a horizontal tab. The
     * token value excludes the first character of the comment.
     *
     * @param int   $line
     * @param int   $column
     * @param Token $prev
     * @return Token
     */
    protected function lexComment(int $line, int $column, Token $prev): Token
    {
        $start = $this->position;

        do {
            $code = $this->readCharCode(++$this->position);
            // SourceCharacter but not LineTerminator
            $belongsToComment = 0x0009 === $code || $code > 0x001f;
        } while ($belongsToComment);

        $end  = $this->position;
        $text = sliceString($this->body, $start + 1, $end);

        return new Token(TokenKindEnum::COMMENT, $start, $end, $line, $column, $prev, $text);
    }
436
437
    /**
     * Reads a spread token from the source.
     *
     * The token spans three characters starting at the current position.
     *
     * @param int   $line
     * @param int   $column
     * @param Token $prev
     * @return Token
     */
    protected function lexSpread(int $line, int $column, Token $prev): Token
    {
        $start = $this->position;

        return new Token(TokenKindEnum::SPREAD, $start, $start + 3, $line, $column, $prev);
    }
449
450
    /**
     * Reads a string token from the source.
     *
     * Scanning starts on the opening quote and ends at the matching closing
     * quote on the same line. Escape sequences are validated and appended to
     * the token value in their escaped textual form (for example a backslash
     * followed by the letter n).
     * NOTE(review): escapes are not decoded into the characters they denote;
     * confirm that consumers of the token value expect this.
     *
     * @param int   $line
     * @param int   $column
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException On an invalid character, an invalid escape
     *                              sequence or a missing closing quote.
     */
    protected function lexString(int $line, int $column, Token $prev): Token
    {
        // Single-character escapes, keyed by the code of the character that
        // follows the backslash.
        $simpleEscapes = [
            34  => '"',   // "
            47  => '/',   // /
            92  => '\\',  // \
            98  => '\b',  // b
            102 => '\f',  // f
            110 => '\n',  // n
            114 => '\r',  // r
            116 => '\t',  // t
        ];

        $start      = $this->position;
        $chunkStart = ++$this->position; // skip the quote
        $value      = '';

        while ($this->position < $this->bodyLength) {
            $code = $this->readCharCode($this->position);

            // A string may not span multiple lines.
            if (isLineTerminator($code)) {
                break;
            }

            // Closing Quote (")
            if (34 === $code) {
                $value .= sliceString($this->body, $chunkStart, $this->position);

                return new Token(TokenKindEnum::STRING, $start, $this->position + 1, $line, $column, $prev, $value);
            }

            if (isSourceCharacter($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid character within String: %s.', printCharCode($code))
                );
            }

            ++$this->position;

            if (92 !== $code) {
                continue;
            }

            // Backslash: flush the pending chunk (without the backslash itself),
            // then handle the escape sequence.
            $value .= sliceString($this->body, $chunkStart, $this->position - 1);

            $code = $this->readCharCode($this->position);

            if (isset($simpleEscapes[$code])) {
                $value .= $simpleEscapes[$code];
            } elseif (117 === $code) {
                // u followed by four hexadecimal digits
                $unicodeString = sliceString($this->body, $this->position + 1, $this->position + 5);

                if (!\preg_match('/[0-9A-Fa-f]{4}/', $unicodeString)) {
                    throw $this->createSyntaxErrorException(
                        \sprintf('Invalid character escape sequence: \\u%s.', $unicodeString)
                    );
                }

                $value .= '\\u' . $unicodeString;

                $this->position += 4;
            } else {
                // mb_chr() rather than chr(): the code is a Unicode code point,
                // and chr() would reduce anything above 255 to a single byte.
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid character escape sequence: \\%s.', \mb_chr($code, self::ENCODING))
                );
            }

            ++$this->position;

            $chunkStart = $this->position;
        }

        throw $this->createSyntaxErrorException('Unterminated string.');
    }
540
541
    /**
     * Reads a block string token from the source file.
     *
     * Scanning starts on the opening triple-quote and ends at the next
     * unescaped triple-quote. An escaped triple-quote (backslash followed by
     * three quotes) contributes three plain quotes to the raw value. The raw
     * value is passed through blockStringValue() before it is stored on the
     * token.
     *
     * Line terminators inside the block string update the lexer's line and
     * line-start bookkeeping, so that tokens following a multi-line block
     * string report the correct line and column.
     *
     * @param int   $line
     * @param int   $column
     * @param Token $prev
     * @return Token
     * @throws SyntaxErrorException On an invalid character or a missing
     *                              closing triple-quote.
     */
    protected function lexBlockString(int $line, int $column, Token $prev): Token
    {
        $start          = $this->position;
        $this->position = $start + 3; // skip the triple-quote
        $chunkStart     = $this->position;
        $rawValue       = '';

        while ($this->position < $this->bodyLength) {
            $code = $this->readCharCode($this->position);

            // Closing Triple-Quote (""")
            if ($this->isTripleQuote($code)) {
                $rawValue .= sliceString($this->body, $chunkStart, $this->position);

                return new Token(
                    TokenKindEnum::BLOCK_STRING,
                    $start,
                    $this->position + 3,
                    $line,
                    $column,
                    $prev,
                    blockStringValue($rawValue)
                );
            }

            if (isSourceCharacter($code) && !isLineTerminator($code)) {
                throw $this->createSyntaxErrorException(
                    \sprintf('Invalid character within String: %s.', printCharCode($code))
                );
            }

            if (10 === $code) {
                // new line (\n)
                ++$this->position;
                ++$this->line;
                $this->lineStart = $this->position;
            } elseif (13 === $code) {
                // carriage return (\r), optionally followed by a new line (\r\n)
                $this->position += 10 === $this->readCharCode($this->position + 1) ? 2 : 1;
                ++$this->line;
                $this->lineStart = $this->position;
            } elseif ($this->isEscapedTripleQuote($code)) {
                $rawValue       .= sliceString($this->body, $chunkStart, $this->position) . '"""';
                $this->position += 4;
                $chunkStart     = $this->position;
            } else {
                ++$this->position;
            }
        }

        throw $this->createSyntaxErrorException('Unterminated string.');
    }
589
590
    /**
     * Skips ignorable characters at the current position.
     *
     * Tabs, spaces, commas and the byte order mark are stepped over. Line
     * terminators are stepped over as well and additionally bump the line
     * counter and reset the line start.
     */
    protected function skipWhitespace(): void
    {
        while ($this->position < $this->bodyLength) {
            $code = $this->readCharCode($this->position);

            if (9 === $code || 32 === $code || 44 === $code || 0xfeff === $code) {
                // tab | space | comma | BOM
                ++$this->position;
                continue;
            }

            if (10 === $code) {
                // new line (\n)
                $width = 1;
            } elseif (13 === $code) {
                // carriage return (\r), or carriage return and new line (\r\n)
                $width = 10 === $this->readCharCode($this->position + 1) ? 2 : 1;
            } else {
                // First significant character reached.
                return;
            }

            $this->position += $width;
            ++$this->line;
            $this->lineStart = $this->position;
        }
    }
621
622
    /**
     * Returns the character code found at the given character offset.
     *
     * Offsets with no character (for instance past the end of the body) yield
     * 0. Codes are cached per character in a static map shared by all lexers.
     *
     * @param int $position
     * @return int
     */
    protected function readCharCode(int $position): int
    {
        $char = \mb_substr($this->body, $position, 1, self::ENCODING);

        if ('' === $char) {
            return 0;
        }

        if (isset(self::$charCodeCache[$char])) {
            return self::$charCodeCache[$char];
        }

        $code = \ord($char);

        // A first byte of 128 or more is resolved with the multi-byte decoder.
        if ($code >= 128) {
            $code = \mb_ord($char, self::ENCODING);
        }

        return self::$charCodeCache[$char] = $code;
    }
646
647
    /**
     * Builds the message reported when an unexpected character is encountered.
     *
     * A single quote gets a dedicated hint suggesting a double quote; every
     * other character is reported through its printed character code.
     *
     * @param int $code
     * @return string
     */
    protected function unexpectedCharacterMessage(int $code): string
    {
        if (isSourceCharacter($code) && !isLineTerminator($code)) {
            $template = 'Cannot contain the invalid character %s.';
        } elseif (39 === $code) {
            // '
            return 'Unexpected single quote character (\'), did you mean to use a double quote (")?';
        } else {
            $template = 'Cannot parse the unexpected character %s.';
        }

        return \sprintf($template, printCharCode($code));
    }
666
667
    /**
     * Tells whether a spread (...) starts at the current position.
     *
     * @param int $code The character code found at the current position.
     * @return bool
     */
    protected function isSpread(int $code): bool
    {
        if (46 !== $code) {
            return false;
        }

        // The two characters that follow must be dots as well.
        for ($offset = 1; $offset <= 2; ++$offset) {
            if (46 !== $this->readCharCode($this->position + $offset)) {
                return false;
            }
        }

        return true;
    }
677
678
    /**
     * Tells whether a regular (single-line) string starts at the current
     * position.
     *
     * Any double quote opens a regular string unless it is the first of three
     * consecutive quotes, which open a block string instead. Looking only at
     * the next character would wrongly reject the empty string literal "".
     *
     * @param int $code The character code found at the current position.
     * @return bool
     */
    protected function isString(int $code): bool
    {
        return 34 === $code && !$this->isTripleQuote($code);
    }
686
687
    /**
     * Tells whether a triple-quote (""") starts at the current position.
     *
     * @param int $code The character code found at the current position.
     * @return bool
     */
    protected function isTripleQuote(int $code): bool
    {
        if (34 !== $code) {
            return false;
        }

        if (34 !== $this->readCharCode($this->position + 1)) {
            return false;
        }

        return 34 === $this->readCharCode($this->position + 2);
    }
697
698
    /**
     * Tells whether an escaped triple-quote (a backslash followed by three
     * double quotes) starts at the current position.
     *
     * @param int $code The character code found at the current position.
     * @return bool
     */
    protected function isEscapedTripleQuote(int $code): bool
    {
        if (92 !== $code) {
            return false;
        }

        // All three characters after the backslash must be double quotes.
        for ($offset = 1; $offset <= 3; ++$offset) {
            if (34 !== $this->readCharCode($this->position + $offset)) {
                return false;
            }
        }

        return true;
    }
709
}
710