Lexer::nextToken()   C
last analyzed

Complexity

Conditions 14
Paths 175

Size

Total Lines 73
Code Lines 41

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 34
CRAP Score 14.6615

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 14
eloc 41
c 1
b 0
f 0
nc 175
nop 0
dl 0
loc 73
ccs 34
cts 40
cp 0.85
crap 14.6615
rs 5.6416

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Antlr\Antlr4\Runtime;
6
7
use Antlr\Antlr4\Runtime\Atn\LexerATNSimulator;
8
use Antlr\Antlr4\Runtime\Error\Exceptions\LexerNoViableAltException;
9
use Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException;
10
use Antlr\Antlr4\Runtime\Utils\Pair;
11
12
/**
13
 * A lexer is a recognizer that draws input symbols from a character stream.
14
 * Lexer grammars result in a subclass of this object. A Lexer object
15
 * uses simplified match() and error recovery mechanisms in the interest
16
 * of speed.
17
 */
18
abstract class Lexer extends Recognizer implements TokenSource
19
{
20
    public const DEFAULT_MODE = 0;
21
    public const MORE = -2;
22
    public const SKIP = -3;
23
24
    public const DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
25
    public const HIDDEN = Token::HIDDEN_CHANNEL;
26
    public const MIN_CHAR_VALUE = 0x0000;
27
    public const MAX_CHAR_VALUE = 0x10FFFF;
28
29
    /** @var CharStream|null */
30
    public $input;
31
32
    /** @var Pair Pair<TokenSource, CharStream> */
33
    protected $tokenFactorySourcePair;
34
35
    /** @var TokenFactory */
36
    protected $factory;
37
38
    /**
39
     * The goal of all lexer rules/methods is to create a token object.
40
     * This is an instance variable as multiple rules may collaborate to
41
     * create a single token. `nextToken` will return this object after
42
     * matching lexer rule(s).
43
     *
44
     * If you subclass to allow multiple token emissions, then set this
45
     * to the last token to be matched or something nonnull so that
46
     * the auto token emit mechanism will not emit another token.
47
     *
48
     * @var Token|null
49
     */
50
    public $token;
51
52
    /**
53
     * What character index in the stream did the current token start at?
54
     * Needed, for example, to get the text for current token. Set at
55
     * the start of nextToken.
56
     *
57
     * @var int
58
     */
59
    public $tokenStartCharIndex = -1;
60
61
    /**
62
     * The line on which the first character of the token resides.
63
     *
64
     * @var int
65
     */
66
    public $tokenStartLine = -1;
67
68
    /**
69
     * The character position of first character within the line
70
     *
71
     * @var int
72
     */
73
    public $tokenStartCharPositionInLine = -1;
74
75
    /**
76
     * Once we see EOF on char stream, next token will be EOF.
77
     * If you have DONE : EOF ; then you see DONE EOF.
78
     *
79
     * @var bool
80
     */
81
    public $hitEOF = false;
82
83
    /**
84
     * The channel number for the current token.
85
     *
86
     * @var int
87
     */
88
    public $channel = Token::DEFAULT_CHANNEL;
89
90
    /**
91
     * The token type for the current token.
92
     *
93
     * @var int
94
     */
95
    public $type = Token::INVALID_TYPE;
96
97
    /** @var array<int> */
98
    public $modeStack = [];
99
100
    /** @var int */
101
    public $mode = self::DEFAULT_MODE;
102
103
    /**
104
     * You can set the text for the current token to override what is in the
105
     * input char buffer. Use {@see Lexer::setText()} or set this instance variable directly.
106
     *
107
     * @var string|null
108
     */
109
    public $text;
110
111
    /** @var LexerATNSimulator|null */
112
    protected $interp;
113
114 7
    /**
     * Creates a lexer that reads from the given character stream.
     *
     * @param CharStream|null $input Stream to tokenize; may be null and
     *                               supplied later via {@see Lexer::setInputStream()}.
     */
    public function __construct(?CharStream $input = null)
    {
        parent::__construct();

        $this->factory = CommonTokenFactory::default();
        $this->input = $input;
        $this->tokenFactorySourcePair = new Pair($this, $this->input);

        // @todo remove this property
        // Starts out empty; child classes must populate it.
        $this->interp = null;
    }
125
126
    /**
     * Returns the lexer to its initial state: rewinds the input (when one is
     * attached), clears the per-token bookkeeping, restores the default mode
     * and resets the interpreter (when one is set).
     */
    public function reset() : void
    {
        // Rewind the input stream, if there is one.
        if ($this->input !== null) {
            $this->input->seek(0);
        }

        // Forget everything about the token that was being built.
        $this->token = null;
        $this->text = null;
        $this->type = Token::INVALID_TYPE;
        $this->channel = Token::DEFAULT_CHANNEL;
        $this->tokenStartCharIndex = -1;
        $this->tokenStartLine = -1;
        $this->tokenStartCharPositionInLine = -1;

        // Clear the EOF flag and go back to the default mode with an empty stack.
        $this->hitEOF = false;
        $this->modeStack = [];
        $this->mode = self::DEFAULT_MODE;

        if ($this->interp !== null) {
            $this->interp->reset();
        }
    }
149
150
    /**
     * Return a token from this source; i.e., match a token on the char stream.
     *
     * A rule that ends with the type {@see Lexer::SKIP} is discarded and
     * matching starts over with a fresh token; a rule that ends with
     * {@see Lexer::MORE} keeps matching into the same token. Once the end of
     * the input has been seen, the next call emits an EOF token.
     *
     * @throws \RuntimeException When no input stream is set or the interpreter
     *                           is not a {@see LexerATNSimulator}.
     */
    public function nextToken() : ?Token
    {
        if ($this->input === null) {
            throw new \RuntimeException('NextToken requires a non-null input stream.');
        }

        // Mark the start location in the char stream so unbuffered streams are
        // guaranteed to have at least the text of the current token.
        $marker = $this->input->mark();

        try {
            while (true) {
                if ($this->hitEOF) {
                    $this->emitEOF();

                    return $this->token;
                }

                // `instanceof` is false for null, so this also rejects a missing interpreter.
                if (!($this->interp instanceof LexerATNSimulator)) {
                    throw new \RuntimeException('Unexpected interpreter type.');
                }

                // Begin a fresh token at the current input position.
                $this->token = null;
                $this->text = null;
                $this->channel = Token::DEFAULT_CHANNEL;
                $this->tokenStartCharIndex = $this->input->getIndex();
                $this->tokenStartCharPositionInLine = $this->interp->getCharPositionInLine();
                $this->tokenStartLine = $this->interp->getLine();

                do {
                    $this->type = Token::INVALID_TYPE;

                    // This default is NOT dead code: it stays in effect when
                    // match() throws below, so a failed match falls back to
                    // SKIP unless the type was set elsewhere in the meantime.
                    $matchedType = self::SKIP;

                    try {
                        $matchedType = $this->interp->match($this->input, $this->mode);
                    } catch (LexerNoViableAltException $e) {
                        $this->notifyListeners($e); // report error
                        $this->recover($e);
                    }

                    if ($this->input->LA(1) === Token::EOF) {
                        $this->hitEOF = true;
                    }

                    // Only use the matched type if nothing assigned one during matching.
                    if ($this->type === Token::INVALID_TYPE) {
                        $this->type = $matchedType;
                    }

                    if ($this->type === self::SKIP) {
                        // Drop this match and restart the outer loop with a new token.
                        continue 2;
                    }
                } while ($this->type === self::MORE);

                if ($this->token === null) {
                    $this->emit();
                }

                return $this->token;
            }
        } finally {
            // Make sure we release the marker after the match, or an
            // unbuffered char stream will keep buffering.
            $this->input->release($marker);
        }
    }
228
229
    /**
     * Instruct the lexer to skip creating a token for the current lexer rule
     * and look for another token. `nextToken` keeps looking when a lexer rule
     * finishes with the type set to {@see Lexer::SKIP}. Recall that if
     * `token === null` at the end of any token rule, `nextToken` creates one
     * for you and emits it.
     */
    public function skip() : void
    {
        $this->type = self::SKIP;
    }
240
241
    /**
     * Sets the current token type to {@see Lexer::MORE}, which makes
     * `nextToken` continue matching into the same token.
     */
    public function more() : void
    {
        $this->type = self::MORE;
    }
245
246
    /**
     * Switches the lexer to the given mode without touching the mode stack.
     *
     * @param int $m Mode to switch to.
     */
    public function mode(int $m) : void
    {
        $this->mode = $m;
    }
250
251
    /**
     * Saves the current mode on the mode stack, then switches to the given mode.
     *
     * @param int $m Mode to switch to.
     */
    public function pushMode(int $m) : void
    {
        $previousMode = $this->mode;
        $this->modeStack[] = $previousMode;

        $this->mode($m);
    }
257
258
    /**
     * Restores the most recently pushed mode and returns it.
     *
     * @return int The mode that is active after popping.
     *
     * @throws \RuntimeException When the mode stack is empty.
     */
    public function popMode() : int
    {
        $depth = \count($this->modeStack);

        if ($depth === 0) {
            throw new \RuntimeException('Empty Stack');
        }

        $restoredMode = \array_pop($this->modeStack);
        $this->mode($restoredMode);

        return $this->mode;
    }
268
269
    /**
     * Returns the source name reported by the input stream, or an empty
     * string when no input stream is attached.
     */
    public function getSourceName() : string
    {
        if ($this->input === null) {
            return '';
        }

        return $this->input->getSourceName();
    }
273
274
    /**
     * Returns the stream this lexer reads from, or null when none is attached.
     */
    public function getInputStream() : ?IntStream
    {
        return $this->input;
    }
278
279
    /**
     * Returns the factory used to create tokens.
     */
    public function getTokenFactory() : TokenFactory
    {
        return $this->factory;
    }
283
284
    /**
     * Replaces the factory used to create tokens.
     *
     * @param TokenFactory $factory Factory used by {@see Lexer::emit()} and
     *                              {@see Lexer::emitEOF()} from now on.
     */
    public function setTokenFactory(TokenFactory $factory) : void
    {
        $this->factory = $factory;
    }
288
289
    /**
     * Attaches a new input stream and resets the lexer state.
     *
     * The argument is validated before any state is touched, so a rejected
     * stream leaves the lexer exactly as it was (previously the old input was
     * dropped and the lexer reset before the exception was thrown).
     *
     * @param IntStream $input The new input; must be a {@see CharStream}.
     *
     * @throws \RuntimeException When $input is not a {@see CharStream}.
     */
    public function setInputStream(IntStream $input) : void
    {
        if (!$input instanceof CharStream) {
            throw new \RuntimeException('Input must be CharStream.');
        }

        // Detach the current stream first: reset() rewinds whatever stream is
        // attached, and the outgoing one must not be touched.
        $this->input = null;
        $this->tokenFactorySourcePair = new Pair($this, $this->input);

        $this->reset();

        $this->input = $input;
        $this->tokenFactorySourcePair = new Pair($this, $this->input);
    }
303
304
    /**
     * Stores the given token as the current token.
     *
     * By default multiple emits per `nextToken` invocation are not supported,
     * for efficiency reasons. Subclass and override this method, `nextToken`,
     * and `getToken` (to push tokens into a list and pull from that list
     * rather than a single variable as this implementation does).
     *
     * @param Token $token Token to make current.
     */
    public function emitToken(Token $token) : void
    {
        $this->token = $token;
    }
314
315
    /**
     * The standard method called to automatically emit a token at the
     * outermost lexical rule. The token object should point into the
     * char buffer start..stop. If there is a text override in `text`,
     * that is passed to the factory as the token's text. Override this
     * method to emit custom Token objects or provide a new factory.
     *
     * @return Token The token that was created and emitted.
     */
    public function emit() : Token
    {
        // The token stops on the character just before the current char index.
        $stopIndex = $this->getCharIndex() - 1;

        $created = $this->factory->createEx(
            $this->tokenFactorySourcePair,
            $this->type,
            $this->text,
            $this->channel,
            $this->tokenStartCharIndex,
            $stopIndex,
            $this->tokenStartLine,
            $this->tokenStartCharPositionInLine
        );

        $this->emitToken($created);

        return $created;
    }
339
340 6
    /**
     * Creates an EOF token on the default channel at the current input
     * position, emits it and returns it.
     *
     * @return Token The emitted EOF token.
     *
     * @throws \RuntimeException When no input stream is attached.
     */
    public function emitEOF() : Token
    {
        if ($this->input === null) {
            throw new \RuntimeException('Cannot emit EOF for null stream.');
        }

        $column = $this->getCharPositionInLine();
        $line = $this->getLine();

        // The stop index is one less than the start index, i.e. the token
        // covers no characters.
        $eofToken = $this->factory->createEx(
            $this->tokenFactorySourcePair,
            Token::EOF,
            null,
            Token::DEFAULT_CHANNEL,
            $this->input->getIndex(),
            $this->input->getIndex() - 1,
            $line,
            $column
        );

        $this->emitToken($eofToken);

        return $eofToken;
    }
363
364 7
    /**
     * Returns the current line as tracked by the interpreter.
     *
     * @throws \RuntimeException When the interpreter is not a {@see LexerATNSimulator}.
     */
    public function getLine() : int
    {
        // `instanceof` is false for null, so a missing interpreter is rejected too.
        if (!($this->interp instanceof LexerATNSimulator)) {
            throw new \RuntimeException('Unexpected interpreter type.');
        }

        return $this->interp->getLine();
    }
372
373
    /**
     * Sets the current line on the interpreter.
     *
     * @param int $line Line number to hand to the interpreter.
     *
     * @throws \RuntimeException When the interpreter is not a {@see LexerATNSimulator}.
     */
    public function setLine(int $line) : void
    {
        // `instanceof` is false for null, so a missing interpreter is rejected too.
        if (!($this->interp instanceof LexerATNSimulator)) {
            throw new \RuntimeException('Unexpected interpreter type.');
        }

        $this->interp->setLine($line);
    }
381
382 7
    /**
     * Returns the current character position within the line, as tracked by
     * the interpreter.
     *
     * @throws \RuntimeException When the interpreter is not a {@see LexerATNSimulator}.
     */
    public function getCharPositionInLine() : int
    {
        // `instanceof` is false for null, so a missing interpreter is rejected too.
        if (!($this->interp instanceof LexerATNSimulator)) {
            throw new \RuntimeException('Unexpected interpreter type.');
        }

        return $this->interp->getCharPositionInLine();
    }
390
391
    /**
     * Sets the current character position within the line on the interpreter.
     *
     * @param int $charPositionInLine Position to hand to the interpreter.
     *
     * @throws \RuntimeException When the interpreter is not a {@see LexerATNSimulator}.
     */
    public function setCharPositionInLine(int $charPositionInLine) : void
    {
        // `instanceof` is false for null, so a missing interpreter is rejected too.
        if (!($this->interp instanceof LexerATNSimulator)) {
            throw new \RuntimeException('Unexpected interpreter type.');
        }

        $this->interp->setCharPositionInLine($charPositionInLine);
    }
399
400
    /**
     * What is the index of the current character of lookahead?
     *
     * @return int The current index of the input stream.
     *
     * @throws \RuntimeException When no input stream is attached.
     */
    public function getCharIndex() : int
    {
        $stream = $this->input;

        if ($stream === null) {
            throw new \RuntimeException('Cannot know char index for null stream.');
        }

        return $stream->getIndex();
    }
411
412
    /**
     * Return the text matched so far for the current token or any text override.
     *
     * An override set through {@see Lexer::setText()} (or the `text` property)
     * takes precedence and is returned without consulting the interpreter.
     * Without an override and without an input stream, the result is an
     * empty string.
     *
     * @throws \RuntimeException When there is no override and the interpreter
     *                           is not a {@see LexerATNSimulator}.
     */
    public function getText() : string
    {
        if ($this->text !== null) {
            return $this->text;
        }

        // `instanceof` is false for null, so a missing interpreter is rejected too.
        if (!($this->interp instanceof LexerATNSimulator)) {
            throw new \RuntimeException('Unexpected interpreter type.');
        }

        if ($this->input === null) {
            return '';
        }

        return $this->interp->getText($this->input);
    }
427
428
    /**
     * Set the complete text of this token; it replaces any previous override.
     *
     * @param string $text Text to report for the current token.
     */
    public function setText(string $text) : void
    {
        $this->text = $text;
    }
435
436
    /**
     * Returns the current token, or null when none has been set.
     */
    public function getToken() : ?Token
    {
        return $this->token;
    }
440
441
    /**
     * Stores the given token as the current token.
     * Override if emitting multiple tokens.
     *
     * @param Token $token Token to make current.
     */
    public function setToken(Token $token) : void
    {
        $this->token = $token;
    }
448
449
    /**
     * Returns the token type for the current token.
     */
    public function getType() : int
    {
        return $this->type;
    }
453
454
    /**
     * Sets the token type for the current token.
     *
     * @param int $type Token type to assign.
     */
    public function setType(int $type) : void
    {
        $this->type = $type;
    }
458
459
    /**
     * Returns the channel number for the current token.
     */
    public function getChannel() : int
    {
        return $this->channel;
    }
463
464
    /**
     * Sets the channel number for the current token.
     *
     * @param int $channel Channel number to assign.
     */
    public function setChannel(int $channel) : void
    {
        $this->channel = $channel;
    }
468
469
    /**
     * Returns the channel names; this base implementation has none and
     * always returns null.
     *
     * @return array<string>|null
     */
    public function getChannelNames() : ?array
    {
        return null;
    }
476
477
    /**
     * Returns the mode names; this base implementation has none and
     * always returns null.
     *
     * @return array<string>|null
     */
    public function getModeNames() : ?array
    {
        return null;
    }
484
485
    /**
     * Return a list of all Token objects in the input char stream.
     * Forces load of all tokens. Does not include the EOF token.
     *
     * Collection also stops if `nextToken` returns null.
     *
     * @return array<Token>
     */
    public function getAllTokens() : array
    {
        $collected = [];

        while (true) {
            $current = $this->nextToken();

            if ($current === null || $current->getType() === Token::EOF) {
                break;
            }

            $collected[] = $current;
        }

        return $collected;
    }
503
504
    /**
     * Lexers can normally match any char in its vocabulary after matching
     * a token, so do the easy thing and just kill a character and hope
     * it all works out. You can instead use the rule invocation stack
     * to do sophisticated error recovery if you are in a fragment rule.
     *
     * Nothing is consumed when there is no input stream or the next
     * symbol is EOF.
     *
     * @param RecognitionException $re The error being recovered from.
     */
    public function recover(RecognitionException $re) : void
    {
        if ($this->input === null || $this->input->LA(1) === Token::EOF) {
            return;
        }

        if ($re instanceof LexerNoViableAltException && $this->interp !== null) {
            // Skip a char through the interpreter and try again.
            $this->interp->consume($this->input);

            return;
        }

        // TODO: Do we lose character or line position information?
        $this->input->consume();
    }
522
523
    /**
     * Reports a token recognition error through the error listener dispatch.
     *
     * The message quotes the input text from the start of the current token
     * up to the current input index, or an empty string when no input stream
     * is attached.
     *
     * @param LexerNoViableAltException $e The error to report.
     */
    public function notifyListeners(LexerNoViableAltException $e) : void
    {
        $offendingText = '';

        if ($this->input !== null) {
            $from = $this->tokenStartCharIndex;
            $to = $this->input->getIndex();
            $offendingText = $this->input->getText($from, $to);
        }

        $message = \sprintf('token recognition error at: \'%s\'', $offendingText);

        $this->getErrorListenerDispatch()->syntaxError(
            $this,
            null,
            $this->tokenStartLine,
            $this->tokenStartCharPositionInLine,
            $message,
            $e
        );
    }
545
}
546