Lexer::getChannelNames()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 3
ccs 0
cts 2
cp 0
crap 2
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Antlr\Antlr4\Runtime;
6
7
use Antlr\Antlr4\Runtime\Atn\LexerATNSimulator;
8
use Antlr\Antlr4\Runtime\Error\Exceptions\LexerNoViableAltException;
9
use Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException;
10
use Antlr\Antlr4\Runtime\Utils\Pair;
11
12
/**
 * A lexer is a recognizer that draws input symbols from a character stream.
 * Lexer grammars result in a subclass of this object. A Lexer object
 * uses simplified match() and error recovery mechanisms in the interest
 * of speed.
 */
abstract class Lexer extends Recognizer implements TokenSource
{
    /** Mode a lexer starts in and returns to on {@see Lexer::reset()}. */
    public const DEFAULT_MODE = 0;

    /**
     * Pseudo token type stored by {@see Lexer::more()}; while the current
     * type is MORE, {@see Lexer::nextToken()} keeps matching instead of emitting.
     */
    public const MORE = -2;

    /**
     * Pseudo token type stored by {@see Lexer::skip()}; it makes
     * {@see Lexer::nextToken()} discard the current match and start a new token.
     */
    public const SKIP = -3;

    /** Alias of {@see Token::DEFAULT_CHANNEL}. */
    public const DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;

    /** Alias of {@see Token::HIDDEN_CHANNEL}. */
    public const HIDDEN = Token::HIDDEN_CHANNEL;

    /** Smallest character value. */
    public const MIN_CHAR_VALUE = 0x0000;

    /** Largest character value. */
    public const MAX_CHAR_VALUE = 0x10FFFF;

    /**
     * The character stream tokens are read from; null until one is supplied.
     *
     * @var CharStream|null
     */
    public $input;

    /**
     * The (token source, char stream) pair handed to the token factory
     * whenever a token is created.
     *
     * @var Pair Pair<TokenSource, CharStream>
     */
    protected $tokenFactorySourcePair;

    /**
     * Factory used by {@see Lexer::emit()} and {@see Lexer::emitEOF()}.
     *
     * @var TokenFactory
     */
    protected $factory;

    /**
     * The goal of all lexer rules/methods is to create a token object.
     * This is an instance variable as multiple rules may collaborate to
     * create a single token. `nextToken` will return this object after
     * matching lexer rule(s).
     *
     * If you subclass to allow multiple token emissions, then set this
     * to the last token to be matched or something non-null so that
     * the auto token emit mechanism will not emit another token.
     *
     * @var Token|null
     */
    public $token;

    /**
     * What character index in the stream did the current token start at?
     * Needed, for example, to get the text for the current token. Set at
     * the start of nextToken.
     *
     * @var int
     */
    public $tokenStartCharIndex = -1;

    /**
     * The line on which the first character of the token resides.
     *
     * @var int
     */
    public $tokenStartLine = -1;

    /**
     * The character position of the first character within the line.
     *
     * @var int
     */
    public $tokenStartCharPositionInLine = -1;

    /**
     * Once we see EOF on the char stream, the next token will be EOF.
     * If you have DONE : EOF ; then you see DONE EOF.
     *
     * @var bool
     */
    public $hitEOF = false;

    /**
     * The channel number for the current token.
     *
     * @var int
     */
    public $channel = Token::DEFAULT_CHANNEL;

    /**
     * The token type for the current token.
     *
     * @var int
     */
    public $type = Token::INVALID_TYPE;

    /**
     * Modes saved by {@see Lexer::pushMode()}, most recent last.
     *
     * @var array<int>
     */
    public $modeStack = [];

    /**
     * The mode passed to the interpreter when matching the next token.
     *
     * @var int
     */
    public $mode = self::DEFAULT_MODE;

    /**
     * You can set the text for the current token to override what is in the
     * input char buffer. Use {@see Lexer::setText()} or set this instance var.
     *
     * @var string|null
     */
    public $text;

    /**
     * The ATN simulator that performs the matching. The constructor leaves
     * it null; child classes must populate it.
     *
     * @var LexerATNSimulator|null
     */
    protected $interp;

    /**
     * @param CharStream|null $input Stream to tokenize; may be supplied later
     *                               through {@see Lexer::setInputStream()}.
     */
    public function __construct(?CharStream $input = null)
    {
        parent::__construct();

        $this->input = $input;
        $this->factory = CommonTokenFactory::default();
        $this->tokenFactorySourcePair = new Pair($this, $input);

        // @todo remove this property
        $this->interp = null;// child classes must populate this
    }

    /**
     * Rewinds the input (when there is one) and restores every piece of
     * per-token and mode state to its initial value, then resets the
     * interpreter if one is set.
     */
    public function reset() : void
    {
        if ($this->input !== null) {
            $this->input->seek(0);// rewind the input
        }

        // Per-token state.
        $this->token = null;
        $this->type = Token::INVALID_TYPE;
        $this->channel = Token::DEFAULT_CHANNEL;
        $this->tokenStartCharIndex = -1;
        $this->tokenStartCharPositionInLine = -1;
        $this->tokenStartLine = -1;
        $this->text = null;

        // Stream and mode state.
        $this->hitEOF = false;
        $this->mode = self::DEFAULT_MODE;
        $this->modeStack = [];

        if ($this->interp !== null) {
            $this->interp->reset();
        }
    }

    /**
     * Return a token from this source; i.e., match a token on the char stream.
     *
     * Matches that end with type SKIP are discarded and matching restarts
     * with a fresh token; matches that end with type MORE continue matching
     * into the same token. Once EOF has been seen, an EOF token is emitted.
     *
     * @throws \RuntimeException When there is no input stream, or when the
     *                           interpreter is not a LexerATNSimulator.
     */
    public function nextToken() : ?Token
    {
        if ($this->input === null) {
            throw new \RuntimeException('NextToken requires a non-null input stream.');
        }

        // Mark start location in char stream so unbuffered streams are
        // guaranteed to at least have the text of the current token.
        $tokenStartMarker = $this->input->mark();

        try {
            while (true) {
                if ($this->hitEOF) {
                    $this->emitEOF();

                    return $this->token;
                }

                // `null instanceof X` is false, so this also rejects a missing interpreter.
                if (!$this->interp instanceof LexerATNSimulator) {
                    throw new \RuntimeException('Unexpected interpreter type.');
                }

                // Start a fresh token at the current stream position.
                $this->token = null;
                $this->channel = Token::DEFAULT_CHANNEL;
                $this->tokenStartCharIndex = $this->input->getIndex();
                $this->tokenStartCharPositionInLine = $this->interp->getCharPositionInLine();
                $this->tokenStartLine = $this->interp->getLine();
                $this->text = null;

                do {
                    $this->type = Token::INVALID_TYPE;

                    try {
                        $ttype = $this->interp->match($this->input, $this->mode);
                    } catch (LexerNoViableAltException $e) {
                        $this->notifyListeners($e); // report error
                        $this->recover($e);

                        // Nothing matched: treat the offending input as skipped.
                        $ttype = self::SKIP;
                    }

                    if ($this->input->LA(1) === Token::EOF) {
                        $this->hitEOF = true;
                    }

                    // A type assigned while matching (e.g. via skip(), more()
                    // or setType()) wins over the type returned by match().
                    if ($this->type === Token::INVALID_TYPE) {
                        $this->type = $ttype;
                    }

                    if ($this->type === self::SKIP) {
                        // Drop this match and begin a new token.
                        continue 2;
                    }
                } while ($this->type === self::MORE);

                if ($this->token === null) {
                    $this->emit();
                }

                return $this->token;
            }
        } finally {
            // Make sure we release the marker after the match, or an
            // unbuffered char stream will keep buffering.
            $this->input->release($tokenStartMarker);
        }
    }

    /**
     * Instruct the lexer to skip creating a token for the current lexer rule
     * and look for another token. `nextToken` knows to keep looking when
     * a lexer rule finishes with the type set to SKIP. Recall that
     * if `token === null` at the end of any token rule, it creates one for you
     * and emits it.
     */
    public function skip() : void
    {
        $this->type = self::SKIP;
    }

    /**
     * Marks the current match as MORE, so `nextToken` continues matching
     * into the same token instead of emitting one.
     */
    public function more() : void
    {
        $this->type = self::MORE;
    }

    /**
     * Switches the lexer to mode `$m` without touching the mode stack.
     */
    public function mode(int $m) : void
    {
        $this->mode = $m;
    }

    /**
     * Saves the current mode on the mode stack and switches to mode `$m`.
     */
    public function pushMode(int $m) : void
    {
        $this->modeStack[] = $this->mode;

        $this->mode($m);
    }

    /**
     * Switches back to the most recently pushed mode.
     *
     * @return int The mode that is active after popping.
     *
     * @throws \RuntimeException When the mode stack is empty.
     */
    public function popMode() : int
    {
        if (\count($this->modeStack) === 0) {
            throw new \RuntimeException('Empty Stack');
        }

        $this->mode(\array_pop($this->modeStack));

        return $this->mode;
    }

    /**
     * @return string The source name reported by the input stream, or an
     *                empty string when there is no input stream.
     */
    public function getSourceName() : string
    {
        return $this->input === null ? '' : $this->input->getSourceName();
    }

    /**
     * @return IntStream|null The current input stream, if any.
     */
    public function getInputStream() : ?IntStream
    {
        return $this->input;
    }

    public function getTokenFactory() : TokenFactory
    {
        return $this->factory;
    }

    public function setTokenFactory(TokenFactory $factory) : void
    {
        $this->factory = $factory;
    }

    /**
     * Replaces the input stream and resets the lexer state.
     *
     * The argument is validated before anything is modified, so a rejected
     * stream leaves the lexer exactly as it was.
     *
     * @throws \RuntimeException When `$input` is not a CharStream.
     */
    public function setInputStream(IntStream $input) : void
    {
        if (!$input instanceof CharStream) {
            throw new \RuntimeException('Input must be CharStream.');
        }

        // Detach the old stream first so that reset() does not rewind it.
        $this->input = null;
        $this->tokenFactorySourcePair = new Pair($this, $this->input);

        $this->reset();

        $this->input = $input;
        $this->tokenFactorySourcePair = new Pair($this, $this->input);
    }

    /**
     * By default does not support multiple emits per nextToken invocation
     * for efficiency reasons. Subclass and override this method, nextToken,
     * and getToken (to push tokens into a list and pull from that list
     * rather than a single variable as this implementation does).
     */
    public function emitToken(Token $token) : void
    {
        $this->token = $token;
    }

    /**
     * The standard method called to automatically emit a token at the
     * outermost lexical rule. The token object should point into the
     * char buffer start..stop. If there is a text override in 'text',
     * use that to set the token's text. Override this method to emit
     * custom Token objects or provide a new factory.
     *
     * @return Token The token that was created and emitted.
     */
    public function emit() : Token
    {
        $token = $this->factory->createEx(
            $this->tokenFactorySourcePair,
            $this->type,
            $this->text,
            $this->channel,
            $this->tokenStartCharIndex,
            $this->getCharIndex() - 1, // stop index: last consumed character
            $this->tokenStartLine,
            $this->tokenStartCharPositionInLine
        );

        $this->emitToken($token);

        return $token;
    }

    /**
     * Creates an EOF token on the default channel at the current stream
     * position and emits it.
     *
     * @return Token The EOF token that was emitted.
     *
     * @throws \RuntimeException When there is no input stream, or when the
     *                           interpreter is not a LexerATNSimulator.
     */
    public function emitEOF() : Token
    {
        if ($this->input === null) {
            throw new \RuntimeException('Cannot emit EOF for null stream.');
        }

        $cpos = $this->getCharPositionInLine();
        $lpos = $this->getLine();
        $eof = $this->factory->createEx(
            $this->tokenFactorySourcePair,
            Token::EOF,
            null,
            Token::DEFAULT_CHANNEL,
            $this->input->getIndex(),
            $this->input->getIndex() - 1, // stop is one before start
            $lpos,
            $cpos
        );

        $this->emitToken($eof);

        return $eof;
    }

    /**
     * @return int The line currently reported by the interpreter.
     *
     * @throws \RuntimeException When the interpreter is not a LexerATNSimulator.
     */
    public function getLine() : int
    {
        return $this->lexerInterpreterOrFail()->getLine();
    }

    /**
     * @throws \RuntimeException When the interpreter is not a LexerATNSimulator.
     */
    public function setLine(int $line) : void
    {
        $this->lexerInterpreterOrFail()->setLine($line);
    }

    /**
     * @return int The in-line character position currently reported by the interpreter.
     *
     * @throws \RuntimeException When the interpreter is not a LexerATNSimulator.
     */
    public function getCharPositionInLine() : int
    {
        return $this->lexerInterpreterOrFail()->getCharPositionInLine();
    }

    /**
     * @throws \RuntimeException When the interpreter is not a LexerATNSimulator.
     */
    public function setCharPositionInLine(int $charPositionInLine) : void
    {
        $this->lexerInterpreterOrFail()->setCharPositionInLine($charPositionInLine);
    }

    /**
     * What is the index of the current character of lookahead?
     *
     * @throws \RuntimeException When there is no input stream.
     */
    public function getCharIndex() : int
    {
        if ($this->input === null) {
            throw new \RuntimeException('Cannot know char index for null stream.');
        }

        return $this->input->getIndex();
    }

    /**
     * Return the text matched so far for the current token or any text override.
     *
     * @throws \RuntimeException When no override is set and the interpreter
     *                           is not a LexerATNSimulator.
     */
    public function getText() : string
    {
        if ($this->text !== null) {
            return $this->text;
        }

        $interp = $this->lexerInterpreterOrFail();

        return $this->input === null ? '' : $interp->getText($this->input);
    }

    /**
     * Set the complete text of this token; it wipes any previous changes to the text.
     */
    public function setText(string $text) : void
    {
        $this->text = $text;
    }

    public function getToken() : ?Token
    {
        return $this->token;
    }

    /**
     * Override if emitting multiple tokens.
     */
    public function setToken(Token $token) : void
    {
        $this->token = $token;
    }

    public function getType() : int
    {
        return $this->type;
    }

    public function setType(int $type) : void
    {
        $this->type = $type;
    }

    public function getChannel() : int
    {
        return $this->channel;
    }

    public function setChannel(int $channel) : void
    {
        $this->channel = $channel;
    }

    /**
     * This base implementation has no channel names and returns null.
     *
     * @return array<string>|null
     */
    public function getChannelNames() : ?array
    {
        return null;
    }

    /**
     * This base implementation has no mode names and returns null.
     *
     * @return array<string>|null
     */
    public function getModeNames() : ?array
    {
        return null;
    }

    /**
     * Return a list of all Token objects in the input char stream.
     * Forces load of all tokens. Does not include the EOF token.
     *
     * @return array<Token>
     */
    public function getAllTokens() : array
    {
        $tokens = [];

        for (
            $token = $this->nextToken();
            $token !== null && $token->getType() !== Token::EOF;
            $token = $this->nextToken()
        ) {
            $tokens[] = $token;
        }

        return $tokens;
    }

    /**
     * Lexers can normally match any char in its vocabulary after matching
     * a token, so do the easy thing and just kill a character and hope
     * it all works out. You can instead use the rule invocation stack
     * to do sophisticated error recovery if you are in a fragment rule.
     *
     * Nothing is consumed when there is no input or the stream is at EOF.
     */
    public function recover(RecognitionException $re) : void
    {
        if ($this->input === null || $this->input->LA(1) === Token::EOF) {
            return;
        }

        if ($re instanceof LexerNoViableAltException && $this->interp !== null) {
            // skip a char and try again
            $this->interp->consume($this->input);

            return;
        }

        // TODO: Do we lose character or line position information?
        $this->input->consume();
    }

    /**
     * Reports a token recognition error to the error listener dispatch,
     * quoting the input text from the start of the current token up to the
     * current stream index (empty when there is no input stream).
     */
    public function notifyListeners(LexerNoViableAltException $e) : void
    {
        $text = '';

        if ($this->input !== null) {
            $text = $this->input->getText($this->tokenStartCharIndex, $this->input->getIndex());
        }

        $this->getErrorListenerDispatch()->syntaxError(
            $this,
            null,
            $this->tokenStartLine,
            $this->tokenStartCharPositionInLine,
            \sprintf('token recognition error at: \'%s\'', $text),
            $e
        );
    }

    /**
     * Returns the interpreter, narrowed to LexerATNSimulator.
     *
     * @throws \RuntimeException When the interpreter is missing or of another type.
     */
    private function lexerInterpreterOrFail() : LexerATNSimulator
    {
        // `null instanceof X` is false, so a missing interpreter fails here too.
        if (!$this->interp instanceof LexerATNSimulator) {
            throw new \RuntimeException('Unexpected interpreter type.');
        }

        return $this->interp;
    }
}
546