DefaultErrorStrategy::reportUnwantedToken()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 19
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 2
Bugs 0 Features 0
Metric Value
cc 2
eloc 11
c 2
b 0
f 0
nc 2
nop 1
dl 0
loc 19
ccs 0
cts 12
cp 0
crap 6
rs 9.9
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Antlr\Antlr4\Runtime\Error;
6
7
use Antlr\Antlr4\Runtime\Atn\States\ATNState;
8
use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition;
9
use Antlr\Antlr4\Runtime\Error\Exceptions\FailedPredicateException;
10
use Antlr\Antlr4\Runtime\Error\Exceptions\InputMismatchException;
11
use Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException;
12
use Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException;
13
use Antlr\Antlr4\Runtime\IntervalSet;
14
use Antlr\Antlr4\Runtime\Parser;
15
use Antlr\Antlr4\Runtime\ParserRuleContext;
16
use Antlr\Antlr4\Runtime\Token;
17
use Antlr\Antlr4\Runtime\Utils\Pair;
18
use Antlr\Antlr4\Runtime\Utils\StringUtils;
19
20
/**
21
 * This is the default implementation of {@see ANTLRErrorStrategy} used for
22
 * error reporting and recovery in ANTLR parsers.
23
 */
24
class DefaultErrorStrategy implements ANTLRErrorStrategy
25
{
26
    /**
27
     * Indicates whether the error strategy is currently "recovering from an
28
     * error". This is used to suppress reporting multiple error messages while
29
     * attempting to recover from a detected syntax error.
30
     *
31
     * @see DefaultErrorStrategy::inErrorRecoveryMode()
32
     *
33
     * @var bool
34
     */
35
    protected $errorRecoveryMode = false;
36
37
    /** The index into the input stream where the last error occurred.
38
     *  This is used to prevent infinite loops where an error is found
39
     *  but no token is consumed during recovery...another error is found,
40
     *  ad nauseum. This is a failsafe mechanism to guarantee that at least
41
     *  one token/tree node is consumed for two errors.
42
     *
43
     * @var int
44
     */
45
    protected $lastErrorIndex = -1;
46
47
    /** @var IntervalSet|null */
48
    protected $lastErrorStates;
49
50
    /**
51
     * This field is used to propagate information about the lookahead following
52
     * the previous match. Since prediction prefers completing the current rule
53
     * to error recovery efforts, error reporting may occur later than the
54
     * original point where it was discoverable. The original context is used to
55
     * compute the true expected sets as though the reporting occurred as early
56
     * as possible.
57
     *
58
     * @var ParserRuleContext|null
59
     */
60
    protected $nextTokensContext;
61
62
    /**
63
     * @see DefaultErrorStrategy::$nextTokensContext
64
     *
65
     * @var int|null
66
     */
67
    protected $nextTokensState;
68
69
    /**
70
     * {@inheritdoc}
71
     *
72
     * The default implementation simply calls
73
     * {@see DefaultErrorStrategy::endErrorCondition()} to ensure that
74
     * the handler is not in error recovery mode.
75
     */
76 7
    public function reset(Parser $recognizer) : void
77
    {
78 7
        $this->endErrorCondition($recognizer);
79 7
    }
80
81
    /**
82
     * This method is called to enter error recovery mode when a recognition
83
     * exception is reported.
84
     *
85
     * @param Parser $recognizer The parser instance.
86
     */
87 4
    protected function beginErrorCondition(Parser $recognizer) : void
0 ignored issues
show
Unused Code introduced by
The parameter $recognizer is not used and could be removed. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-unused  annotation

87
    protected function beginErrorCondition(/** @scrutinizer ignore-unused */ Parser $recognizer) : void

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
88
    {
89 4
        $this->errorRecoveryMode = true;
90 4
    }
91
92 7
    public function inErrorRecoveryMode(Parser $recognizer) : bool
93
    {
94 7
        return $this->errorRecoveryMode;
95
    }
96
97
    /**
98
     * This method is called to leave error recovery mode after recovering from
99
     * a recognition exception.
100
     */
101 7
    protected function endErrorCondition(Parser $recognizer) : void
0 ignored issues
show
Unused Code introduced by
The parameter $recognizer is not used and could be removed. ( Ignorable by Annotation )

If this is a false-positive, you can also ignore this issue in your code via the ignore-unused  annotation

101
    protected function endErrorCondition(/** @scrutinizer ignore-unused */ Parser $recognizer) : void

This check looks for parameters that have been defined for a function or method, but which are not used in the method body.

Loading history...
102
    {
103 7
        $this->errorRecoveryMode = false;
104 7
        $this->lastErrorStates = null;
105 7
        $this->lastErrorIndex = -1;
106 7
    }
107
108
    /**
109
     * {@inheritdoc}
110
     *
111
     * The default implementation simply calls
112
     * {@see DefaultErrorStrategy::endErrorCondition()}.
113
     */
114 4
    public function reportMatch(Parser $recognizer) : void
115
    {
116 4
        $this->endErrorCondition($recognizer);
117 4
    }
118
119
    /**
120
     * {@inheritdoc}
121
     *
122
     * The default implementation returns immediately if the handler is already
123
     * in error recovery mode. Otherwise, it calls
124
     * {@see DefaultErrorStrategy::beginErrorCondition()} and dispatches
125
     * the reporting task based on the runtime type of `e` according to
126
     * the following table.
127
     *
128
     * - {@see NoViableAltException}: Dispatches the call to
129
     * {@see reportNoViableAlternative}
130
     * - {@see InputMismatchException}: Dispatches the call to
131
     * {@see reportInputMismatch}
132
     * - {@see FailedPredicateException}: Dispatches the call to
133
     * {@see reportFailedPredicate}
134
     * - All other types: calls {@see Parser#notifyErrorListeners} to report
135
     * the exception
136
     */
137 4
    public function reportError(Parser $recognizer, RecognitionException $e) : void
138
    {
139
        // if we've already reported an error and have not matched a token
140
        // yet successfully, don't report any errors.
141 4
        if ($this->inErrorRecoveryMode($recognizer)) {
142
            // don't report spurious errors
143
            return;
144
        }
145
146 4
        $this->beginErrorCondition($recognizer);
147
148 4
        if ($e instanceof NoViableAltException) {
149
            $this->reportNoViableAlternative($recognizer, $e);
150 4
        } elseif ($e instanceof InputMismatchException) {
151 4
            $this->reportInputMismatch($recognizer, $e);
152
        } elseif ($e instanceof FailedPredicateException) {
153
            $this->reportFailedPredicate($recognizer, $e);
154
        } else {
155
            $recognizer->notifyErrorListeners($e->getMessage(), $e->getOffendingToken(), $e);
156
        }
157 4
    }
158
159
    /**
160
     * {@inheritdoc}
161
     *
162
     * The default implementation resynchronizes the parser by consuming tokens
163
     * until we find one in the resynchronization set--loosely the set of tokens
164
     * that can follow the current rule.
165
     */
166 4
    public function recover(Parser $recognizer, RecognitionException $e) : void
167
    {
168 4
        $inputStream = $recognizer->getInputStream();
169
170 4
        if ($inputStream === null) {
171
            throw new \RuntimeException('Unexpected null input stream.');
172
        }
173
174 4
        if ($this->lastErrorStates !== null
175 4
            && $this->lastErrorIndex === $inputStream->getIndex()
176 4
            && $this->lastErrorStates->contains($recognizer->getState())
177
        ) {
178
            // uh oh, another error at same token index and previously-visited
179
            // state in ATN; must be a case where LT(1) is in the recovery
180
            // token set so nothing got consumed. Consume a single token
181
            // at least to prevent an infinite loop; this is a failsafe.
182
            $recognizer->consume();
183
        }
184
185 4
        $this->lastErrorIndex = $inputStream->getIndex();
186
187 4
        if ($this->lastErrorStates === null) {
188 4
            $this->lastErrorStates = new IntervalSet();
189
        }
190
191 4
        $this->lastErrorStates->addOne($recognizer->getState());
192
193 4
        $followSet = $this->getErrorRecoverySet($recognizer);
194
195 4
        $this->consumeUntil($recognizer, $followSet);
196 4
    }
197
198
    /**
199
     * The default implementation of {@see ANTLRErrorStrategy::sync()} makes sure
200
     * that the current lookahead symbol is consistent with what were expecting
201
     * at this point in the ATN. You can call this anytime but ANTLR only
202
     * generates code to check before subrules/loops and each iteration.
203
     *
204
     * Implements Jim Idle's magic sync mechanism in closures and optional
205
     * subrules. E.g.,
206
     *
207
     *     a : sync ( stuff sync )* ;
208
     *     sync : {consume to what can follow sync} ;
209
     *
210
     * At the start of a sub rule upon error, {@see sync} performs single
211
     * token deletion, if possible. If it can't do that, it bails on the current
212
     * rule and uses the default error recovery, which consumes until the
213
     * resynchronization set of the current rule.
214
     *
215
     * If the sub rule is optional (`(...)?`, `(...)*`, or block
216
     * with an empty alternative), then the expected set includes what follows
217
     * the subrule.
218
     *
219
     * During loop iteration, it consumes until it sees a token that can start a
220
     * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
221
     * stay in the loop as long as possible.
222
     *
223
     * ORIGINS
224
     *
225
     * Previous versions of ANTLR did a poor job of their recovery within loops.
226
     * A single mismatch token or missing token would force the parser to bail
227
     * out of the entire rules surrounding the loop. So, for rule
228
     *
229
     *     classDef : 'class' ID '{' member* '}'
230
     *
231
     * input with an extra token between members would force the parser to
232
     * consume until it found the next class definition rather than the next
233
     * member definition of the current class.
234
     *
235
     * This functionality cost a little bit of effort because the parser has to
236
     * compare token set at the start of the loop and at each iteration. If for
237
     * some reason speed is suffering for you, you can turn off this
238
     * functionality by simply overriding this method as a blank { }.
239
     *
240
     * @throws RecognitionException
241
     */
242 7
    public function sync(Parser $recognizer) : void
243
    {
244 7
        $interpreter = $recognizer->getInterpreter();
245
246 7
        if ($interpreter === null) {
247
            throw new \RuntimeException('Unexpected null interpreter.');
248
        }
249
250
        /** @var ATNState $s */
251 7
        $s = $interpreter->atn->states[$recognizer->getState()];
252
253
        // If already recovering, don't try to sync
254 7
        if ($this->inErrorRecoveryMode($recognizer)) {
255
            return;
256
        }
257
258 7
        $tokens = $recognizer->getInputStream();
259
260 7
        if ($tokens === null) {
261
            throw new \RuntimeException('Unexpected null input stream.');
262
        }
263
264 7
        $la = $tokens->LA(1);
265
266
        // try cheaper subset first; might get lucky. seems to shave a wee bit off
267 7
        $nextTokens = $recognizer->getATN()->nextTokens($s);
268
269 7
        if ($nextTokens->contains($la)) {
270
            // We are sure the token matches
271 4
            $this->nextTokensContext = null;
272 4
            $this->nextTokensState = ATNState::INVALID_STATE_NUMBER;
273
274 4
            return;
275
        }
276
277 7
        if ($nextTokens->contains(Token::EPSILON)) {
278 4
            if ($this->nextTokensContext === null) {
279
                // It's possible the next token won't match; information tracked
280
                // by sync is restricted for performance.
281 4
                $this->nextTokensContext = $recognizer->getContext();
282 4
                $this->nextTokensState = $recognizer->getState();
283
            }
284 4
            return;
285
        }
286
287 3
        switch ($s->getStateType()) {
288
            case ATNState::BLOCK_START:
289
            case ATNState::STAR_BLOCK_START:
290
            case ATNState::PLUS_BLOCK_START:
291
            case ATNState::STAR_LOOP_ENTRY:
292
                // report error and recover if possible
293 3
                if ($this->singleTokenDeletion($recognizer) !== null) {
294
                    return;
295
                }
296
297 3
                throw new InputMismatchException($recognizer);
298
299
            case ATNState::PLUS_LOOP_BACK:
300
            case ATNState::STAR_LOOP_BACK:
301
                $this->reportUnwantedToken($recognizer);
302
                $expecting = $recognizer->getExpectedTokens();
303
                $whatFollowsLoopIterationOrRule = $expecting->orSet($this->getErrorRecoverySet($recognizer));
304
                $this->consumeUntil($recognizer, $whatFollowsLoopIterationOrRule);
305
                break;
306
307
            default:
308
                // do nothing if we can't identify the exact kind of ATN state
309
                break;
310
        }
311
    }
312
313
    /**
314
     * This is called by {@see DefaultErrorStrategy::reportError()} when
315
     * the exception is a {@see NoViableAltException}.
316
     *
317
     * @param Parser               $recognizer The parser instance.
318
     * @param NoViableAltException $e          The recognition exception.
319
     *
320
     * @see DefaultErrorStrategy::reportError()
321
     */
322
    protected function reportNoViableAlternative(Parser $recognizer, NoViableAltException $e) : void
323
    {
324
        $tokens = $recognizer->getTokenStream();
325
326
        $input = '<unknown input>';
327
328
        if ($tokens !== null) {
329
            $startToken = $e->getStartToken();
330
331
            if ($startToken === null) {
332
                throw new \RuntimeException('Unexpected null start token.');
333
            }
334
335
            if ($startToken->getType() === Token::EOF) {
336
                $input = '<EOF>';
337
            } else {
338
                $input = $tokens->getTextByTokens($e->getStartToken(), $e->getOffendingToken());
339
            }
340
        }
341
342
        $msg = \sprintf('no viable alternative at input %s', $this->escapeWSAndQuote($input));
343
344
        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
345
    }
346
347
    /**
348
     * This is called by {@see DefaultErrorStrategy::reportError()} when
349
     * the exception is an {@see InputMismatchException}.
350
     *
351
     * @param Parser                 $recognizer The parser instance.
352
     * @param InputMismatchException $e          The recognition exception.
353
     *
354
     * @see DefaultErrorStrategy::reportError()
355
     */
356 4
    protected function reportInputMismatch(Parser $recognizer, InputMismatchException $e) : void
357
    {
358 4
        $expectedTokens = $e->getExpectedTokens();
359
360 4
        if ($expectedTokens === null) {
361
            throw new \RuntimeException('Unexpected null expected tokens.');
362
        }
363
364 4
        $msg = \sprintf(
365 4
            'mismatched input %s expecting %s',
366 4
            $this->getTokenErrorDisplay($e->getOffendingToken()),
367 4
            $expectedTokens->toStringVocabulary($recognizer->getVocabulary())
368
        );
369
370 4
        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
371 4
    }
372
373
    /**
374
     * This is called by {@see DefaultErrorStrategy::reportError()} when
375
     * the exception is a {@see FailedPredicateException}.
376
     *
377
     * @param Parser                   $recognizer The parser instance.
378
     * @param FailedPredicateException $e          The recognition exception.
379
     *
380
     * @see DefaultErrorStrategy::reportError()
381
     */
382
    protected function reportFailedPredicate(Parser $recognizer, FailedPredicateException $e) : void
383
    {
384
        $msg = \sprintf('rule %s %s', $recognizer->getCurrentRuleName(), $e->getMessage());
385
386
        $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
387
    }
388
389
    /**
390
     * This method is called to report a syntax error which requires the removal
391
     * of a token from the input stream. At the time this method is called, the
392
     * erroneous symbol is current `LT(1)` symbol and has not yet been
393
     * removed from the input stream. When this method returns,
394
     * `$recognizer` is in error recovery mode.
395
     *
396
     * This method is called when {@see DefaultErrorStrategy::singleTokenDeletion()}
397
     * identifies single-token deletion as a viable recovery strategy for
398
     * a mismatched input error.
399
     *
400
     * The default implementation simply returns if the handler is already in
401
     * error recovery mode. Otherwise, it calls
402
     * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error
403
     * recovery mode, followed by calling {@see Parser::notifyErrorListeners}.
404
     *
405
     * @param Parser $recognizer The parser instance.
406
     */
407
    protected function reportUnwantedToken(Parser $recognizer) : void
408
    {
409
        if ($this->inErrorRecoveryMode($recognizer)) {
410
            return;
411
        }
412
413
        $this->beginErrorCondition($recognizer);
414
415
        $t = $recognizer->getCurrentToken();
416
        $tokenName = $this->getTokenErrorDisplay($t);
417
        $expecting = $this->getExpectedTokens($recognizer);
418
419
        $msg = \sprintf(
420
            'extraneous input %s expecting %s',
421
            $tokenName,
422
            $expecting->toStringVocabulary($recognizer->getVocabulary())
423
        );
424
425
        $recognizer->notifyErrorListeners($msg, $t);
426
    }
427
428
    /**
429
     * This method is called to report a syntax error which requires the
430
     * insertion of a missing token into the input stream. At the time this
431
     * method is called, the missing token has not yet been inserted. When this
432
     * method returns, `$recognizer` is in error recovery mode.
433
     *
434
     * This method is called when {@see DefaultErrorStrategy::singleTokenInsertion()}
435
     * identifies single-token insertion as a viable recovery strategy for
436
     * a mismatched input error.
437
     *
438
     * The default implementation simply returns if the handler is already in
439
     * error recovery mode. Otherwise, it calls
440
     * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error
441
     * recovery mode, followed by calling {@see Parser::notifyErrorListeners()}.
442
     *
443
     * @param Parser $recognizer the parser instance
444
     */
445
    protected function reportMissingToken(Parser $recognizer) : void
446
    {
447
        if ($this->inErrorRecoveryMode($recognizer)) {
448
            return;
449
        }
450
451
        $this->beginErrorCondition($recognizer);
452
453
        $t = $recognizer->getCurrentToken();
454
        $expecting = $this->getExpectedTokens($recognizer);
455
456
        $msg = \sprintf(
457
            'missing %s at %s',
458
            $expecting->toStringVocabulary($recognizer->getVocabulary()),
459
            $this->getTokenErrorDisplay($t)
460
        );
461
462
        $recognizer->notifyErrorListeners($msg, $t);
463
    }
464
465
    /**
466
     * {@inheritdoc}
467
     *
468
     * The default implementation attempts to recover from the mismatched input
469
     * by using single token insertion and deletion as described below. If the
470
     * recovery attempt fails, this method throws an
471
     * {@see InputMismatchException}.
472
     *
473
     * EXTRA TOKEN (single token deletion)
474
     *
475
     * `LA(1)` is not what we are looking for. If `LA(2)` has the
476
     * right token, however, then assume `LA(1)` is some extra spurious
477
     * token and delete it. Then consume and return the next token (which was
478
     * the `LA(2)` token) as the successful result of the match operation.
479
     *
480
     * This recovery strategy is implemented by
481
     * {@see DefaultErrorStrategy::singleTokenDeletion()}.
482
     *
483
     * MISSING TOKEN (single token insertion)
484
     *
485
     * If current token (at `LA(1)`) is consistent with what could come
486
     * after the expected `LA(1)` token, then assume the token is missing
487
     * and use the parser's {@see TokenFactory} to create it on the fly. The
488
     * "insertion" is performed by returning the created token as the successful
489
     * result of the match operation.
490
     *
491
     * This recovery strategy is implemented by
492
     * {@see DefaultErrorStrategy::singleTokenInsertion()}.
493
     *
494
     * EXAMPLE
495
     *
496
     * For example, Input `i=(3;` is clearly missing the `')'`. When
497
     * the parser returns from the nested call to `expr`, it will have
498
     * call chain:
499
     *
500
     *     stat &rarr; expr &rarr; atom
501
     *
502
     * and it will be trying to match the `')'` at this point in the
503
     * derivation:
504
     *
505
     *     =&gt; ID '=' '(' INT ')' ('+' atom)* ';'
506
     *                        ^
507
     *
508
     * The attempt to match `')'` will fail when it sees `';'` and call
509
     * {@see DefaultErrorStrategy::recoverInline()}. To recover, it sees that
510
     * `LA(1)==';'` is in the set of tokens that can follow the `')'` token
511
     * reference in rule `atom`. It can assume that you forgot the `')'`.
512
     *
513
     * @throws RecognitionException
514
     */
515 1
    public function recoverInline(Parser $recognizer) : Token
516
    {
517
        // SINGLE TOKEN DELETION
518 1
        $matchedSymbol = $this->singleTokenDeletion($recognizer);
519
520 1
        if ($matchedSymbol !== null) {
521
            // we have deleted the extra token.
522
            // now, move past ttype token as if all were ok
523
            $recognizer->consume();
524
525
            return $matchedSymbol;
526
        }
527
528
        // SINGLE TOKEN INSERTION
529 1
        if ($this->singleTokenInsertion($recognizer)) {
530
            return $this->getMissingSymbol($recognizer);
531
        }
532
533
        // even that didn't work; must throw the exception
534 1
        if ($this->nextTokensContext === null) {
535
            throw new InputMismatchException($recognizer);
536
        }
537
538 1
        throw new InputMismatchException($recognizer, $this->nextTokensState, $this->nextTokensContext);
539
    }
540
541
    /**
542
     * This method implements the single-token insertion inline error recovery
543
     * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()}
544
     * if the single-token deletion strategy fails to recover from the mismatched
545
     * input. If this method returns `true`, `$recognizer` will be in error
546
     * recovery mode.
547
     *
548
     * This method determines whether or not single-token insertion is viable by
549
     * checking if the `LA(1)` input symbol could be successfully matched
550
     * if it were instead the `LA(2)` symbol. If this method returns
551
     * `true`, the caller is responsible for creating and inserting a
552
     * token with the correct type to produce this behavior.
553
     *
554
     * @param Parser $recognizer The parser instance.
555
     *
556
     * @return bool `true` If single-token insertion is a viable recovery
557
     *              strategy for the current mismatched input, otherwise `false`.
558
     */
559 1
    protected function singleTokenInsertion(Parser $recognizer) : bool
560
    {
561 1
        $stream = $recognizer->getInputStream();
562
563 1
        if ($stream === null) {
564
            throw new \RuntimeException('Unexpected null input stream.');
565
        }
566
567 1
        $interpreter = $recognizer->getInterpreter();
568
569 1
        if ($interpreter === null) {
570
            throw new \RuntimeException('Unexpected null interpreter.');
571
        }
572
573 1
        $currentSymbolType = $stream->LA(1);
574
575
        // if current token is consistent with what could come after current
576
        // ATN state, then we know we're missing a token; error recovery
577
        // is free to conjure up and insert the missing token
578
579 1
        $atn = $interpreter->atn;
580
        /** @var ATNState $currentState */
581 1
        $currentState = $atn->states[$recognizer->getState()];
582 1
        $next = $currentState->getTransition(0)->target;
583 1
        $expectingAtLL2 = $atn->nextTokensInContext($next, $recognizer->getContext());
584
585 1
        if ($expectingAtLL2->contains($currentSymbolType)) {
586
            $this->reportMissingToken($recognizer);
587
588
            return true;
589
        }
590
591 1
        return false;
592
    }
593
594
    /**
595
     * This method implements the single-token deletion inline error recovery
596
     * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()}
597
     * to attempt to recover from mismatched input. If this method returns null,
598
     * the parser and error handler state will not have changed. If this method
599
     * returns non-null, `$recognizer` will _not_ be in error recovery mode
600
     * since the returned token was a successful match.
601
     *
602
     * If the single-token deletion is successful, this method calls
603
     * {@see DefaultErrorStrategy::reportUnwantedToken()} to report the error,
604
     * followed by {@see Parser::consume()} to actually "delete" the extraneous
605
     * token. Then, before returning {@see DefaultErrorStrategy::reportMatch()}
606
     * is called to signal a successful match.
607
     *
608
     * @param Parser $recognizer The parser instance.
609
     *
610
     * @return Token The successfully matched {@see Token} instance if
611
     *               single-token deletion successfully recovers from
612
     *               the mismatched input, otherwise `null`.
613
     */
614 4
    protected function singleTokenDeletion(Parser $recognizer) : ?Token
615
    {
616 4
        $inputStream = $recognizer->getInputStream();
617
618 4
        if ($inputStream === null) {
619
            throw new \RuntimeException('Unexpected null input stream.');
620
        }
621
622 4
        $nextTokenType = $inputStream->LA(2);
623 4
        $expecting = $this->getExpectedTokens($recognizer);
624
625 4
        if ($expecting->contains($nextTokenType)) {
626
            $this->reportUnwantedToken($recognizer);
627
            $recognizer->consume(); // simply delete extra token
628
            // we want to return the token we're actually matching
629
            $matchedSymbol = $recognizer->getCurrentToken();
630
            $this->reportMatch($recognizer);  // we know current token is correct
631
632
            return $matchedSymbol;
633
        }
634
635 4
        return null;
636
    }
637
638
    /** Conjure up a missing token during error recovery.
639
     *
640
     *  The recognizer attempts to recover from single missing
641
     *  symbols. But, actions might refer to that missing symbol.
642
     *  For example, x=ID {f($x);}. The action clearly assumes
643
     *  that there has been an identifier matched previously and that
644
     *  $x points at that token. If that token is missing, but
645
     *  the next token in the stream is what we want we assume that
646
     *  this token is missing and we keep going. Because we
647
     *  have to return some token to replace the missing token,
648
     *  we have to conjure one up. This method gives the user control
649
     *  over the tokens returned for missing tokens. Mostly,
650
     *  you will want to create something special for identifier
651
     *  tokens. For literals such as '{' and ',', the default
652
     *  action in the parser or tree parser works. It simply creates
653
     *  a CommonToken of the appropriate type. The text will be the token.
654
     *  If you change what tokens must be created by the lexer,
655
     *  override this method to create the appropriate tokens.
656
     */
657
    protected function getMissingSymbol(Parser $recognizer) : Token
658
    {
659
        $currentSymbol = $recognizer->getCurrentToken();
660
661
        if ($currentSymbol === null) {
662
            throw new \RuntimeException('Unexpected null current token.');
663
        }
664
665
        $inputStream = $recognizer->getInputStream();
666
667
        if ($inputStream === null) {
668
            throw new \RuntimeException('Unexpected null input stream.');
669
        }
670
671
        $tokenSource = $currentSymbol->getTokenSource();
672
673
        if ($tokenSource === null) {
674
            throw new \RuntimeException('Unexpected null token source.');
675
        }
676
677
        $expecting = $this->getExpectedTokens($recognizer);
678
679
        $expectedTokenType = Token::INVALID_TYPE;
680
681
        if (!$expecting->isNull()) {
682
            $expectedTokenType = $expecting->getMinElement(); // get any element
683
        }
684
685
        if ($expectedTokenType === Token::EOF) {
686
            $tokenText = '<missing EOF>';
687
        } else {
688
            $tokenText = \sprintf('<missing %s>', $recognizer->getVocabulary()->getDisplayName($expectedTokenType));
689
        }
690
691
        $current = $currentSymbol;
692
        $lookback = $inputStream->LT(-1);
693
694
        if ($current->getType() === Token::EOF && $lookback !== null) {
695
            $current = $lookback;
696
        }
697
698
        return $recognizer->getTokenFactory()->createEx(
699
            new Pair(
700
                $tokenSource,
701
                $tokenSource->getInputStream()
702
            ),
703
            $expectedTokenType,
704
            $tokenText,
705
            Token::DEFAULT_CHANNEL,
706
            -1,
707
            -1,
708
            $current->getLine(),
709
            $current->getCharPositionInLine()
710
        );
711
    }
712
713 4
    protected function getExpectedTokens(Parser $recognizer) : IntervalSet
714
    {
715 4
        return $recognizer->getExpectedTokens();
716
    }
717
718
    /**
719
     * How should a token be displayed in an error message? The default
720
     * is to display just the text, but during development you might
721
     * want to have a lot of information spit out.  Override in that case
722
     * to use (string) (which, for CommonToken, dumps everything about
723
     * the token). This is better than forcing you to override a method in
724
     * your token objects because you don't have to go modify your lexer
725
     * so that it creates a new Java type.
726
     */
727 4
    protected function getTokenErrorDisplay(?Token $t) : string
728
    {
729 4
        if ($t === null) {
730
            return '<no token>';
731
        }
732
733 4
        $s = $this->getSymbolText($t);
734
735 4
        if ($s === null) {
736
            if ($this->getSymbolType($t) === Token::EOF) {
737
                $s = '<EOF>';
738
            } else {
739
                $s = '<' . $this->getSymbolType($t) . '>';
740
            }
741
        }
742
743 4
        return $this->escapeWSAndQuote($s);
744
    }
745
746 4
    protected function getSymbolText(Token $symbol) : ?string
747
    {
748 4
        return $symbol->getText();
749
    }
750
751
    protected function getSymbolType(Token $symbol) : int
752
    {
753
        return $symbol->getType();
754
    }
755
756 4
    protected function escapeWSAndQuote(string $s) : string
757
    {
758 4
        return "'" . StringUtils::escapeWhitespace($s) . "'";
759
    }
760
761
    /**
762
     * Compute the error recovery set for the current rule.  During
763
     * rule invocation, the parser pushes the set of tokens that can
764
     * follow that rule reference on the stack; this amounts to
765
     * computing FIRST of what follows the rule reference in the
766
     * enclosing rule. See LinearApproximator::FIRST.
767
     * This local follow set only includes tokens
768
     * from within the rule; i.e., the FIRST computation done by
769
     * ANTLR stops at the end of a rule.
770
     *
771
     * EXAMPLE
772
     *
773
     * When you find a "no viable alt exception", the input is not
774
     * consistent with any of the alternatives for rule r.  The best
775
     * thing to do is to consume tokens until you see something that
776
     * can legally follow a call to r *or* any rule that called r.
777
     * You don't want the exact set of viable next tokens because the
778
     * input might just be missing a token--you might consume the
779
     * rest of the input looking for one of the missing tokens.
780
     *
781
     * Consider grammar:
782
     *
783
     *     a : '[' b ']'
784
     *       | '(' b ')'
785
     *       ;
786
     *     b : c '^' INT ;
787
     *     c : ID
788
     *       | INT
789
     *       ;
790
     *
791
     * At each rule invocation, the set of tokens that could follow
792
     * that rule is pushed on a stack.  Here are the various
793
     * context-sensitive follow sets:
794
     *
795
     *     FOLLOW(b1_in_a) = FIRST(']') = ']'
796
     *     FOLLOW(b2_in_a) = FIRST(')') = ')'
797
     *     FOLLOW(c_in_b) = FIRST('^') = '^'
798
     *
799
     * Upon erroneous input "[]", the call chain is
800
     *
801
     *     a -> b -> c
802
     *
803
     * and, hence, the follow context stack is:
804
     *
805
     * depth | follow set | start of rule execution
806
     * ------|------------|-------------------------
807
     *   0   |   <EOF>    |    a (from main())
808
     *   1   |   ']'      |          b
809
     *   2   |   '^'      |          c
810
     *
811
     * Notice that ')' is not included, because b would have to have
812
     * been called from a different context in rule a for ')' to be
813
     * included.
814
     *
815
     * For error recovery, we cannot consider FOLLOW(c)
816
     * (context-sensitive or otherwise).  We need the combined set of
817
     * all context-sensitive FOLLOW sets--the set of all tokens that
818
     * could follow any reference in the call chain.  We need to
819
     * resync to one of those tokens.  Note that FOLLOW(c)='^' and if
820
     * we resync'd to that token, we'd consume until EOF.  We need to
821
     * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
822
     * In this case, for input "[]", LA(1) is ']' and in the set, so we would
823
     * not consume anything. After printing an error, rule c would
824
     * return normally.  Rule b would not find the required '^' though.
825
     * At this point, it gets a mismatched token error and throws an
826
     * exception (since LA(1) is not in the viable following token
827
     * set).  The rule exception handler tries to recover, but finds
828
     * the same recovery set and doesn't consume anything.  Rule b
829
     * exits normally returning to rule a.  Now it finds the ']' (and
830
     * with the successful match exits errorRecovery mode).
831
     *
832
     * So, you can see that the parser walks up the call chain looking
833
     * for the token that was a member of the recovery set.
834
     *
835
     * Errors are not generated in errorRecovery mode.
836
     *
837
     * ANTLR's error recovery mechanism is based upon original ideas:
838
     *
839
     * "Algorithms + Data Structures = Programs" by Niklaus Wirth
840
     *
841
     * and
842
     *
843
     * "A note on error recovery in recursive descent parsers":
844
     * http://portal.acm.org/citation.cfm?id=947902.947905
845
     *
846
     * Later, Josef Grosch had some good ideas:
847
     *
848
     * "Efficient and Comfortable Error Recovery in Recursive Descent
849
     * Parsers":
850
     * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
851
     *
852
     * Like Grosch I implement context-sensitive FOLLOW sets that are combined
853
     * at run-time upon error to avoid overhead during parsing.
854
     */
855 4
    protected function getErrorRecoverySet(Parser $recognizer) : IntervalSet
856
    {
857 4
        $interpreter = $recognizer->getInterpreter();
858
859 4
        if ($interpreter === null) {
860
            throw new \RuntimeException('Unexpected null interpreter.');
861
        }
862
863 4
        $atn = $interpreter->atn;
864 4
        $ctx = $recognizer->getContext();
865 4
        $recoverSet = new IntervalSet();
866
867 4
        while ($ctx !== null && $ctx->invokingState >= 0) {
868
            // compute what follows who invoked us
869
            /** @var ATNState $invokingState */
870
            $invokingState = $atn->states[$ctx->invokingState];
871
            /** @var RuleTransition $rt */
872
            $rt = $invokingState->getTransition(0);
873
            $follow = $atn->nextTokens($rt->followState);
874
            $recoverSet->addSet($follow);
875
            $ctx = $ctx->getParent();
876
        }
877
878 4
        $recoverSet->removeOne(Token::EPSILON);
879
880 4
        return $recoverSet;
881
    }
882
883
    /**
884
     * Consume tokens until one matches the given token set.
885
     */
886 4
    protected function consumeUntil(Parser $recognizer, IntervalSet $set) : void
887
    {
888 4
        $inputStream = $recognizer->getInputStream();
889
890 4
        if ($inputStream === null) {
891
            throw new \RuntimeException('Unexpected null input stream.');
892
        }
893
894 4
        $ttype = $inputStream->LA(1);
895
896 4
        while ($ttype !== Token::EOF && !$set->contains($ttype)) {
897 1
            $recognizer->consume();
898 1
            $ttype = $inputStream->LA(1);
899
        }
900 4
    }
901
}
902