SetBased /
antlr-php-runtime
| 1 | <?php |
||||
| 2 | |||||
| 3 | declare(strict_types=1); |
||||
| 4 | |||||
| 5 | namespace Antlr\Antlr4\Runtime\Error; |
||||
| 6 | |||||
| 7 | use Antlr\Antlr4\Runtime\Atn\States\ATNState; |
||||
| 8 | use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; |
||||
| 9 | use Antlr\Antlr4\Runtime\Error\Exceptions\FailedPredicateException; |
||||
| 10 | use Antlr\Antlr4\Runtime\Error\Exceptions\InputMismatchException; |
||||
| 11 | use Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException; |
||||
| 12 | use Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException; |
||||
| 13 | use Antlr\Antlr4\Runtime\IntervalSet; |
||||
| 14 | use Antlr\Antlr4\Runtime\Parser; |
||||
| 15 | use Antlr\Antlr4\Runtime\ParserRuleContext; |
||||
| 16 | use Antlr\Antlr4\Runtime\Token; |
||||
| 17 | use Antlr\Antlr4\Runtime\Utils\Pair; |
||||
| 18 | use Antlr\Antlr4\Runtime\Utils\StringUtils; |
||||
| 19 | |||||
| 20 | /** |
||||
| 21 | * This is the default implementation of {@see ANTLRErrorStrategy} used for |
||||
| 22 | * error reporting and recovery in ANTLR parsers. |
||||
| 23 | */ |
||||
| 24 | class DefaultErrorStrategy implements ANTLRErrorStrategy |
||||
| 25 | { |
||||
| 26 | /** |
||||
| 27 | * Indicates whether the error strategy is currently "recovering from an |
||||
| 28 | * error". This is used to suppress reporting multiple error messages while |
||||
| 29 | * attempting to recover from a detected syntax error. |
||||
| 30 | * |
||||
| 31 | * @see DefaultErrorStrategy::inErrorRecoveryMode() |
||||
| 32 | * |
||||
| 33 | * @var bool |
||||
| 34 | */ |
||||
| 35 | protected $errorRecoveryMode = false; |
||||
| 36 | |||||
| 37 | /** The index into the input stream where the last error occurred. |
||||
| 38 | * This is used to prevent infinite loops where an error is found |
||||
| 39 | * but no token is consumed during recovery...another error is found, |
||||
| 40 | * ad nauseum. This is a failsafe mechanism to guarantee that at least |
||||
| 41 | * one token/tree node is consumed for two errors. |
||||
| 42 | * |
||||
| 43 | * @var int |
||||
| 44 | */ |
||||
| 45 | protected $lastErrorIndex = -1; |
||||
| 46 | |||||
| 47 | /** @var IntervalSet|null */ |
||||
| 48 | protected $lastErrorStates; |
||||
| 49 | |||||
| 50 | /** |
||||
| 51 | * This field is used to propagate information about the lookahead following |
||||
| 52 | * the previous match. Since prediction prefers completing the current rule |
||||
| 53 | * to error recovery efforts, error reporting may occur later than the |
||||
| 54 | * original point where it was discoverable. The original context is used to |
||||
| 55 | * compute the true expected sets as though the reporting occurred as early |
||||
| 56 | * as possible. |
||||
| 57 | * |
||||
| 58 | * @var ParserRuleContext|null |
||||
| 59 | */ |
||||
| 60 | protected $nextTokensContext; |
||||
| 61 | |||||
| 62 | /** |
||||
| 63 | * @see DefaultErrorStrategy::$nextTokensContext |
||||
| 64 | * |
||||
| 65 | * @var int|null |
||||
| 66 | */ |
||||
| 67 | protected $nextTokensState; |
||||
| 68 | |||||
| 69 | /** |
||||
| 70 | * {@inheritdoc} |
||||
| 71 | * |
||||
| 72 | * The default implementation simply calls |
||||
| 73 | * {@see DefaultErrorStrategy::endErrorCondition()} to ensure that |
||||
| 74 | * the handler is not in error recovery mode. |
||||
| 75 | */ |
||||
| 76 | 7 | public function reset(Parser $recognizer) : void |
|||
| 77 | { |
||||
| 78 | 7 | $this->endErrorCondition($recognizer); |
|||
| 79 | 7 | } |
|||
| 80 | |||||
| 81 | /** |
||||
| 82 | * This method is called to enter error recovery mode when a recognition |
||||
| 83 | * exception is reported. |
||||
| 84 | * |
||||
| 85 | * @param Parser $recognizer The parser instance. |
||||
| 86 | */ |
||||
| 87 | 4 | protected function beginErrorCondition(Parser $recognizer) : void |
|||
|
0 ignored issues
–
show
|
|||||
| 88 | { |
||||
| 89 | 4 | $this->errorRecoveryMode = true; |
|||
| 90 | 4 | } |
|||
| 91 | |||||
| 92 | 7 | public function inErrorRecoveryMode(Parser $recognizer) : bool |
|||
| 93 | { |
||||
| 94 | 7 | return $this->errorRecoveryMode; |
|||
| 95 | } |
||||
| 96 | |||||
| 97 | /** |
||||
| 98 | * This method is called to leave error recovery mode after recovering from |
||||
| 99 | * a recognition exception. |
||||
| 100 | */ |
||||
| 101 | 7 | protected function endErrorCondition(Parser $recognizer) : void |
|||
|
0 ignored issues
–
show
The parameter
$recognizer is not used and could be removed.
(
Ignorable by Annotation
)
If this is a false-positive, you can also ignore this issue in your code via the
This check looks for parameters that have been defined for a function or method, but which are not used in the method body. Loading history...
|
|||||
| 102 | { |
||||
| 103 | 7 | $this->errorRecoveryMode = false; |
|||
| 104 | 7 | $this->lastErrorStates = null; |
|||
| 105 | 7 | $this->lastErrorIndex = -1; |
|||
| 106 | 7 | } |
|||
| 107 | |||||
| 108 | /** |
||||
| 109 | * {@inheritdoc} |
||||
| 110 | * |
||||
| 111 | * The default implementation simply calls |
||||
| 112 | * {@see DefaultErrorStrategy::endErrorCondition()}. |
||||
| 113 | */ |
||||
| 114 | 4 | public function reportMatch(Parser $recognizer) : void |
|||
| 115 | { |
||||
| 116 | 4 | $this->endErrorCondition($recognizer); |
|||
| 117 | 4 | } |
|||
| 118 | |||||
| 119 | /** |
||||
| 120 | * {@inheritdoc} |
||||
| 121 | * |
||||
| 122 | * The default implementation returns immediately if the handler is already |
||||
| 123 | * in error recovery mode. Otherwise, it calls |
||||
| 124 | * {@see DefaultErrorStrategy::beginErrorCondition()} and dispatches |
||||
| 125 | * the reporting task based on the runtime type of `e` according to |
||||
| 126 | * the following table. |
||||
| 127 | * |
||||
| 128 | * - {@see NoViableAltException}: Dispatches the call to |
||||
| 129 | * {@see reportNoViableAlternative} |
||||
| 130 | * - {@see InputMismatchException}: Dispatches the call to |
||||
| 131 | * {@see reportInputMismatch} |
||||
| 132 | * - {@see FailedPredicateException}: Dispatches the call to |
||||
| 133 | * {@see reportFailedPredicate} |
||||
| 134 | * - All other types: calls {@see Parser#notifyErrorListeners} to report |
||||
| 135 | * the exception |
||||
| 136 | */ |
||||
| 137 | 4 | public function reportError(Parser $recognizer, RecognitionException $e) : void |
|||
| 138 | { |
||||
| 139 | // if we've already reported an error and have not matched a token |
||||
| 140 | // yet successfully, don't report any errors. |
||||
| 141 | 4 | if ($this->inErrorRecoveryMode($recognizer)) { |
|||
| 142 | // don't report spurious errors |
||||
| 143 | return; |
||||
| 144 | } |
||||
| 145 | |||||
| 146 | 4 | $this->beginErrorCondition($recognizer); |
|||
| 147 | |||||
| 148 | 4 | if ($e instanceof NoViableAltException) { |
|||
| 149 | $this->reportNoViableAlternative($recognizer, $e); |
||||
| 150 | 4 | } elseif ($e instanceof InputMismatchException) { |
|||
| 151 | 4 | $this->reportInputMismatch($recognizer, $e); |
|||
| 152 | } elseif ($e instanceof FailedPredicateException) { |
||||
| 153 | $this->reportFailedPredicate($recognizer, $e); |
||||
| 154 | } else { |
||||
| 155 | $recognizer->notifyErrorListeners($e->getMessage(), $e->getOffendingToken(), $e); |
||||
| 156 | } |
||||
| 157 | 4 | } |
|||
| 158 | |||||
| 159 | /** |
||||
| 160 | * {@inheritdoc} |
||||
| 161 | * |
||||
| 162 | * The default implementation resynchronizes the parser by consuming tokens |
||||
| 163 | * until we find one in the resynchronization set--loosely the set of tokens |
||||
| 164 | * that can follow the current rule. |
||||
| 165 | */ |
||||
| 166 | 4 | public function recover(Parser $recognizer, RecognitionException $e) : void |
|||
| 167 | { |
||||
| 168 | 4 | $inputStream = $recognizer->getInputStream(); |
|||
| 169 | |||||
| 170 | 4 | if ($inputStream === null) { |
|||
| 171 | throw new \RuntimeException('Unexpected null input stream.'); |
||||
| 172 | } |
||||
| 173 | |||||
| 174 | 4 | if ($this->lastErrorStates !== null |
|||
| 175 | 4 | && $this->lastErrorIndex === $inputStream->getIndex() |
|||
| 176 | 4 | && $this->lastErrorStates->contains($recognizer->getState()) |
|||
| 177 | ) { |
||||
| 178 | // uh oh, another error at same token index and previously-visited |
||||
| 179 | // state in ATN; must be a case where LT(1) is in the recovery |
||||
| 180 | // token set so nothing got consumed. Consume a single token |
||||
| 181 | // at least to prevent an infinite loop; this is a failsafe. |
||||
| 182 | $recognizer->consume(); |
||||
| 183 | } |
||||
| 184 | |||||
| 185 | 4 | $this->lastErrorIndex = $inputStream->getIndex(); |
|||
| 186 | |||||
| 187 | 4 | if ($this->lastErrorStates === null) { |
|||
| 188 | 4 | $this->lastErrorStates = new IntervalSet(); |
|||
| 189 | } |
||||
| 190 | |||||
| 191 | 4 | $this->lastErrorStates->addOne($recognizer->getState()); |
|||
| 192 | |||||
| 193 | 4 | $followSet = $this->getErrorRecoverySet($recognizer); |
|||
| 194 | |||||
| 195 | 4 | $this->consumeUntil($recognizer, $followSet); |
|||
| 196 | 4 | } |
|||
| 197 | |||||
| 198 | /** |
||||
| 199 | * The default implementation of {@see ANTLRErrorStrategy::sync()} makes sure |
||||
| 200 | * that the current lookahead symbol is consistent with what were expecting |
||||
| 201 | * at this point in the ATN. You can call this anytime but ANTLR only |
||||
| 202 | * generates code to check before subrules/loops and each iteration. |
||||
| 203 | * |
||||
| 204 | * Implements Jim Idle's magic sync mechanism in closures and optional |
||||
| 205 | * subrules. E.g., |
||||
| 206 | * |
||||
| 207 | * a : sync ( stuff sync )* ; |
||||
| 208 | * sync : {consume to what can follow sync} ; |
||||
| 209 | * |
||||
| 210 | * At the start of a sub rule upon error, {@see sync} performs single |
||||
| 211 | * token deletion, if possible. If it can't do that, it bails on the current |
||||
| 212 | * rule and uses the default error recovery, which consumes until the |
||||
| 213 | * resynchronization set of the current rule. |
||||
| 214 | * |
||||
| 215 | * If the sub rule is optional (`(...)?`, `(...)*`, or block |
||||
| 216 | * with an empty alternative), then the expected set includes what follows |
||||
| 217 | * the subrule. |
||||
| 218 | * |
||||
| 219 | * During loop iteration, it consumes until it sees a token that can start a |
||||
| 220 | * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to |
||||
| 221 | * stay in the loop as long as possible. |
||||
| 222 | * |
||||
| 223 | * ORIGINS |
||||
| 224 | * |
||||
| 225 | * Previous versions of ANTLR did a poor job of their recovery within loops. |
||||
| 226 | * A single mismatch token or missing token would force the parser to bail |
||||
| 227 | * out of the entire rules surrounding the loop. So, for rule |
||||
| 228 | * |
||||
| 229 | * classDef : 'class' ID '{' member* '}' |
||||
| 230 | * |
||||
| 231 | * input with an extra token between members would force the parser to |
||||
| 232 | * consume until it found the next class definition rather than the next |
||||
| 233 | * member definition of the current class. |
||||
| 234 | * |
||||
| 235 | * This functionality cost a little bit of effort because the parser has to |
||||
| 236 | * compare token set at the start of the loop and at each iteration. If for |
||||
| 237 | * some reason speed is suffering for you, you can turn off this |
||||
| 238 | * functionality by simply overriding this method as a blank { }. |
||||
| 239 | * |
||||
| 240 | * @throws RecognitionException |
||||
| 241 | */ |
||||
| 242 | 7 | public function sync(Parser $recognizer) : void |
|||
| 243 | { |
||||
| 244 | 7 | $interpreter = $recognizer->getInterpreter(); |
|||
| 245 | |||||
| 246 | 7 | if ($interpreter === null) { |
|||
| 247 | throw new \RuntimeException('Unexpected null interpreter.'); |
||||
| 248 | } |
||||
| 249 | |||||
| 250 | /** @var ATNState $s */ |
||||
| 251 | 7 | $s = $interpreter->atn->states[$recognizer->getState()]; |
|||
| 252 | |||||
| 253 | // If already recovering, don't try to sync |
||||
| 254 | 7 | if ($this->inErrorRecoveryMode($recognizer)) { |
|||
| 255 | return; |
||||
| 256 | } |
||||
| 257 | |||||
| 258 | 7 | $tokens = $recognizer->getInputStream(); |
|||
| 259 | |||||
| 260 | 7 | if ($tokens === null) { |
|||
| 261 | throw new \RuntimeException('Unexpected null input stream.'); |
||||
| 262 | } |
||||
| 263 | |||||
| 264 | 7 | $la = $tokens->LA(1); |
|||
| 265 | |||||
| 266 | // try cheaper subset first; might get lucky. seems to shave a wee bit off |
||||
| 267 | 7 | $nextTokens = $recognizer->getATN()->nextTokens($s); |
|||
| 268 | |||||
| 269 | 7 | if ($nextTokens->contains($la)) { |
|||
| 270 | // We are sure the token matches |
||||
| 271 | 4 | $this->nextTokensContext = null; |
|||
| 272 | 4 | $this->nextTokensState = ATNState::INVALID_STATE_NUMBER; |
|||
| 273 | |||||
| 274 | 4 | return; |
|||
| 275 | } |
||||
| 276 | |||||
| 277 | 7 | if ($nextTokens->contains(Token::EPSILON)) { |
|||
| 278 | 4 | if ($this->nextTokensContext === null) { |
|||
| 279 | // It's possible the next token won't match; information tracked |
||||
| 280 | // by sync is restricted for performance. |
||||
| 281 | 4 | $this->nextTokensContext = $recognizer->getContext(); |
|||
| 282 | 4 | $this->nextTokensState = $recognizer->getState(); |
|||
| 283 | } |
||||
| 284 | 4 | return; |
|||
| 285 | } |
||||
| 286 | |||||
| 287 | 3 | switch ($s->getStateType()) { |
|||
| 288 | case ATNState::BLOCK_START: |
||||
| 289 | case ATNState::STAR_BLOCK_START: |
||||
| 290 | case ATNState::PLUS_BLOCK_START: |
||||
| 291 | case ATNState::STAR_LOOP_ENTRY: |
||||
| 292 | // report error and recover if possible |
||||
| 293 | 3 | if ($this->singleTokenDeletion($recognizer) !== null) { |
|||
| 294 | return; |
||||
| 295 | } |
||||
| 296 | |||||
| 297 | 3 | throw new InputMismatchException($recognizer); |
|||
| 298 | |||||
| 299 | case ATNState::PLUS_LOOP_BACK: |
||||
| 300 | case ATNState::STAR_LOOP_BACK: |
||||
| 301 | $this->reportUnwantedToken($recognizer); |
||||
| 302 | $expecting = $recognizer->getExpectedTokens(); |
||||
| 303 | $whatFollowsLoopIterationOrRule = $expecting->orSet($this->getErrorRecoverySet($recognizer)); |
||||
| 304 | $this->consumeUntil($recognizer, $whatFollowsLoopIterationOrRule); |
||||
| 305 | break; |
||||
| 306 | |||||
| 307 | default: |
||||
| 308 | // do nothing if we can't identify the exact kind of ATN state |
||||
| 309 | break; |
||||
| 310 | } |
||||
| 311 | } |
||||
| 312 | |||||
| 313 | /** |
||||
| 314 | * This is called by {@see DefaultErrorStrategy::reportError()} when |
||||
| 315 | * the exception is a {@see NoViableAltException}. |
||||
| 316 | * |
||||
| 317 | * @param Parser $recognizer The parser instance. |
||||
| 318 | * @param NoViableAltException $e The recognition exception. |
||||
| 319 | * |
||||
| 320 | * @see DefaultErrorStrategy::reportError() |
||||
| 321 | */ |
||||
| 322 | protected function reportNoViableAlternative(Parser $recognizer, NoViableAltException $e) : void |
||||
| 323 | { |
||||
| 324 | $tokens = $recognizer->getTokenStream(); |
||||
| 325 | |||||
| 326 | $input = '<unknown input>'; |
||||
| 327 | |||||
| 328 | if ($tokens !== null) { |
||||
| 329 | $startToken = $e->getStartToken(); |
||||
| 330 | |||||
| 331 | if ($startToken === null) { |
||||
| 332 | throw new \RuntimeException('Unexpected null start token.'); |
||||
| 333 | } |
||||
| 334 | |||||
| 335 | if ($startToken->getType() === Token::EOF) { |
||||
| 336 | $input = '<EOF>'; |
||||
| 337 | } else { |
||||
| 338 | $input = $tokens->getTextByTokens($e->getStartToken(), $e->getOffendingToken()); |
||||
| 339 | } |
||||
| 340 | } |
||||
| 341 | |||||
| 342 | $msg = \sprintf('no viable alternative at input %s', $this->escapeWSAndQuote($input)); |
||||
| 343 | |||||
| 344 | $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); |
||||
| 345 | } |
||||
| 346 | |||||
| 347 | /** |
||||
| 348 | * This is called by {@see DefaultErrorStrategy::reportError()} when |
||||
| 349 | * the exception is an {@see InputMismatchException}. |
||||
| 350 | * |
||||
| 351 | * @param Parser $recognizer The parser instance. |
||||
| 352 | * @param InputMismatchException $e The recognition exception. |
||||
| 353 | * |
||||
| 354 | * @see DefaultErrorStrategy::reportError() |
||||
| 355 | */ |
||||
| 356 | 4 | protected function reportInputMismatch(Parser $recognizer, InputMismatchException $e) : void |
|||
| 357 | { |
||||
| 358 | 4 | $expectedTokens = $e->getExpectedTokens(); |
|||
| 359 | |||||
| 360 | 4 | if ($expectedTokens === null) { |
|||
| 361 | throw new \RuntimeException('Unexpected null expected tokens.'); |
||||
| 362 | } |
||||
| 363 | |||||
| 364 | 4 | $msg = \sprintf( |
|||
| 365 | 4 | 'mismatched input %s expecting %s', |
|||
| 366 | 4 | $this->getTokenErrorDisplay($e->getOffendingToken()), |
|||
| 367 | 4 | $expectedTokens->toStringVocabulary($recognizer->getVocabulary()) |
|||
| 368 | ); |
||||
| 369 | |||||
| 370 | 4 | $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); |
|||
| 371 | 4 | } |
|||
| 372 | |||||
| 373 | /** |
||||
| 374 | * This is called by {@see DefaultErrorStrategy::reportError()} when |
||||
| 375 | * the exception is a {@see FailedPredicateException}. |
||||
| 376 | * |
||||
| 377 | * @param Parser $recognizer The parser instance. |
||||
| 378 | * @param FailedPredicateException $e The recognition exception. |
||||
| 379 | * |
||||
| 380 | * @see DefaultErrorStrategy::reportError() |
||||
| 381 | */ |
||||
| 382 | protected function reportFailedPredicate(Parser $recognizer, FailedPredicateException $e) : void |
||||
| 383 | { |
||||
| 384 | $msg = \sprintf('rule %s %s', $recognizer->getCurrentRuleName(), $e->getMessage()); |
||||
| 385 | |||||
| 386 | $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e); |
||||
| 387 | } |
||||
| 388 | |||||
| 389 | /** |
||||
| 390 | * This method is called to report a syntax error which requires the removal |
||||
| 391 | * of a token from the input stream. At the time this method is called, the |
||||
| 392 | * erroneous symbol is current `LT(1)` symbol and has not yet been |
||||
| 393 | * removed from the input stream. When this method returns, |
||||
| 394 | * `$recognizer` is in error recovery mode. |
||||
| 395 | * |
||||
| 396 | * This method is called when {@see DefaultErrorStrategy::singleTokenDeletion()} |
||||
| 397 | * identifies single-token deletion as a viable recovery strategy for |
||||
| 398 | * a mismatched input error. |
||||
| 399 | * |
||||
| 400 | * The default implementation simply returns if the handler is already in |
||||
| 401 | * error recovery mode. Otherwise, it calls |
||||
| 402 | * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error |
||||
| 403 | * recovery mode, followed by calling {@see Parser::notifyErrorListeners}. |
||||
| 404 | * |
||||
| 405 | * @param Parser $recognizer The parser instance. |
||||
| 406 | */ |
||||
| 407 | protected function reportUnwantedToken(Parser $recognizer) : void |
||||
| 408 | { |
||||
| 409 | if ($this->inErrorRecoveryMode($recognizer)) { |
||||
| 410 | return; |
||||
| 411 | } |
||||
| 412 | |||||
| 413 | $this->beginErrorCondition($recognizer); |
||||
| 414 | |||||
| 415 | $t = $recognizer->getCurrentToken(); |
||||
| 416 | $tokenName = $this->getTokenErrorDisplay($t); |
||||
| 417 | $expecting = $this->getExpectedTokens($recognizer); |
||||
| 418 | |||||
| 419 | $msg = \sprintf( |
||||
| 420 | 'extraneous input %s expecting %s', |
||||
| 421 | $tokenName, |
||||
| 422 | $expecting->toStringVocabulary($recognizer->getVocabulary()) |
||||
| 423 | ); |
||||
| 424 | |||||
| 425 | $recognizer->notifyErrorListeners($msg, $t); |
||||
| 426 | } |
||||
| 427 | |||||
| 428 | /** |
||||
| 429 | * This method is called to report a syntax error which requires the |
||||
| 430 | * insertion of a missing token into the input stream. At the time this |
||||
| 431 | * method is called, the missing token has not yet been inserted. When this |
||||
| 432 | * method returns, `$recognizer` is in error recovery mode. |
||||
| 433 | * |
||||
| 434 | * This method is called when {@see DefaultErrorStrategy::singleTokenInsertion()} |
||||
| 435 | * identifies single-token insertion as a viable recovery strategy for |
||||
| 436 | * a mismatched input error. |
||||
| 437 | * |
||||
| 438 | * The default implementation simply returns if the handler is already in |
||||
| 439 | * error recovery mode. Otherwise, it calls |
||||
| 440 | * {@see DefaultErrorStrategy::beginErrorCondition()} to enter error |
||||
| 441 | * recovery mode, followed by calling {@see Parser::notifyErrorListeners()}. |
||||
| 442 | * |
||||
| 443 | * @param Parser $recognizer the parser instance |
||||
| 444 | */ |
||||
| 445 | protected function reportMissingToken(Parser $recognizer) : void |
||||
| 446 | { |
||||
| 447 | if ($this->inErrorRecoveryMode($recognizer)) { |
||||
| 448 | return; |
||||
| 449 | } |
||||
| 450 | |||||
| 451 | $this->beginErrorCondition($recognizer); |
||||
| 452 | |||||
| 453 | $t = $recognizer->getCurrentToken(); |
||||
| 454 | $expecting = $this->getExpectedTokens($recognizer); |
||||
| 455 | |||||
| 456 | $msg = \sprintf( |
||||
| 457 | 'missing %s at %s', |
||||
| 458 | $expecting->toStringVocabulary($recognizer->getVocabulary()), |
||||
| 459 | $this->getTokenErrorDisplay($t) |
||||
| 460 | ); |
||||
| 461 | |||||
| 462 | $recognizer->notifyErrorListeners($msg, $t); |
||||
| 463 | } |
||||
| 464 | |||||
| 465 | /** |
||||
| 466 | * {@inheritdoc} |
||||
| 467 | * |
||||
| 468 | * The default implementation attempts to recover from the mismatched input |
||||
| 469 | * by using single token insertion and deletion as described below. If the |
||||
| 470 | * recovery attempt fails, this method throws an |
||||
| 471 | * {@see InputMismatchException}. |
||||
| 472 | * |
||||
| 473 | * EXTRA TOKEN (single token deletion) |
||||
| 474 | * |
||||
| 475 | * `LA(1)` is not what we are looking for. If `LA(2)` has the |
||||
| 476 | * right token, however, then assume `LA(1)` is some extra spurious |
||||
| 477 | * token and delete it. Then consume and return the next token (which was |
||||
| 478 | * the `LA(2)` token) as the successful result of the match operation. |
||||
| 479 | * |
||||
| 480 | * This recovery strategy is implemented by |
||||
| 481 | * {@see DefaultErrorStrategy::singleTokenDeletion()}. |
||||
| 482 | * |
||||
| 483 | * MISSING TOKEN (single token insertion) |
||||
| 484 | * |
||||
| 485 | * If current token (at `LA(1)`) is consistent with what could come |
||||
| 486 | * after the expected `LA(1)` token, then assume the token is missing |
||||
| 487 | * and use the parser's {@see TokenFactory} to create it on the fly. The |
||||
| 488 | * "insertion" is performed by returning the created token as the successful |
||||
| 489 | * result of the match operation. |
||||
| 490 | * |
||||
| 491 | * This recovery strategy is implemented by |
||||
| 492 | * {@see DefaultErrorStrategy::singleTokenInsertion()}. |
||||
| 493 | * |
||||
| 494 | * EXAMPLE |
||||
| 495 | * |
||||
| 496 | * For example, Input `i=(3;` is clearly missing the `')'`. When |
||||
| 497 | * the parser returns from the nested call to `expr`, it will have |
||||
| 498 | * call chain: |
||||
| 499 | * |
||||
| 500 | * stat → expr → atom |
||||
| 501 | * |
||||
| 502 | * and it will be trying to match the `')'` at this point in the |
||||
| 503 | * derivation: |
||||
| 504 | * |
||||
| 505 | * => ID '=' '(' INT ')' ('+' atom)* ';' |
||||
| 506 | * ^ |
||||
| 507 | * |
||||
| 508 | * The attempt to match `')'` will fail when it sees `';'` and call |
||||
| 509 | * {@see DefaultErrorStrategy::recoverInline()}. To recover, it sees that |
||||
| 510 | * `LA(1)==';'` is in the set of tokens that can follow the `')'` token |
||||
| 511 | * reference in rule `atom`. It can assume that you forgot the `')'`. |
||||
| 512 | * |
||||
| 513 | * @throws RecognitionException |
||||
| 514 | */ |
||||
| 515 | 1 | public function recoverInline(Parser $recognizer) : Token |
|||
| 516 | { |
||||
| 517 | // SINGLE TOKEN DELETION |
||||
| 518 | 1 | $matchedSymbol = $this->singleTokenDeletion($recognizer); |
|||
| 519 | |||||
| 520 | 1 | if ($matchedSymbol !== null) { |
|||
| 521 | // we have deleted the extra token. |
||||
| 522 | // now, move past ttype token as if all were ok |
||||
| 523 | $recognizer->consume(); |
||||
| 524 | |||||
| 525 | return $matchedSymbol; |
||||
| 526 | } |
||||
| 527 | |||||
| 528 | // SINGLE TOKEN INSERTION |
||||
| 529 | 1 | if ($this->singleTokenInsertion($recognizer)) { |
|||
| 530 | return $this->getMissingSymbol($recognizer); |
||||
| 531 | } |
||||
| 532 | |||||
| 533 | // even that didn't work; must throw the exception |
||||
| 534 | 1 | if ($this->nextTokensContext === null) { |
|||
| 535 | throw new InputMismatchException($recognizer); |
||||
| 536 | } |
||||
| 537 | |||||
| 538 | 1 | throw new InputMismatchException($recognizer, $this->nextTokensState, $this->nextTokensContext); |
|||
| 539 | } |
||||
| 540 | |||||
| 541 | /** |
||||
| 542 | * This method implements the single-token insertion inline error recovery |
||||
| 543 | * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()} |
||||
| 544 | * if the single-token deletion strategy fails to recover from the mismatched |
||||
| 545 | * input. If this method returns `true`, `$recognizer` will be in error |
||||
| 546 | * recovery mode. |
||||
| 547 | * |
||||
| 548 | * This method determines whether or not single-token insertion is viable by |
||||
| 549 | * checking if the `LA(1)` input symbol could be successfully matched |
||||
| 550 | * if it were instead the `LA(2)` symbol. If this method returns |
||||
| 551 | * `true`, the caller is responsible for creating and inserting a |
||||
| 552 | * token with the correct type to produce this behavior. |
||||
| 553 | * |
||||
| 554 | * @param Parser $recognizer The parser instance. |
||||
| 555 | * |
||||
| 556 | * @return bool `true` If single-token insertion is a viable recovery |
||||
| 557 | * strategy for the current mismatched input, otherwise `false`. |
||||
| 558 | */ |
||||
| 559 | 1 | protected function singleTokenInsertion(Parser $recognizer) : bool |
|||
| 560 | { |
||||
| 561 | 1 | $stream = $recognizer->getInputStream(); |
|||
| 562 | |||||
| 563 | 1 | if ($stream === null) { |
|||
| 564 | throw new \RuntimeException('Unexpected null input stream.'); |
||||
| 565 | } |
||||
| 566 | |||||
| 567 | 1 | $interpreter = $recognizer->getInterpreter(); |
|||
| 568 | |||||
| 569 | 1 | if ($interpreter === null) { |
|||
| 570 | throw new \RuntimeException('Unexpected null interpreter.'); |
||||
| 571 | } |
||||
| 572 | |||||
| 573 | 1 | $currentSymbolType = $stream->LA(1); |
|||
| 574 | |||||
| 575 | // if current token is consistent with what could come after current |
||||
| 576 | // ATN state, then we know we're missing a token; error recovery |
||||
| 577 | // is free to conjure up and insert the missing token |
||||
| 578 | |||||
| 579 | 1 | $atn = $interpreter->atn; |
|||
| 580 | /** @var ATNState $currentState */ |
||||
| 581 | 1 | $currentState = $atn->states[$recognizer->getState()]; |
|||
| 582 | 1 | $next = $currentState->getTransition(0)->target; |
|||
| 583 | 1 | $expectingAtLL2 = $atn->nextTokensInContext($next, $recognizer->getContext()); |
|||
| 584 | |||||
| 585 | 1 | if ($expectingAtLL2->contains($currentSymbolType)) { |
|||
| 586 | $this->reportMissingToken($recognizer); |
||||
| 587 | |||||
| 588 | return true; |
||||
| 589 | } |
||||
| 590 | |||||
| 591 | 1 | return false; |
|||
| 592 | } |
||||
| 593 | |||||
| 594 | /** |
||||
| 595 | * This method implements the single-token deletion inline error recovery |
||||
| 596 | * strategy. It is called by {@see DefaultErrorStrategy::recoverInline()} |
||||
| 597 | * to attempt to recover from mismatched input. If this method returns null, |
||||
| 598 | * the parser and error handler state will not have changed. If this method |
||||
| 599 | * returns non-null, `$recognizer` will _not_ be in error recovery mode |
||||
| 600 | * since the returned token was a successful match. |
||||
| 601 | * |
||||
| 602 | * If the single-token deletion is successful, this method calls |
||||
| 603 | * {@see DefaultErrorStrategy::reportUnwantedToken()} to report the error, |
||||
| 604 | * followed by {@see Parser::consume()} to actually "delete" the extraneous |
||||
| 605 | * token. Then, before returning {@see DefaultErrorStrategy::reportMatch()} |
||||
| 606 | * is called to signal a successful match. |
||||
| 607 | * |
||||
| 608 | * @param Parser $recognizer The parser instance. |
||||
| 609 | * |
||||
| 610 | * @return Token The successfully matched {@see Token} instance if |
||||
| 611 | * single-token deletion successfully recovers from |
||||
| 612 | * the mismatched input, otherwise `null`. |
||||
| 613 | */ |
||||
| 614 | 4 | protected function singleTokenDeletion(Parser $recognizer) : ?Token |
|||
| 615 | { |
||||
| 616 | 4 | $inputStream = $recognizer->getInputStream(); |
|||
| 617 | |||||
| 618 | 4 | if ($inputStream === null) { |
|||
| 619 | throw new \RuntimeException('Unexpected null input stream.'); |
||||
| 620 | } |
||||
| 621 | |||||
| 622 | 4 | $nextTokenType = $inputStream->LA(2); |
|||
| 623 | 4 | $expecting = $this->getExpectedTokens($recognizer); |
|||
| 624 | |||||
| 625 | 4 | if ($expecting->contains($nextTokenType)) { |
|||
| 626 | $this->reportUnwantedToken($recognizer); |
||||
| 627 | $recognizer->consume(); // simply delete extra token |
||||
| 628 | // we want to return the token we're actually matching |
||||
| 629 | $matchedSymbol = $recognizer->getCurrentToken(); |
||||
| 630 | $this->reportMatch($recognizer); // we know current token is correct |
||||
| 631 | |||||
| 632 | return $matchedSymbol; |
||||
| 633 | } |
||||
| 634 | |||||
| 635 | 4 | return null; |
|||
| 636 | } |
||||
| 637 | |||||
| 638 | /** Conjure up a missing token during error recovery. |
||||
| 639 | * |
||||
| 640 | * The recognizer attempts to recover from single missing |
||||
| 641 | * symbols. But, actions might refer to that missing symbol. |
||||
| 642 | * For example, x=ID {f($x);}. The action clearly assumes |
||||
| 643 | * that there has been an identifier matched previously and that |
||||
| 644 | * $x points at that token. If that token is missing, but |
||||
| 645 | * the next token in the stream is what we want we assume that |
||||
| 646 | * this token is missing and we keep going. Because we |
||||
| 647 | * have to return some token to replace the missing token, |
||||
| 648 | * we have to conjure one up. This method gives the user control |
||||
| 649 | * over the tokens returned for missing tokens. Mostly, |
||||
| 650 | * you will want to create something special for identifier |
||||
| 651 | * tokens. For literals such as '{' and ',', the default |
||||
| 652 | * action in the parser or tree parser works. It simply creates |
||||
| 653 | * a CommonToken of the appropriate type. The text will be the token. |
||||
| 654 | * If you change what tokens must be created by the lexer, |
||||
| 655 | * override this method to create the appropriate tokens. |
||||
| 656 | */ |
||||
| 657 | protected function getMissingSymbol(Parser $recognizer) : Token |
||||
| 658 | { |
||||
| 659 | $currentSymbol = $recognizer->getCurrentToken(); |
||||
| 660 | |||||
| 661 | if ($currentSymbol === null) { |
||||
| 662 | throw new \RuntimeException('Unexpected null current token.'); |
||||
| 663 | } |
||||
| 664 | |||||
| 665 | $inputStream = $recognizer->getInputStream(); |
||||
| 666 | |||||
| 667 | if ($inputStream === null) { |
||||
| 668 | throw new \RuntimeException('Unexpected null input stream.'); |
||||
| 669 | } |
||||
| 670 | |||||
| 671 | $tokenSource = $currentSymbol->getTokenSource(); |
||||
| 672 | |||||
| 673 | if ($tokenSource === null) { |
||||
| 674 | throw new \RuntimeException('Unexpected null token source.'); |
||||
| 675 | } |
||||
| 676 | |||||
| 677 | $expecting = $this->getExpectedTokens($recognizer); |
||||
| 678 | |||||
| 679 | $expectedTokenType = Token::INVALID_TYPE; |
||||
| 680 | |||||
| 681 | if (!$expecting->isNull()) { |
||||
| 682 | $expectedTokenType = $expecting->getMinElement(); // get any element |
||||
| 683 | } |
||||
| 684 | |||||
| 685 | if ($expectedTokenType === Token::EOF) { |
||||
| 686 | $tokenText = '<missing EOF>'; |
||||
| 687 | } else { |
||||
| 688 | $tokenText = \sprintf('<missing %s>', $recognizer->getVocabulary()->getDisplayName($expectedTokenType)); |
||||
| 689 | } |
||||
| 690 | |||||
| 691 | $current = $currentSymbol; |
||||
| 692 | $lookback = $inputStream->LT(-1); |
||||
| 693 | |||||
| 694 | if ($current->getType() === Token::EOF && $lookback !== null) { |
||||
| 695 | $current = $lookback; |
||||
| 696 | } |
||||
| 697 | |||||
| 698 | return $recognizer->getTokenFactory()->createEx( |
||||
| 699 | new Pair( |
||||
| 700 | $tokenSource, |
||||
| 701 | $tokenSource->getInputStream() |
||||
| 702 | ), |
||||
| 703 | $expectedTokenType, |
||||
| 704 | $tokenText, |
||||
| 705 | Token::DEFAULT_CHANNEL, |
||||
| 706 | -1, |
||||
| 707 | -1, |
||||
| 708 | $current->getLine(), |
||||
| 709 | $current->getCharPositionInLine() |
||||
| 710 | ); |
||||
| 711 | } |
||||
| 712 | |||||
| 713 | 4 | protected function getExpectedTokens(Parser $recognizer) : IntervalSet |
|||
| 714 | { |
||||
| 715 | 4 | return $recognizer->getExpectedTokens(); |
|||
| 716 | } |
||||
| 717 | |||||
| 718 | /** |
||||
| 719 | * How should a token be displayed in an error message? The default |
||||
| 720 | * is to display just the text, but during development you might |
||||
| 721 | * want to have a lot of information spit out. Override in that case |
||||
| 722 | * to use (string) (which, for CommonToken, dumps everything about |
||||
| 723 | * the token). This is better than forcing you to override a method in |
||||
| 724 | * your token objects because you don't have to go modify your lexer |
||||
| 725 | * so that it creates a new Java type. |
||||
| 726 | */ |
||||
| 727 | 4 | protected function getTokenErrorDisplay(?Token $t) : string |
|||
| 728 | { |
||||
| 729 | 4 | if ($t === null) { |
|||
| 730 | return '<no token>'; |
||||
| 731 | } |
||||
| 732 | |||||
| 733 | 4 | $s = $this->getSymbolText($t); |
|||
| 734 | |||||
| 735 | 4 | if ($s === null) { |
|||
| 736 | if ($this->getSymbolType($t) === Token::EOF) { |
||||
| 737 | $s = '<EOF>'; |
||||
| 738 | } else { |
||||
| 739 | $s = '<' . $this->getSymbolType($t) . '>'; |
||||
| 740 | } |
||||
| 741 | } |
||||
| 742 | |||||
| 743 | 4 | return $this->escapeWSAndQuote($s); |
|||
| 744 | } |
||||
| 745 | |||||
| 746 | 4 | protected function getSymbolText(Token $symbol) : ?string |
|||
| 747 | { |
||||
| 748 | 4 | return $symbol->getText(); |
|||
| 749 | } |
||||
| 750 | |||||
| 751 | protected function getSymbolType(Token $symbol) : int |
||||
| 752 | { |
||||
| 753 | return $symbol->getType(); |
||||
| 754 | } |
||||
| 755 | |||||
| 756 | 4 | protected function escapeWSAndQuote(string $s) : string |
|||
| 757 | { |
||||
| 758 | 4 | return "'" . StringUtils::escapeWhitespace($s) . "'"; |
|||
| 759 | } |
||||
| 760 | |||||
| 761 | /** |
||||
| 762 | * Compute the error recovery set for the current rule. During |
||||
| 763 | * rule invocation, the parser pushes the set of tokens that can |
||||
| 764 | * follow that rule reference on the stack; this amounts to |
||||
| 765 | * computing FIRST of what follows the rule reference in the |
||||
| 766 | * enclosing rule. See LinearApproximator::FIRST. |
||||
| 767 | * This local follow set only includes tokens |
||||
| 768 | * from within the rule; i.e., the FIRST computation done by |
||||
| 769 | * ANTLR stops at the end of a rule. |
||||
| 770 | * |
||||
| 771 | * EXAMPLE |
||||
| 772 | * |
||||
| 773 | * When you find a "no viable alt exception", the input is not |
||||
| 774 | * consistent with any of the alternatives for rule r. The best |
||||
| 775 | * thing to do is to consume tokens until you see something that |
||||
| 776 | * can legally follow a call to r *or* any rule that called r. |
||||
| 777 | * You don't want the exact set of viable next tokens because the |
||||
| 778 | * input might just be missing a token--you might consume the |
||||
| 779 | * rest of the input looking for one of the missing tokens. |
||||
| 780 | * |
||||
| 781 | * Consider grammar: |
||||
| 782 | * |
||||
| 783 | * a : '[' b ']' |
||||
| 784 | * | '(' b ')' |
||||
| 785 | * ; |
||||
| 786 | * b : c '^' INT ; |
||||
| 787 | * c : ID |
||||
| 788 | * | INT |
||||
| 789 | * ; |
||||
| 790 | * |
||||
| 791 | * At each rule invocation, the set of tokens that could follow |
||||
| 792 | * that rule is pushed on a stack. Here are the various |
||||
| 793 | * context-sensitive follow sets: |
||||
| 794 | * |
||||
| 795 | * FOLLOW(b1_in_a) = FIRST(']') = ']' |
||||
| 796 | * FOLLOW(b2_in_a) = FIRST(')') = ')' |
||||
| 797 | * FOLLOW(c_in_b) = FIRST('^') = '^' |
||||
| 798 | * |
||||
| 799 | * Upon erroneous input "[]", the call chain is |
||||
| 800 | * |
||||
| 801 | * a -> b -> c |
||||
| 802 | * |
||||
| 803 | * and, hence, the follow context stack is: |
||||
| 804 | * |
||||
| 805 | * depth | follow set | start of rule execution |
||||
| 806 | * ------|------------|------------------------- |
||||
| 807 | * 0 | <EOF> | a (from main()) |
||||
| 808 | * 1 | ']' | b |
||||
| 809 | * 2 | '^' | c |
||||
| 810 | * |
||||
| 811 | * Notice that ')' is not included, because b would have to have |
||||
| 812 | * been called from a different context in rule a for ')' to be |
||||
| 813 | * included. |
||||
| 814 | * |
||||
| 815 | * For error recovery, we cannot consider FOLLOW(c) |
||||
| 816 | * (context-sensitive or otherwise). We need the combined set of |
||||
| 817 | * all context-sensitive FOLLOW sets--the set of all tokens that |
||||
| 818 | * could follow any reference in the call chain. We need to |
||||
| 819 | * resync to one of those tokens. Note that FOLLOW(c)='^' and if |
||||
| 820 | * we resync'd to that token, we'd consume until EOF. We need to |
||||
| 821 | * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. |
||||
| 822 | * In this case, for input "[]", LA(1) is ']' and in the set, so we would |
||||
| 823 | * not consume anything. After printing an error, rule c would |
||||
| 824 | * return normally. Rule b would not find the required '^' though. |
||||
| 825 | * At this point, it gets a mismatched token error and throws an |
||||
| 826 | * exception (since LA(1) is not in the viable following token |
||||
| 827 | * set). The rule exception handler tries to recover, but finds |
||||
| 828 | * the same recovery set and doesn't consume anything. Rule b |
||||
| 829 | * exits normally returning to rule a. Now it finds the ']' (and |
||||
| 830 | * with the successful match exits errorRecovery mode). |
||||
| 831 | * |
||||
| 832 | * So, you can see that the parser walks up the call chain looking |
||||
| 833 | * for the token that was a member of the recovery set. |
||||
| 834 | * |
||||
| 835 | * Errors are not generated in errorRecovery mode. |
||||
| 836 | * |
||||
| 837 | * ANTLR's error recovery mechanism is based upon original ideas: |
||||
| 838 | * |
||||
| 839 | * "Algorithms + Data Structures = Programs" by Niklaus Wirth |
||||
| 840 | * |
||||
| 841 | * and |
||||
| 842 | * |
||||
| 843 | * "A note on error recovery in recursive descent parsers": |
||||
| 844 | * http://portal.acm.org/citation.cfm?id=947902.947905 |
||||
| 845 | * |
||||
| 846 | * Later, Josef Grosch had some good ideas: |
||||
| 847 | * |
||||
| 848 | * "Efficient and Comfortable Error Recovery in Recursive Descent |
||||
| 849 | * Parsers": |
||||
| 850 | * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip |
||||
| 851 | * |
||||
| 852 | * Like Grosch I implement context-sensitive FOLLOW sets that are combined |
||||
| 853 | * at run-time upon error to avoid overhead during parsing. |
||||
| 854 | */ |
||||
| 855 | 4 | protected function getErrorRecoverySet(Parser $recognizer) : IntervalSet |
|||
| 856 | { |
||||
| 857 | 4 | $interpreter = $recognizer->getInterpreter(); |
|||
| 858 | |||||
| 859 | 4 | if ($interpreter === null) { |
|||
| 860 | throw new \RuntimeException('Unexpected null interpreter.'); |
||||
| 861 | } |
||||
| 862 | |||||
| 863 | 4 | $atn = $interpreter->atn; |
|||
| 864 | 4 | $ctx = $recognizer->getContext(); |
|||
| 865 | 4 | $recoverSet = new IntervalSet(); |
|||
| 866 | |||||
| 867 | 4 | while ($ctx !== null && $ctx->invokingState >= 0) { |
|||
| 868 | // compute what follows who invoked us |
||||
| 869 | /** @var ATNState $invokingState */ |
||||
| 870 | $invokingState = $atn->states[$ctx->invokingState]; |
||||
| 871 | /** @var RuleTransition $rt */ |
||||
| 872 | $rt = $invokingState->getTransition(0); |
||||
| 873 | $follow = $atn->nextTokens($rt->followState); |
||||
| 874 | $recoverSet->addSet($follow); |
||||
| 875 | $ctx = $ctx->getParent(); |
||||
| 876 | } |
||||
| 877 | |||||
| 878 | 4 | $recoverSet->removeOne(Token::EPSILON); |
|||
| 879 | |||||
| 880 | 4 | return $recoverSet; |
|||
| 881 | } |
||||
| 882 | |||||
| 883 | /** |
||||
| 884 | * Consume tokens until one matches the given token set. |
||||
| 885 | */ |
||||
| 886 | 4 | protected function consumeUntil(Parser $recognizer, IntervalSet $set) : void |
|||
| 887 | { |
||||
| 888 | 4 | $inputStream = $recognizer->getInputStream(); |
|||
| 889 | |||||
| 890 | 4 | if ($inputStream === null) { |
|||
| 891 | throw new \RuntimeException('Unexpected null input stream.'); |
||||
| 892 | } |
||||
| 893 | |||||
| 894 | 4 | $ttype = $inputStream->LA(1); |
|||
| 895 | |||||
| 896 | 4 | while ($ttype !== Token::EOF && !$set->contains($ttype)) { |
|||
| 897 | 1 | $recognizer->consume(); |
|||
| 898 | 1 | $ttype = $inputStream->LA(1); |
|||
| 899 | } |
||||
| 900 | 4 | } |
|||
| 901 | } |
||||
| 902 |
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.