|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace Antlr\Antlr4\Runtime\Error; |
|
6
|
|
|
|
|
7
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\ATNState; |
|
8
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; |
|
9
|
|
|
use Antlr\Antlr4\Runtime\Error\Exceptions\FailedPredicateException; |
|
10
|
|
|
use Antlr\Antlr4\Runtime\Error\Exceptions\InputMismatchException; |
|
11
|
|
|
use Antlr\Antlr4\Runtime\Error\Exceptions\NoViableAltException; |
|
12
|
|
|
use Antlr\Antlr4\Runtime\Error\Exceptions\RecognitionException; |
|
13
|
|
|
use Antlr\Antlr4\Runtime\IntervalSet; |
|
14
|
|
|
use Antlr\Antlr4\Runtime\Parser; |
|
15
|
|
|
use Antlr\Antlr4\Runtime\ParserRuleContext; |
|
16
|
|
|
use Antlr\Antlr4\Runtime\Token; |
|
17
|
|
|
use Antlr\Antlr4\Runtime\Utils\Pair; |
|
18
|
|
|
use Antlr\Antlr4\Runtime\Utils\StringUtils; |
|
19
|
|
|
|
|
20
|
|
|
/** |
|
21
|
|
|
* This is the default implementation of {@see ANTLRErrorStrategy} used for |
|
22
|
|
|
* error reporting and recovery in ANTLR parsers. |
|
23
|
|
|
*/ |
|
24
|
|
|
class DefaultErrorStrategy implements ANTLRErrorStrategy |
|
25
|
|
|
{ |
|
26
|
|
|
/** |
|
27
|
|
|
* Indicates whether the error strategy is currently "recovering from an |
|
28
|
|
|
* error". This is used to suppress reporting multiple error messages while |
|
29
|
|
|
* attempting to recover from a detected syntax error. |
|
30
|
|
|
* |
|
31
|
|
|
* @see DefaultErrorStrategy::inErrorRecoveryMode() |
|
32
|
|
|
* |
|
33
|
|
|
* @var bool |
|
34
|
|
|
*/ |
|
35
|
|
|
protected $errorRecoveryMode = false; |
|
36
|
|
|
|
|
37
|
|
|
/** The index into the input stream where the last error occurred. |
|
38
|
|
|
* This is used to prevent infinite loops where an error is found |
|
39
|
|
|
* but no token is consumed during recovery...another error is found, |
|
40
|
|
|
* ad nauseam. This is a failsafe mechanism to guarantee that at least |
|
41
|
|
|
* one token/tree node is consumed for two errors. |
|
42
|
|
|
* |
|
43
|
|
|
* @var int |
|
44
|
|
|
*/ |
|
45
|
|
|
protected $lastErrorIndex = -1; |
|
46
|
|
|
|
|
47
|
|
|
/** @var IntervalSet|null */ |
|
48
|
|
|
protected $lastErrorStates; |
|
49
|
|
|
|
|
50
|
|
|
/** |
|
51
|
|
|
* This field is used to propagate information about the lookahead following |
|
52
|
|
|
* the previous match. Since prediction prefers completing the current rule |
|
53
|
|
|
* to error recovery efforts, error reporting may occur later than the |
|
54
|
|
|
* original point where it was discoverable. The original context is used to |
|
55
|
|
|
* compute the true expected sets as though the reporting occurred as early |
|
56
|
|
|
* as possible. |
|
57
|
|
|
* |
|
58
|
|
|
* @var ParserRuleContext|null |
|
59
|
|
|
*/ |
|
60
|
|
|
protected $nextTokensContext; |
|
61
|
|
|
|
|
62
|
|
|
/** |
|
63
|
|
|
* @see DefaultErrorStrategy::$nextTokensContext |
|
64
|
|
|
* |
|
65
|
|
|
* @var int|null |
|
66
|
|
|
*/ |
|
67
|
|
|
protected $nextTokensState; |
|
68
|
|
|
|
|
69
|
|
|
/**
 * {@inheritdoc}
 *
 * Resetting this strategy only requires discarding any pending error
 * state, so the work is delegated to
 * {@see DefaultErrorStrategy::endErrorCondition()}, which leaves the
 * handler outside of error recovery mode.
 *
 * @param Parser $recognizer The parser instance.
 */
public function reset(Parser $recognizer) : void
{
    $this->endErrorCondition($recognizer);
}
|
80
|
|
|
|
|
81
|
|
|
/**
 * Switches the strategy into error recovery mode.
 *
 * The reporting methods of this class call this right before they notify
 * the error listeners, so that follow-up errors are suppressed until
 * {@see DefaultErrorStrategy::endErrorCondition()} clears the flag again.
 *
 * @param Parser $recognizer The parser instance (not used by this
 *                           implementation).
 */
protected function beginErrorCondition(Parser $recognizer) : void
{
    $this->errorRecoveryMode = true;
}
|
91
|
|
|
|
|
92
|
7 |
|
/**
 * {@inheritdoc}
 *
 * Reports the flag set by
 * {@see DefaultErrorStrategy::beginErrorCondition()} and cleared by
 * {@see DefaultErrorStrategy::endErrorCondition()}.
 *
 * @param Parser $recognizer The parser instance (not used by this
 *                           implementation).
 *
 * @return bool `true` while the strategy is recovering from an error.
 */
public function inErrorRecoveryMode(Parser $recognizer) : bool
{
    return $this->errorRecoveryMode;
}
|
96
|
|
|
|
|
97
|
|
|
/**
 * Leaves error recovery mode and forgets where the last error happened.
 *
 * Besides clearing the recovery flag, this drops the bookkeeping used by
 * {@see DefaultErrorStrategy::recover()} to detect repeated errors at the
 * same input position (the last error index and the set of states in
 * which errors were seen).
 *
 * @param Parser $recognizer The parser instance (not used by this
 *                           implementation).
 */
protected function endErrorCondition(Parser $recognizer) : void
{
    // The three fields are independent of each other.
    $this->lastErrorIndex = -1;
    $this->lastErrorStates = null;
    $this->errorRecoveryMode = false;
}
|
107
|
|
|
|
|
108
|
|
|
/**
 * {@inheritdoc}
 *
 * A successful match means any previous error has been recovered from, so
 * this implementation just calls
 * {@see DefaultErrorStrategy::endErrorCondition()}.
 *
 * @param Parser $recognizer The parser instance.
 */
public function reportMatch(Parser $recognizer) : void
{
    $this->endErrorCondition($recognizer);
}
|
118
|
|
|
|
|
119
|
|
|
/**
 * {@inheritdoc}
 *
 * Nothing is reported while the handler is already in error recovery
 * mode; this avoids a cascade of spurious messages after the first error.
 * Otherwise {@see DefaultErrorStrategy::beginErrorCondition()} is called
 * and the exception is routed by its runtime type:
 *
 * - {@see NoViableAltException}: handled by
 *   {@see DefaultErrorStrategy::reportNoViableAlternative()}
 * - {@see InputMismatchException}: handled by
 *   {@see DefaultErrorStrategy::reportInputMismatch()}
 * - {@see FailedPredicateException}: handled by
 *   {@see DefaultErrorStrategy::reportFailedPredicate()}
 * - anything else: passed straight to
 *   {@see Parser::notifyErrorListeners()} with the exception's own message
 *
 * @param Parser               $recognizer The parser instance.
 * @param RecognitionException $e          The exception to report.
 */
public function reportError(Parser $recognizer, RecognitionException $e) : void
{
    // An error was already reported and no token has been matched
    // successfully since then: stay quiet.
    if ($this->inErrorRecoveryMode($recognizer)) {
        return;
    }

    $this->beginErrorCondition($recognizer);

    if ($e instanceof NoViableAltException) {
        $this->reportNoViableAlternative($recognizer, $e);

        return;
    }

    if ($e instanceof InputMismatchException) {
        $this->reportInputMismatch($recognizer, $e);

        return;
    }

    if ($e instanceof FailedPredicateException) {
        $this->reportFailedPredicate($recognizer, $e);

        return;
    }

    // Unknown kind of recognition error: report it verbatim.
    $recognizer->notifyErrorListeners($e->getMessage(), $e->getOffendingToken(), $e);
}
|
158
|
|
|
|
|
159
|
|
|
/**
 * {@inheritdoc}
 *
 * Resynchronizes the parser by consuming tokens until one from the error
 * recovery set (see `getErrorRecoverySet()`) is found; loosely, that is
 * the set of tokens that can follow the current rule.
 *
 * As a failsafe against infinite loops, one token is force-consumed first
 * when this error occurs at the same input index and in an ATN state that
 * was already recorded for a previous error.
 *
 * @param Parser               $recognizer The parser instance.
 * @param RecognitionException $e          The exception being recovered
 *                                         from (not used by this
 *                                         implementation).
 *
 * @throws \RuntimeException If the parser has no input stream.
 */
public function recover(Parser $recognizer, RecognitionException $e) : void
{
    $stream = $recognizer->getInputStream();

    if ($stream === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    // Same token index and a previously visited state: LT(1) must be in
    // the recovery set, so nothing was consumed last time around.
    $stuckOnSameToken = $this->lastErrorStates !== null
        && $this->lastErrorIndex === $stream->getIndex()
        && $this->lastErrorStates->contains($recognizer->getState());

    if ($stuckOnSameToken) {
        // Consume at least one token to guarantee progress.
        $recognizer->consume();
    }

    // Remember where and in which state this error was handled.
    $this->lastErrorIndex = $stream->getIndex();
    $this->lastErrorStates = $this->lastErrorStates ?? new IntervalSet();
    $this->lastErrorStates->addOne($recognizer->getState());

    $this->consumeUntil($recognizer, $this->getErrorRecoverySet($recognizer));
}
|
197
|
|
|
|
|
198
|
|
|
/**
 * The default implementation of {@see ANTLRErrorStrategy::sync()} makes
 * sure that the current lookahead symbol is consistent with what we are
 * expecting at this point in the ATN. It may be called at any time, but
 * ANTLR only generates calls before subrules/loops and on each iteration.
 *
 * Implements Jim Idle's magic sync mechanism in closures and optional
 * subrules. E.g.,
 *
 *     a : sync ( stuff sync )* ;
 *     sync : {consume to what can follow sync} ;
 *
 * At the start of a sub rule, upon error, single token deletion is
 * attempted. If that is not possible, an {@see InputMismatchException} is
 * thrown so that the current rule is abandoned and the default recovery
 * (consume until the resynchronization set of the current rule) kicks in.
 *
 * If the sub rule is optional (`(...)?`, `(...)*`, or a block with an
 * empty alternative), the expected set includes what follows the subrule.
 *
 * During loop iteration, tokens are consumed until one is seen that can
 * start the sub rule or follow the loop. That is pretty aggressive, but
 * the intent is to stay in the loop as long as possible.
 *
 * ORIGINS
 *
 * Previous versions of ANTLR did a poor job of recovery within loops.
 * A single mismatched or missing token would force the parser to bail out
 * of the entire rule surrounding the loop. So, for rule
 *
 *     classDef : 'class' ID '{' member* '}'
 *
 * input with an extra token between members would force the parser to
 * consume until it found the next class definition rather than the next
 * member definition of the current class.
 *
 * This costs a little effort because the token set has to be compared at
 * the start of the loop and at each iteration. If speed suffers, the
 * functionality can be turned off by overriding this method with an
 * empty body.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @throws RecognitionException
 * @throws \RuntimeException    If the parser has no interpreter or no
 *                              input stream.
 */
public function sync(Parser $recognizer) : void
{
    $interpreter = $recognizer->getInterpreter();

    if ($interpreter === null) {
        throw new \RuntimeException('Unexpected null interpreter.');
    }

    /** @var ATNState $state */
    $state = $interpreter->atn->states[$recognizer->getState()];

    // Never try to sync while a recovery is already in progress.
    if ($this->inErrorRecoveryMode($recognizer)) {
        return;
    }

    $stream = $recognizer->getInputStream();

    if ($stream === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    $lookahead = $stream->LA(1);

    // Cheap check first: the context-free next-token set of this state.
    $expected = $recognizer->getATN()->nextTokens($state);

    if ($expected->contains($lookahead)) {
        // The token definitely matches; forget any remembered context.
        $this->nextTokensContext = null;
        $this->nextTokensState = ATNState::INVALID_STATE_NUMBER;

        return;
    }

    if ($expected->contains(Token::EPSILON)) {
        // The next token may still fail to match; the information tracked
        // here is deliberately limited for performance. Only the first
        // such point is remembered.
        if ($this->nextTokensContext === null) {
            $this->nextTokensContext = $recognizer->getContext();
            $this->nextTokensState = $recognizer->getState();
        }

        return;
    }

    switch ($state->getStateType()) {
        case ATNState::BLOCK_START:
        case ATNState::STAR_BLOCK_START:
        case ATNState::PLUS_BLOCK_START:
        case ATNState::STAR_LOOP_ENTRY:
            // Report the error and recover by deleting one token if
            // possible; otherwise give up on the current rule.
            if ($this->singleTokenDeletion($recognizer) === null) {
                throw new InputMismatchException($recognizer);
            }

            return;

        case ATNState::PLUS_LOOP_BACK:
        case ATNState::STAR_LOOP_BACK:
            $this->reportUnwantedToken($recognizer);

            // Whatever can start another iteration, follow the loop, or
            // follow the rule.
            $resyncSet = $recognizer->getExpectedTokens()->orSet($this->getErrorRecoverySet($recognizer));
            $this->consumeUntil($recognizer, $resyncSet);

            return;
    }

    // Any other kind of ATN state: nothing to do.
}
|
312
|
|
|
|
|
313
|
|
|
/**
 * This is called by {@see DefaultErrorStrategy::reportError()} when
 * the exception is a {@see NoViableAltException}.
 *
 * Builds a "no viable alternative at input ..." message and forwards it to
 * {@see Parser::notifyErrorListeners()}. The quoted input is:
 *
 * - `<unknown input>` when the parser has no token stream,
 * - `<EOF>` when the exception's start token is the EOF token,
 * - otherwise the text between the start token and the offending token.
 *
 * @param Parser               $recognizer The parser instance.
 * @param NoViableAltException $e          The recognition exception.
 *
 * @throws \RuntimeException If a token stream is available but the
 *                           exception carries no start token.
 *
 * @see DefaultErrorStrategy::reportError()
 */
protected function reportNoViableAlternative(Parser $recognizer, NoViableAltException $e) : void
{
    $input = '<unknown input>';
    $tokens = $recognizer->getTokenStream();

    if ($tokens !== null) {
        $startToken = $e->getStartToken();

        if ($startToken === null) {
            throw new \RuntimeException('Unexpected null start token.');
        }

        // Pass the token that was just null-checked instead of fetching it
        // from the exception a second time.
        $input = $startToken->getType() === Token::EOF
            ? '<EOF>'
            : $tokens->getTextByTokens($startToken, $e->getOffendingToken());
    }

    $msg = \sprintf('no viable alternative at input %s', $this->escapeWSAndQuote($input));

    $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
}
|
346
|
|
|
|
|
347
|
|
|
/**
 * This is called by {@see DefaultErrorStrategy::reportError()} when
 * the exception is an {@see InputMismatchException}.
 *
 * Produces a "mismatched input X expecting Y" message, where X is the
 * display form of the offending token and Y is the exception's expected
 * token set rendered with the parser's vocabulary, and sends it to
 * {@see Parser::notifyErrorListeners()}.
 *
 * @param Parser                 $recognizer The parser instance.
 * @param InputMismatchException $e          The recognition exception.
 *
 * @throws \RuntimeException If the exception has no expected tokens.
 *
 * @see DefaultErrorStrategy::reportError()
 */
protected function reportInputMismatch(Parser $recognizer, InputMismatchException $e) : void
{
    $expected = $e->getExpectedTokens();

    if ($expected === null) {
        throw new \RuntimeException('Unexpected null expected tokens.');
    }

    $found = $this->getTokenErrorDisplay($e->getOffendingToken());
    $wanted = $expected->toStringVocabulary($recognizer->getVocabulary());
    $msg = \sprintf('mismatched input %s expecting %s', $found, $wanted);

    $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
}
|
372
|
|
|
|
|
373
|
|
|
/**
 * This is called by {@see DefaultErrorStrategy::reportError()} when
 * the exception is a {@see FailedPredicateException}.
 *
 * The reported message has the form "rule <current rule name> <exception
 * message>" and is passed to {@see Parser::notifyErrorListeners()}.
 *
 * @param Parser                   $recognizer The parser instance.
 * @param FailedPredicateException $e          The recognition exception.
 *
 * @see DefaultErrorStrategy::reportError()
 */
protected function reportFailedPredicate(Parser $recognizer, FailedPredicateException $e) : void
{
    $ruleName = $recognizer->getCurrentRuleName();
    $reason = $e->getMessage();
    $msg = \sprintf('rule %s %s', $ruleName, $reason);

    $recognizer->notifyErrorListeners($msg, $e->getOffendingToken(), $e);
}
|
388
|
|
|
|
|
389
|
|
|
/**
 * Reports a syntax error that is resolved by removing a token from the
 * input. When this is called, the erroneous symbol is the current token
 * and has not been consumed yet. On return the handler is in error
 * recovery mode.
 *
 * It is called by {@see DefaultErrorStrategy::singleTokenDeletion()} once
 * deletion has been identified as a viable recovery, and by
 * {@see DefaultErrorStrategy::sync()} at loop-back states.
 *
 * Does nothing when the handler is already in error recovery mode.
 * Otherwise it enters recovery mode via
 * {@see DefaultErrorStrategy::beginErrorCondition()} and sends an
 * "extraneous input X expecting Y" message to
 * {@see Parser::notifyErrorListeners()}.
 *
 * @param Parser $recognizer The parser instance.
 */
protected function reportUnwantedToken(Parser $recognizer) : void
{
    if ($this->inErrorRecoveryMode($recognizer)) {
        return;
    }

    $this->beginErrorCondition($recognizer);

    $unwanted = $recognizer->getCurrentToken();
    $unwantedText = $this->getTokenErrorDisplay($unwanted);
    $expectedText = $this->getExpectedTokens($recognizer)
        ->toStringVocabulary($recognizer->getVocabulary());

    $msg = \sprintf('extraneous input %s expecting %s', $unwantedText, $expectedText);

    $recognizer->notifyErrorListeners($msg, $unwanted);
}
|
427
|
|
|
|
|
428
|
|
|
/**
 * Reports a syntax error that is resolved by inserting a missing token
 * into the input. When this is called, the missing token has not been
 * created yet. On return the handler is in error recovery mode.
 *
 * It is called by {@see DefaultErrorStrategy::singleTokenInsertion()} once
 * insertion has been identified as a viable recovery for a mismatched
 * input error.
 *
 * Does nothing when the handler is already in error recovery mode.
 * Otherwise it enters recovery mode via
 * {@see DefaultErrorStrategy::beginErrorCondition()} and sends a
 * "missing X at Y" message to {@see Parser::notifyErrorListeners()}.
 *
 * @param Parser $recognizer The parser instance.
 */
protected function reportMissingToken(Parser $recognizer) : void
{
    if ($this->inErrorRecoveryMode($recognizer)) {
        return;
    }

    $this->beginErrorCondition($recognizer);

    $here = $recognizer->getCurrentToken();
    $missingText = $this->getExpectedTokens($recognizer)
        ->toStringVocabulary($recognizer->getVocabulary());
    $hereText = $this->getTokenErrorDisplay($here);

    $msg = \sprintf('missing %s at %s', $missingText, $hereText);

    $recognizer->notifyErrorListeners($msg, $here);
}
|
464
|
|
|
|
|
465
|
|
|
/**
 * {@inheritdoc}
 *
 * Tries to recover from mismatched input with single token deletion and,
 * failing that, single token insertion. If neither works, an
 * {@see InputMismatchException} is thrown.
 *
 * EXTRA TOKEN (single token deletion)
 *
 * `LA(1)` is not what we are looking for. If `LA(2)` has the right token,
 * however, `LA(1)` is assumed to be a spurious extra token and is deleted.
 * The next token (formerly `LA(2)`) is then consumed and returned as the
 * successful result of the match operation. Implemented by
 * {@see DefaultErrorStrategy::singleTokenDeletion()}.
 *
 * MISSING TOKEN (single token insertion)
 *
 * If the current token (at `LA(1)`) is consistent with what could come
 * after the expected `LA(1)` token, the expected token is assumed to be
 * missing and is created on the fly with the parser's
 * {@see TokenFactory}. The "insertion" consists of returning the created
 * token as the successful result of the match operation. Implemented by
 * {@see DefaultErrorStrategy::singleTokenInsertion()}.
 *
 * EXAMPLE
 *
 * Input `i=(3;` is clearly missing the `')'`. When the parser returns from
 * the nested call to `expr`, it will have call chain:
 *
 *     stat → expr → atom
 *
 * and it will be trying to match the `')'` at this point in the
 * derivation:
 *
 *     => ID '=' '(' INT ')' ('+' atom)* ';'
 *                        ^
 *
 * The attempt to match `')'` fails on seeing `';'` and ends up here. Since
 * `LA(1)==';'` is in the set of tokens that can follow the `')'` token
 * reference in rule `atom`, it can be assumed that the `')'` was
 * forgotten.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @return Token The token that counts as matched after recovery.
 *
 * @throws RecognitionException
 */
public function recoverInline(Parser $recognizer) : Token
{
    // First choice: single token deletion.
    $afterDeletion = $this->singleTokenDeletion($recognizer);

    if ($afterDeletion !== null) {
        // The extra token is gone; step over the matched token as if
        // nothing had gone wrong.
        $recognizer->consume();

        return $afterDeletion;
    }

    // Second choice: single token insertion.
    if ($this->singleTokenInsertion($recognizer)) {
        return $this->getMissingSymbol($recognizer);
    }

    // Neither strategy worked. When sync() remembered an earlier context,
    // report the mismatch relative to that state and context.
    $mismatch = $this->nextTokensContext === null
        ? new InputMismatchException($recognizer)
        : new InputMismatchException($recognizer, $this->nextTokensState, $this->nextTokensContext);

    throw $mismatch;
}
|
540
|
|
|
|
|
541
|
|
|
/**
 * Implements the single-token insertion inline error recovery strategy.
 * It is called by {@see DefaultErrorStrategy::recoverInline()} after
 * single-token deletion failed. When this returns `true`, the handler is
 * in error recovery mode.
 *
 * Insertion is considered viable when the `LA(1)` input symbol could be
 * matched if it were the `LA(2)` symbol instead, i.e. when it is among the
 * tokens expected (in the current context) at the target of the current
 * ATN state's first transition. In that case the caller is responsible
 * for creating and inserting a token of the correct type.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @return bool `true` if single-token insertion is a viable recovery
 *              strategy for the current mismatched input, otherwise
 *              `false`.
 *
 * @throws \RuntimeException If the parser has no input stream or no
 *                           interpreter.
 */
protected function singleTokenInsertion(Parser $recognizer) : bool
{
    $input = $recognizer->getInputStream();

    if ($input === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    $interpreter = $recognizer->getInterpreter();

    if ($interpreter === null) {
        throw new \RuntimeException('Unexpected null interpreter.');
    }

    $actualType = $input->LA(1);

    // If the current token is consistent with what could come after the
    // current ATN state, a token must be missing; recovery is then free
    // to conjure up the missing token.
    $atn = $interpreter->atn;

    /** @var ATNState $state */
    $state = $atn->states[$recognizer->getState()];
    $successor = $state->getTransition(0)->target;
    $expectedAfterInsertion = $atn->nextTokensInContext($successor, $recognizer->getContext());

    if (!$expectedAfterInsertion->contains($actualType)) {
        return false;
    }

    $this->reportMissingToken($recognizer);

    return true;
}
|
593
|
|
|
|
|
594
|
|
|
/**
 * Implements the single-token deletion inline error recovery strategy.
 * It is called by {@see DefaultErrorStrategy::recoverInline()} (and by
 * {@see DefaultErrorStrategy::sync()}) to attempt recovery from mismatched
 * input.
 *
 * Deletion is viable when the `LA(2)` symbol is among the currently
 * expected tokens. In that case the error is reported through
 * {@see DefaultErrorStrategy::reportUnwantedToken()}, the extraneous token
 * is "deleted" with {@see Parser::consume()}, and
 * {@see DefaultErrorStrategy::reportMatch()} is called to signal a
 * successful match, so the handler is _not_ in error recovery mode when a
 * token is returned.
 *
 * When `null` is returned, nothing was reported and nothing was consumed.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @return Token|null The successfully matched {@see Token} if single-token
 *                    deletion recovers from the mismatched input,
 *                    otherwise `null`.
 *
 * @throws \RuntimeException If the parser has no input stream.
 */
protected function singleTokenDeletion(Parser $recognizer) : ?Token
{
    $stream = $recognizer->getInputStream();

    if ($stream === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    $typeAfterNext = $stream->LA(2);
    $expected = $this->getExpectedTokens($recognizer);

    if (!$expected->contains($typeAfterNext)) {
        return null;
    }

    $this->reportUnwantedToken($recognizer);

    // Simply drop the extra token...
    $recognizer->consume();

    // ...and hand back the token that is actually being matched.
    $matched = $recognizer->getCurrentToken();

    // The current token is known to be correct.
    $this->reportMatch($recognizer);

    return $matched;
}
|
637
|
|
|
|
|
638
|
|
|
/**
 * Conjures up a missing token during error recovery.
 *
 * The recognizer attempts to recover from single missing symbols, but
 * actions might refer to such a symbol. For example, in `x=ID {f($x);}`
 * the action assumes that an identifier was matched and that `$x` points
 * at it. If that token is missing but the next token in the stream is
 * what is wanted, parsing continues as if the token were there; because
 * some token has to be returned in its place, one is made up here.
 *
 * The created token has the smallest expected token type (or the invalid
 * type when nothing is expected), the text `<missing NAME>`, the default
 * channel, start/stop indexes of -1, and the line and column of the
 * current token, or of the previous token when the current one is EOF and
 * a previous token exists.
 *
 * Override this method to control which tokens are produced for missing
 * symbols, e.g. to create something special for identifier tokens.
 *
 * @param Parser $recognizer The parser instance.
 *
 * @return Token The made-up token.
 *
 * @throws \RuntimeException If the parser has no current token or input
 *                           stream, or the current token has no source.
 */
protected function getMissingSymbol(Parser $recognizer) : Token
{
    $currentToken = $recognizer->getCurrentToken();

    if ($currentToken === null) {
        throw new \RuntimeException('Unexpected null current token.');
    }

    $stream = $recognizer->getInputStream();

    if ($stream === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    $source = $currentToken->getTokenSource();

    if ($source === null) {
        throw new \RuntimeException('Unexpected null token source.');
    }

    $expected = $this->getExpectedTokens($recognizer);

    // Any expected element will do; take the smallest one.
    $missingType = $expected->isNull() ? Token::INVALID_TYPE : $expected->getMinElement();

    $missingText = $missingType === Token::EOF
        ? '<missing EOF>'
        : \sprintf('<missing %s>', $recognizer->getVocabulary()->getDisplayName($missingType));

    // Position the new token at the current token, or at the previous
    // token when the current one is EOF.
    $anchor = $currentToken;
    $previous = $stream->LT(-1);

    if ($previous !== null && $anchor->getType() === Token::EOF) {
        $anchor = $previous;
    }

    $origin = new Pair($source, $source->getInputStream());

    return $recognizer->getTokenFactory()->createEx(
        $origin,
        $missingType,
        $missingText,
        Token::DEFAULT_CHANNEL,
        -1,
        -1,
        $anchor->getLine(),
        $anchor->getCharPositionInLine()
    );
}
|
712
|
|
|
|
|
713
|
4 |
|
/**
 * Returns the set of tokens the given recognizer reports as expected.
 *
 * This simply forwards to {@see Parser::getExpectedTokens()}; it exists as a
 * separate protected method so the lookup goes through the strategy.
 */
protected function getExpectedTokens(Parser $recognizer) : IntervalSet
{
    $expected = $recognizer->getExpectedTokens();

    return $expected;
}
|
717
|
|
|
|
|
718
|
|
|
/**
 * Decides how a token is rendered inside an error message.
 *
 * By default only the token text is shown. During development you may want
 * far more detail; in that case override this method and use a (string)
 * cast (which, for CommonToken, dumps everything about the token). Doing it
 * here is preferable to overriding a method on the token objects, because
 * the lexer does not have to be changed to create a new token type.
 *
 * @param Token|null $t The token to render, or `null` when there is none.
 *
 * @return string `'<no token>'` for `null`; otherwise the symbol text, or
 *                `<EOF>` / `<type>` when the text is `null`, with whitespace
 *                escaped and wrapped in single quotes.
 */
protected function getTokenErrorDisplay(?Token $t) : string
{
    if ($t === null) {
        return '<no token>';
    }

    $text = $this->getSymbolText($t);

    if ($text !== null) {
        return $this->escapeWSAndQuote($text);
    }

    // No text available: fall back to a description based on the token type.
    $fallback = $this->getSymbolType($t) === Token::EOF
        ? '<EOF>'
        : '<' . $this->getSymbolType($t) . '>';

    return $this->escapeWSAndQuote($fallback);
}
|
745
|
|
|
|
|
746
|
4 |
|
/**
 * Returns the text of the given token as reported by {@see Token::getText()}.
 *
 * @return string|null The token text, or `null` when the token has none.
 */
protected function getSymbolText(Token $symbol) : ?string
{
    $text = $symbol->getText();

    return $text;
}
|
750
|
|
|
|
|
751
|
|
|
/**
 * Returns the type of the given token as reported by {@see Token::getType()}.
 */
protected function getSymbolType(Token $symbol) : int
{
    $type = $symbol->getType();

    return $type;
}
|
755
|
|
|
|
|
756
|
4 |
|
/**
 * Escapes whitespace in the given string via
 * {@see StringUtils::escapeWhitespace()} and wraps the result in single
 * quotes.
 */
protected function escapeWSAndQuote(string $s) : string
{
    $escaped = StringUtils::escapeWhitespace($s);

    return \sprintf("'%s'", $escaped);
}
|
760
|
|
|
|
|
761
|
|
|
/** |
|
762
|
|
|
* Compute the error recovery set for the current rule. During |
|
763
|
|
|
* rule invocation, the parser pushes the set of tokens that can |
|
764
|
|
|
* follow that rule reference on the stack; this amounts to |
|
765
|
|
|
* computing FIRST of what follows the rule reference in the |
|
766
|
|
|
* enclosing rule. See LinearApproximator::FIRST. |
|
767
|
|
|
* This local follow set only includes tokens |
|
768
|
|
|
* from within the rule; i.e., the FIRST computation done by |
|
769
|
|
|
* ANTLR stops at the end of a rule. |
|
770
|
|
|
* |
|
771
|
|
|
* EXAMPLE |
|
772
|
|
|
* |
|
773
|
|
|
* When you find a "no viable alt exception", the input is not |
|
774
|
|
|
* consistent with any of the alternatives for rule r. The best |
|
775
|
|
|
* thing to do is to consume tokens until you see something that |
|
776
|
|
|
* can legally follow a call to r *or* any rule that called r. |
|
777
|
|
|
* You don't want the exact set of viable next tokens because the |
|
778
|
|
|
* input might just be missing a token--you might consume the |
|
779
|
|
|
* rest of the input looking for one of the missing tokens. |
|
780
|
|
|
* |
|
781
|
|
|
* Consider grammar: |
|
782
|
|
|
* |
|
783
|
|
|
* a : '[' b ']' |
|
784
|
|
|
* | '(' b ')' |
|
785
|
|
|
* ; |
|
786
|
|
|
* b : c '^' INT ; |
|
787
|
|
|
* c : ID |
|
788
|
|
|
* | INT |
|
789
|
|
|
* ; |
|
790
|
|
|
* |
|
791
|
|
|
* At each rule invocation, the set of tokens that could follow |
|
792
|
|
|
* that rule is pushed on a stack. Here are the various |
|
793
|
|
|
* context-sensitive follow sets: |
|
794
|
|
|
* |
|
795
|
|
|
* FOLLOW(b1_in_a) = FIRST(']') = ']' |
|
796
|
|
|
* FOLLOW(b2_in_a) = FIRST(')') = ')' |
|
797
|
|
|
* FOLLOW(c_in_b) = FIRST('^') = '^' |
|
798
|
|
|
* |
|
799
|
|
|
* Upon erroneous input "[]", the call chain is |
|
800
|
|
|
* |
|
801
|
|
|
* a -> b -> c |
|
802
|
|
|
* |
|
803
|
|
|
* and, hence, the follow context stack is: |
|
804
|
|
|
* |
|
805
|
|
|
* depth | follow set | start of rule execution |
|
806
|
|
|
* ------|------------|------------------------- |
|
807
|
|
|
* 0 | <EOF> | a (from main()) |
|
808
|
|
|
* 1 | ']' | b |
|
809
|
|
|
* 2 | '^' | c |
|
810
|
|
|
* |
|
811
|
|
|
* Notice that ')' is not included, because b would have to have |
|
812
|
|
|
* been called from a different context in rule a for ')' to be |
|
813
|
|
|
* included. |
|
814
|
|
|
* |
|
815
|
|
|
* For error recovery, we cannot consider FOLLOW(c) |
|
816
|
|
|
* (context-sensitive or otherwise). We need the combined set of |
|
817
|
|
|
* all context-sensitive FOLLOW sets--the set of all tokens that |
|
818
|
|
|
* could follow any reference in the call chain. We need to |
|
819
|
|
|
* resync to one of those tokens. Note that FOLLOW(c)='^' and if |
|
820
|
|
|
* we resync'd to that token, we'd consume until EOF. We need to |
|
821
|
|
|
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. |
|
822
|
|
|
* In this case, for input "[]", LA(1) is ']' and in the set, so we would |
|
823
|
|
|
* not consume anything. After printing an error, rule c would |
|
824
|
|
|
* return normally. Rule b would not find the required '^' though. |
|
825
|
|
|
* At this point, it gets a mismatched token error and throws an |
|
826
|
|
|
* exception (since LA(1) is not in the viable following token |
|
827
|
|
|
* set). The rule exception handler tries to recover, but finds |
|
828
|
|
|
* the same recovery set and doesn't consume anything. Rule b |
|
829
|
|
|
* exits normally returning to rule a. Now it finds the ']' (and |
|
830
|
|
|
* with the successful match exits errorRecovery mode). |
|
831
|
|
|
* |
|
832
|
|
|
* So, you can see that the parser walks up the call chain looking |
|
833
|
|
|
* for the token that was a member of the recovery set. |
|
834
|
|
|
* |
|
835
|
|
|
* Errors are not generated in errorRecovery mode. |
|
836
|
|
|
* |
|
837
|
|
|
* ANTLR's error recovery mechanism is based upon original ideas: |
|
838
|
|
|
* |
|
839
|
|
|
* "Algorithms + Data Structures = Programs" by Niklaus Wirth |
|
840
|
|
|
* |
|
841
|
|
|
* and |
|
842
|
|
|
* |
|
843
|
|
|
* "A note on error recovery in recursive descent parsers": |
|
844
|
|
|
* http://portal.acm.org/citation.cfm?id=947902.947905 |
|
845
|
|
|
* |
|
846
|
|
|
* Later, Josef Grosch had some good ideas: |
|
847
|
|
|
* |
|
848
|
|
|
* "Efficient and Comfortable Error Recovery in Recursive Descent |
|
849
|
|
|
* Parsers": |
|
850
|
|
|
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip |
|
851
|
|
|
* |
|
852
|
|
|
* Like Grosch I implement context-sensitive FOLLOW sets that are combined |
|
853
|
|
|
* at run-time upon error to avoid overhead during parsing. |
|
854
|
|
|
*/ |
|
855
|
4 |
|
protected function getErrorRecoverySet(Parser $recognizer) : IntervalSet
{
    $interpreter = $recognizer->getInterpreter();
    if ($interpreter === null) {
        throw new \RuntimeException('Unexpected null interpreter.');
    }

    $atn = $interpreter->atn;
    $frame = $recognizer->getContext();
    $followUnion = new IntervalSet();

    // Walk up the invocation chain, stopping at the first context that has
    // no parent or whose invoking state is negative.
    for (; $frame !== null && $frame->invokingState >= 0; $frame = $frame->getParent()) {
        /** @var ATNState $caller */
        $caller = $atn->states[$frame->invokingState];

        /** @var RuleTransition $ruleCall */
        $ruleCall = $caller->getTransition(0);

        // Add whatever can follow the rule reference that invoked this frame.
        $followUnion->addSet($atn->nextTokens($ruleCall->followState));
    }

    // EPSILON is removed from the combined set before it is returned.
    $followUnion->removeOne(Token::EPSILON);

    return $followUnion;
}
|
882
|
|
|
|
|
883
|
|
|
/**
 * Consumes tokens from the recognizer until the lookahead token type is a
 * member of the given set, or the lookahead is EOF.
 *
 * @param Parser      $recognizer The parser whose input is advanced.
 * @param IntervalSet $set        Token types at which consumption stops.
 *
 * @throws \RuntimeException When the recognizer has no input stream.
 */
protected function consumeUntil(Parser $recognizer, IntervalSet $set) : void
{
    $tokens = $recognizer->getInputStream();

    if ($tokens === null) {
        throw new \RuntimeException('Unexpected null input stream.');
    }

    // Re-read the lookahead after every consume(); stop on EOF or on the
    // first token type contained in $set.
    for (
        $lookahead = $tokens->LA(1);
        $lookahead !== Token::EOF && !$set->contains($lookahead);
        $lookahead = $tokens->LA(1)
    ) {
        $recognizer->consume();
    }
}
|
901
|
|
|
} |
|
902
|
|
|
|
This check looks for parameters that have been defined for a function or method, but which are not used in the method body.