1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Antlr\Antlr4\Runtime\Atn; |
6
|
|
|
|
7
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\ATNState; |
8
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\RuleStopState; |
9
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\ActionTransition; |
10
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\PredicateTransition; |
11
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; |
12
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\Transition; |
13
|
|
|
use Antlr\Antlr4\Runtime\CharStream; |
14
|
|
|
use Antlr\Antlr4\Runtime\Dfa\DFA; |
15
|
|
|
use Antlr\Antlr4\Runtime\Dfa\DFAState; |
16
|
|
|
use Antlr\Antlr4\Runtime\Error\Exceptions\LexerNoViableAltException; |
17
|
|
|
use Antlr\Antlr4\Runtime\IntStream; |
18
|
|
|
use Antlr\Antlr4\Runtime\Lexer; |
19
|
|
|
use Antlr\Antlr4\Runtime\PredictionContexts\PredictionContext; |
20
|
|
|
use Antlr\Antlr4\Runtime\PredictionContexts\PredictionContextCache; |
21
|
|
|
use Antlr\Antlr4\Runtime\PredictionContexts\SingletonPredictionContext; |
22
|
|
|
use Antlr\Antlr4\Runtime\Token; |
23
|
|
|
use Antlr\Antlr4\Runtime\Utils\StringUtils; |
24
|
|
|
|
25
|
|
|
class LexerATNSimulator extends ATNSimulator |
26
|
|
|
{ |
27
|
|
|
public const MIN_DFA_EDGE = 0; |
28
|
|
|
public const MAX_DFA_EDGE = 127; // forces unicode to stay in ATN |
29
|
|
|
private const NEW_LINE_CODE = 10; |
30
|
|
|
|
31
|
|
|
/** @var array<string> */ |
32
|
|
|
public $log = []; |
33
|
|
|
|
34
|
|
|
/** @var bool */ |
35
|
|
|
public $debug = false; |
36
|
|
|
|
37
|
|
|
/** @var Lexer|null */ |
38
|
|
|
protected $recog; |
39
|
|
|
|
40
|
|
|
/** |
41
|
|
|
* The current token's starting index into the character stream. |
42
|
|
|
* Shared across DFA to ATN simulation in case the ATN fails and the |
43
|
|
|
* DFA did not have a previous accept state. In this case, we use the |
44
|
|
|
* ATN-generated exception object. |
45
|
|
|
* |
46
|
|
|
* @var int |
47
|
|
|
*/ |
48
|
|
|
protected $startIndex = -1; |
49
|
|
|
|
50
|
|
|
/** |
51
|
|
|
* The line number 1..n within the input. |
52
|
|
|
* |
53
|
|
|
* @var int |
54
|
|
|
*/ |
55
|
|
|
protected $line = 1; |
56
|
|
|
|
57
|
|
|
/** |
58
|
|
|
* The index of the character relative to the beginning of the line 0..n-1. |
59
|
|
|
* |
60
|
|
|
* @var int |
61
|
|
|
*/ |
62
|
|
|
protected $charPositionInLine = 0; |
63
|
|
|
|
64
|
|
|
/** @var array<DFA> */ |
65
|
|
|
public $decisionToDFA = []; |
66
|
|
|
|
67
|
|
|
/** @var int */ |
68
|
|
|
protected $mode = Lexer::DEFAULT_MODE; |
69
|
|
|
|
70
|
|
|
/** |
71
|
|
|
* Used during DFA/ATN exec to record the most recent accept configuration info. |
72
|
|
|
* |
73
|
|
|
* @var SimState |
74
|
|
|
*/ |
75
|
|
|
protected $prevAccept; |
76
|
|
|
|
77
|
|
|
/** |
78
|
|
|
* @param array<DFA> $decisionToDFA |
79
|
|
|
*/ |
80
|
7 |
|
public function __construct( |
81
|
|
|
?Lexer $recog, |
82
|
|
|
ATN $atn, |
83
|
|
|
array $decisionToDFA, |
84
|
|
|
PredictionContextCache $sharedContextCache |
85
|
|
|
) { |
86
|
7 |
|
parent::__construct($atn, $sharedContextCache); |
87
|
|
|
|
88
|
7 |
|
$this->decisionToDFA = $decisionToDFA; |
89
|
7 |
|
$this->recog = $recog; |
90
|
7 |
|
$this->prevAccept = new SimState(); |
91
|
7 |
|
} |
92
|
|
|
|
93
|
|
|
public function copyState(LexerATNSimulator $simulator) : void |
94
|
|
|
{ |
95
|
|
|
$this->charPositionInLine = $simulator->charPositionInLine; |
96
|
|
|
$this->line = $simulator->line; |
97
|
|
|
$this->mode = $simulator->mode; |
98
|
|
|
$this->startIndex = $simulator->startIndex; |
99
|
|
|
} |
100
|
|
|
|
101
|
7 |
|
public function getLine() : int |
102
|
|
|
{ |
103
|
7 |
|
return $this->line; |
104
|
|
|
} |
105
|
|
|
|
106
|
|
|
public function setLine(int $line) : void |
107
|
|
|
{ |
108
|
|
|
$this->line = $line; |
109
|
|
|
} |
110
|
|
|
|
111
|
7 |
|
public function getCharPositionInLine() : int |
112
|
|
|
{ |
113
|
7 |
|
return $this->charPositionInLine; |
114
|
|
|
} |
115
|
|
|
|
116
|
|
|
public function setCharPositionInLine(int $charPositionInLine) : void |
117
|
|
|
{ |
118
|
|
|
$this->charPositionInLine = $charPositionInLine; |
119
|
|
|
} |
120
|
|
|
|
121
|
7 |
|
public function match(CharStream $input, int $mode) : int |
122
|
|
|
{ |
123
|
7 |
|
static $match_calls; |
124
|
|
|
|
125
|
7 |
|
if ($match_calls === null) { |
126
|
1 |
|
$match_calls = 0; |
127
|
|
|
} |
128
|
|
|
|
129
|
7 |
|
$match_calls++; |
130
|
|
|
|
131
|
7 |
|
$this->mode = $mode; |
132
|
7 |
|
$mark = $input->mark(); |
133
|
|
|
|
134
|
|
|
try { |
135
|
7 |
|
$this->startIndex = $input->getIndex(); |
136
|
7 |
|
$this->prevAccept->reset(); |
137
|
|
|
|
138
|
7 |
|
$dfa = $this->decisionToDFA[$mode]; |
139
|
|
|
|
140
|
7 |
|
if ($dfa->s0 === null) { |
141
|
1 |
|
return $this->matchATN($input); |
142
|
|
|
} |
143
|
7 |
|
return $this->execATN($input, $dfa->s0); |
144
|
|
|
} finally { |
145
|
7 |
|
$input->release($mark); |
146
|
|
|
} |
147
|
|
|
} |
148
|
|
|
|
149
|
|
|
public function reset() : void |
150
|
|
|
{ |
151
|
|
|
$this->prevAccept->reset(); |
152
|
|
|
$this->startIndex = -1; |
153
|
|
|
$this->line = 1; |
154
|
|
|
$this->charPositionInLine = 0; |
155
|
|
|
$this->mode = Lexer::DEFAULT_MODE; |
156
|
|
|
} |
157
|
|
|
|
158
|
1 |
|
protected function matchATN(CharStream $input) : int |
159
|
|
|
{ |
160
|
1 |
|
$startState = $this->atn->modeToStartState[$this->mode]; |
161
|
|
|
|
162
|
1 |
|
if ($this->debug) { |
163
|
|
|
$this->log[] = \sprintf('matchATN mode %d start: %s', $this->mode, (string) $startState); |
164
|
|
|
} |
165
|
|
|
|
166
|
1 |
|
$old_mode = $this->mode; |
167
|
|
|
|
168
|
1 |
|
$s0_closure = $this->computeStartState($input, $startState); |
169
|
1 |
|
$suppressEdge = $s0_closure->hasSemanticContext; |
170
|
1 |
|
$s0_closure->hasSemanticContext = false; |
171
|
|
|
|
172
|
1 |
|
$next = $this->addDFAState($s0_closure); |
173
|
|
|
|
174
|
1 |
|
if (!$suppressEdge) { |
175
|
1 |
|
$this->decisionToDFA[$this->mode]->s0 = $next; |
176
|
|
|
} |
177
|
|
|
|
178
|
1 |
|
$predict = $this->execATN($input, $next); |
179
|
|
|
|
180
|
1 |
|
if ($this->debug) { |
181
|
|
|
$this->log[] = \sprintf('DFA after matchATN: %s', $this->decisionToDFA[$old_mode]->toLexerString()); |
182
|
|
|
} |
183
|
|
|
|
184
|
1 |
|
return $predict; |
185
|
|
|
} |
186
|
|
|
|
187
|
7 |
|
protected function execATN(CharStream $input, DFAState $ds0) : int |
188
|
|
|
{ |
189
|
7 |
|
if ($this->debug) { |
190
|
|
|
$this->log[] = \sprintf('start state closure=%s', (string) $ds0->configs); |
191
|
|
|
} |
192
|
|
|
|
193
|
7 |
|
if ($ds0->isAcceptState) { |
194
|
|
|
// allow zero-length tokens |
195
|
|
|
$this->captureSimState($this->prevAccept, $input, $ds0); |
196
|
|
|
} |
197
|
|
|
|
198
|
7 |
|
$t = $input->LA(1); |
199
|
7 |
|
$s = $ds0; // s is current/from DFA state |
200
|
|
|
|
201
|
7 |
|
while (true) { |
202
|
7 |
|
if ($this->debug) { |
203
|
|
|
$this->log[] = \sprintf('execATN loop starting closure: %s', (string) $s->configs); |
204
|
|
|
} |
205
|
|
|
|
206
|
|
|
// As we move src->trg, src->trg, we keep track of the previous trg to |
207
|
|
|
// avoid looking up the DFA state again, which is expensive. |
208
|
|
|
// If the previous target was already part of the DFA, we might |
209
|
|
|
// be able to avoid doing a reach operation upon t. If s!=null, |
210
|
|
|
// it means that semantic predicates didn't prevent us from |
211
|
|
|
// creating a DFA state. Once we know s!=null, we check to see if |
212
|
|
|
// the DFA state has an edge already for t. If so, we can just reuse |
213
|
|
|
// it's configuration set; there's no point in re-computing it. |
214
|
|
|
// This is kind of like doing DFA simulation within the ATN |
215
|
|
|
// simulation because DFA simulation is really just a way to avoid |
216
|
|
|
// computing reach/closure sets. Technically, once we know that |
217
|
|
|
// we have a previously added DFA state, we could jump over to |
218
|
|
|
// the DFA simulator. But, that would mean popping back and forth |
219
|
|
|
// a lot and making things more complicated algorithmically. |
220
|
|
|
// This optimization makes a lot of sense for loops within DFA. |
221
|
|
|
// A character will take us back to an existing DFA state |
222
|
|
|
// that already has lots of edges out of it. e.g., .* in comments. |
223
|
|
|
// print("Target for:" + str(s) + " and:" + str(t)) |
224
|
7 |
|
$target = $this->getExistingTargetState($s, $t); |
225
|
|
|
|
226
|
7 |
|
if ($target === null) { |
227
|
7 |
|
$target = $this->computeTargetState($input, $s, $t); |
228
|
|
|
} |
229
|
|
|
|
230
|
7 |
|
if ($target === ATNSimulator::error()) { |
231
|
7 |
|
break; |
232
|
|
|
} |
233
|
|
|
|
234
|
|
|
// If this is a consumable input element, make sure to consume before |
235
|
|
|
// capturing the accept state so the input index, line, and char |
236
|
|
|
// position accurately reflect the state of the interpreter at the |
237
|
|
|
// end of the token. |
238
|
6 |
|
if ($t !== Token::EOF) { |
239
|
6 |
|
$this->consume($input); |
240
|
|
|
} |
241
|
|
|
|
242
|
6 |
|
if ($target->isAcceptState) { |
243
|
6 |
|
$this->captureSimState($this->prevAccept, $input, $target); |
244
|
6 |
|
if ($t === Token::EOF) { |
245
|
|
|
break; |
246
|
|
|
} |
247
|
|
|
} |
248
|
|
|
|
249
|
6 |
|
$t = $input->LA(1); |
250
|
6 |
|
$s = $target; // flip; current DFA target becomes new src/from state |
251
|
|
|
} |
252
|
|
|
|
253
|
7 |
|
return $this->failOrAccept($this->prevAccept, $input, $s->configs, $t); |
254
|
|
|
} |
255
|
|
|
|
256
|
|
|
/** |
257
|
|
|
* Get an existing target state for an edge in the DFA. If the target state |
258
|
|
|
* for the edge has not yet been computed or is otherwise not available, |
259
|
|
|
* this method returns `null`. |
260
|
|
|
* |
261
|
|
|
* @param DFAState $s The current DFA state |
262
|
|
|
* @param int $t The next input symbol |
263
|
|
|
* |
264
|
|
|
* @return DFAState|null The existing target DFA state for the given input symbol |
265
|
|
|
* `t`, or `null` if the target state for this edg |
266
|
|
|
* is not already cached. |
267
|
|
|
*/ |
268
|
7 |
|
protected function getExistingTargetState(DFAState $s, int $t) : ?DFAState |
269
|
|
|
{ |
270
|
7 |
|
if ($s->edges === null || $t < self::MIN_DFA_EDGE || $t > self::MAX_DFA_EDGE) { |
271
|
7 |
|
return null; |
272
|
|
|
} |
273
|
|
|
|
274
|
6 |
|
$target = $s->edges[$t - self::MIN_DFA_EDGE] ?? null; |
275
|
|
|
|
276
|
6 |
|
if ($this->debug && $target !== null) { |
277
|
|
|
$this->log[] = \sprintf('reuse state %d edge to %d', $s->stateNumber, $target->stateNumber); |
278
|
|
|
} |
279
|
|
|
|
280
|
6 |
|
return $target; |
281
|
|
|
} |
282
|
|
|
|
283
|
|
|
/** |
284
|
|
|
* Compute a target state for an edge in the DFA, and attempt to add the |
285
|
|
|
* computed state and corresponding edge to the DFA. |
286
|
|
|
* |
287
|
|
|
* @param CharStream $input The input stream |
288
|
|
|
* @param DFAState $s The current DFA state |
289
|
|
|
* @param int $t The next input symbol |
290
|
|
|
* |
291
|
|
|
* @return DFAState The computed target DFA state for the given input symbol |
292
|
|
|
* `t`. If `t` does not lead to a valid DFA state, this |
293
|
|
|
* method returns {@see LexerATNSimulator::ERROR}. |
294
|
|
|
*/ |
295
|
7 |
|
protected function computeTargetState(CharStream $input, DFAState $s, int $t) : DFAState |
296
|
|
|
{ |
297
|
7 |
|
$reach = new OrderedATNConfigSet(); |
298
|
|
|
|
299
|
|
|
// if we don't find an existing DFA state |
300
|
|
|
// Fill reach starting from closure, following t transitions |
301
|
7 |
|
$this->getReachableConfigSet($input, $s->configs, $reach, $t); |
302
|
|
|
|
303
|
7 |
|
if (\count($reach->elements()) === 0) { |
304
|
|
|
// we got nowhere on t from s |
305
|
7 |
|
if (!$reach->hasSemanticContext) { |
306
|
|
|
// we got nowhere on t, don't throw out this knowledge; it'd |
307
|
|
|
// cause a failover from DFA later. |
308
|
7 |
|
$this->addDFAEdge($s, $t, ATNSimulator::error()); |
309
|
|
|
} |
310
|
|
|
|
311
|
|
|
// stop when we can't match any more char |
312
|
7 |
|
return ATNSimulator::error(); |
313
|
|
|
} |
314
|
|
|
|
315
|
|
|
// Add an edge from s to target DFA found/created for reach |
316
|
4 |
|
return $this->addDFAEdgeATNConfigSet($s, $t, $reach) ?? ATNSimulator::error(); |
317
|
|
|
} |
318
|
|
|
|
319
|
7 |
|
protected function failOrAccept(SimState $prevAccept, CharStream $input, ATNConfigSet $reach, int $t) : int |
320
|
|
|
{ |
321
|
7 |
|
if ($this->prevAccept->getDfaState() !== null) { |
322
|
6 |
|
$dfaState = $prevAccept->getDfaState(); |
323
|
|
|
|
324
|
6 |
|
if ($dfaState === null) { |
325
|
|
|
throw new \RuntimeException('Unexpected null DFA State.'); |
326
|
|
|
} |
327
|
|
|
|
328
|
6 |
|
$lexerActionExecutor = $dfaState->lexerActionExecutor; |
329
|
|
|
|
330
|
6 |
|
$this->accept( |
331
|
6 |
|
$input, |
332
|
|
|
$lexerActionExecutor, |
333
|
6 |
|
$this->startIndex, |
334
|
6 |
|
$prevAccept->getIndex(), |
335
|
6 |
|
$prevAccept->getLine(), |
336
|
6 |
|
$prevAccept->getCharPos() |
337
|
|
|
); |
338
|
|
|
|
339
|
6 |
|
return $dfaState->prediction; |
340
|
|
|
} |
341
|
|
|
|
342
|
|
|
// if no accept and EOF is first char, return EOF |
343
|
1 |
|
if ($t === IntStream::EOF && $input->getIndex() === $this->startIndex) { |
344
|
1 |
|
return Token::EOF; |
345
|
|
|
} |
346
|
|
|
|
347
|
|
|
if ($this->recog === null) { |
348
|
|
|
throw new \RuntimeException('Unexpected null recognizer.'); |
349
|
|
|
} |
350
|
|
|
|
351
|
|
|
throw new LexerNoViableAltException($this->recog, $input, $this->startIndex, $reach); |
352
|
|
|
} |
353
|
|
|
|
354
|
|
|
/** |
355
|
|
|
* Given a starting configuration set, figure out all ATN configurations |
356
|
|
|
* we can reach upon input `t`. Parameter `reach` is a return parameter. |
357
|
|
|
*/ |
358
|
7 |
|
protected function getReachableConfigSet( |
359
|
|
|
CharStream $input, |
360
|
|
|
ATNConfigSet $closure, |
361
|
|
|
ATNConfigSet $reach, |
362
|
|
|
int $t |
363
|
|
|
) : void { |
364
|
|
|
// this is used to skip processing for configs which have a lower priority |
365
|
|
|
// than a config that already reached an accept state for the same rule |
366
|
7 |
|
$skipAlt = ATN::INVALID_ALT_NUMBER; |
367
|
|
|
|
368
|
7 |
|
foreach ($closure->elements() as $cfg) { |
369
|
7 |
|
if (!$cfg instanceof LexerATNConfig) { |
370
|
|
|
throw new \RuntimeException('Unexpected config type.'); |
371
|
|
|
} |
372
|
|
|
|
373
|
7 |
|
$currentAltReachedAcceptState = ($cfg->alt === $skipAlt); |
374
|
|
|
|
375
|
7 |
|
if ($currentAltReachedAcceptState && $cfg->isPassedThroughNonGreedyDecision()) { |
376
|
|
|
continue; |
377
|
|
|
} |
378
|
|
|
|
379
|
7 |
|
if ($this->debug) { |
380
|
|
|
$this->log[] = \sprintf( |
381
|
|
|
"testing %s at %s\n", |
382
|
|
|
$this->getTokenName($t), |
383
|
|
|
$cfg->toString(true) |
384
|
|
|
); |
385
|
|
|
} |
386
|
|
|
|
387
|
7 |
|
foreach ($cfg->state->getTransitions() as $trans) { |
388
|
6 |
|
$target = $this->getReachableTarget($trans, $t); |
389
|
|
|
|
390
|
6 |
|
if ($target !== null) { |
391
|
4 |
|
$lexerExecutor = $cfg->getLexerActionExecutor(); |
392
|
|
|
|
393
|
4 |
|
if ($lexerExecutor !== null) { |
394
|
|
|
$lexerExecutor = $lexerExecutor->fixOffsetBeforeMatch($input->getIndex() - $this->startIndex); |
395
|
|
|
} |
396
|
|
|
|
397
|
4 |
|
$treatEofAsEpsilon = ($t === Token::EOF); |
398
|
4 |
|
$config = new LexerATNConfig($cfg, $target, null, $lexerExecutor); |
399
|
|
|
|
400
|
4 |
|
if ($this->closure( |
401
|
4 |
|
$input, |
402
|
|
|
$config, |
403
|
|
|
$reach, |
404
|
|
|
$currentAltReachedAcceptState, |
405
|
4 |
|
true, |
406
|
|
|
$treatEofAsEpsilon |
407
|
|
|
)) { |
408
|
|
|
// any remaining configs for this alt have a lower priority |
409
|
|
|
// than the one that just reached an accept state. |
410
|
4 |
|
$skipAlt = $cfg->alt; |
411
|
|
|
} |
412
|
|
|
} |
413
|
|
|
} |
414
|
|
|
} |
415
|
7 |
|
} |
416
|
|
|
|
417
|
6 |
|
protected function accept( |
418
|
|
|
CharStream $input, |
419
|
|
|
?LexerActionExecutor $lexerActionExecutor, |
420
|
|
|
int $startIndex, |
421
|
|
|
int $index, |
422
|
|
|
int $line, |
423
|
|
|
int $charPos |
424
|
|
|
) : void { |
425
|
6 |
|
if ($this->debug) { |
426
|
|
|
$this->log[] = \sprintf('ACTION %s', (string) $lexerActionExecutor) . \PHP_EOL; |
427
|
|
|
} |
428
|
|
|
|
429
|
|
|
// seek to after last char in token |
430
|
6 |
|
$input->seek($index); |
431
|
6 |
|
$this->line = $line; |
432
|
6 |
|
$this->charPositionInLine = $charPos; |
433
|
|
|
|
434
|
6 |
|
if ($lexerActionExecutor !== null && $this->recog) { |
435
|
5 |
|
$lexerActionExecutor->execute($this->recog, $input, $startIndex); |
436
|
|
|
} |
437
|
6 |
|
} |
438
|
|
|
|
439
|
6 |
|
protected function getReachableTarget(Transition $trans, int $t) : ?ATNState |
440
|
|
|
{ |
441
|
6 |
|
if ($trans->matches($t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { |
442
|
4 |
|
return $trans->target; |
443
|
|
|
} |
444
|
|
|
|
445
|
6 |
|
return null; |
446
|
|
|
} |
447
|
|
|
|
448
|
1 |
|
protected function computeStartState(CharStream $input, ATNState $p) : OrderedATNConfigSet |
449
|
|
|
{ |
450
|
1 |
|
$initialContext = PredictionContext::empty(); |
451
|
1 |
|
$configs = new OrderedATNConfigSet(); |
452
|
|
|
|
453
|
1 |
|
foreach ($p->getTransitions() as $i => $t) { |
454
|
1 |
|
$target = $t->target; |
455
|
1 |
|
$cfg = new LexerATNConfig(null, $target, $initialContext, null, $i + 1); |
456
|
1 |
|
$this->closure($input, $cfg, $configs, false, false, false); |
457
|
|
|
} |
458
|
|
|
|
459
|
1 |
|
return $configs; |
460
|
|
|
} |
461
|
|
|
|
462
|
|
|
/** |
463
|
|
|
* Since the alternatives within any lexer decision are ordered by |
464
|
|
|
* preference, this method stops pursuing the closure as soon as an accept |
465
|
|
|
* state is reached. After the first accept state is reached by depth-first |
466
|
|
|
* search from `config`, all other (potentially reachable) states for |
467
|
|
|
* this rule would have a lower priority. |
468
|
|
|
* |
469
|
|
|
* @return bool `true` if an accept state is reached, otherwise `false`. |
470
|
|
|
*/ |
471
|
4 |
|
protected function closure( |
472
|
|
|
CharStream $input, |
473
|
|
|
LexerATNConfig $config, |
474
|
|
|
ATNConfigSet $configs, |
475
|
|
|
bool $currentAltReachedAcceptState, |
476
|
|
|
bool $speculative, |
477
|
|
|
bool $treatEofAsEpsilon |
478
|
|
|
) : bool { |
479
|
4 |
|
$cfg = null; |
480
|
|
|
|
481
|
4 |
|
if ($this->debug) { |
482
|
|
|
$this->log[] = \sprintf('closure(%s)', $config->toString(true)); |
483
|
|
|
} |
484
|
|
|
|
485
|
4 |
|
if ($config->state instanceof States\RuleStopState) { |
486
|
4 |
|
if ($this->debug) { |
487
|
|
|
if ($this->recog) { |
488
|
|
|
$this->log[] = \sprintf( |
489
|
|
|
"closure at %s rule stop %s\n", |
490
|
|
|
$this->recog->getRuleNames()[$config->state->ruleIndex], |
491
|
|
|
$config |
492
|
|
|
); |
493
|
|
|
} else { |
494
|
|
|
$this->log[] = \sprintf("closure at rule stop %s\n", $config); |
495
|
|
|
} |
496
|
|
|
} |
497
|
|
|
|
498
|
4 |
|
if ($config->context === null || $config->context->hasEmptyPath()) { |
499
|
4 |
|
if ($config->context === null || $config->context->isEmpty()) { |
500
|
4 |
|
$configs->add($config); |
501
|
|
|
|
502
|
4 |
|
return true; |
503
|
|
|
} |
504
|
|
|
|
505
|
|
|
$configs->add(new LexerATNConfig($config, $config->state, PredictionContext::empty())); |
506
|
|
|
$currentAltReachedAcceptState = true; |
507
|
|
|
} |
508
|
|
|
|
509
|
|
|
if ($config->context !== null && !$config->context->isEmpty()) { |
510
|
|
|
for ($i = 0; $i < $config->context->getLength(); $i++) { |
511
|
|
|
if ($config->context->getReturnState($i) !== PredictionContext::EMPTY_RETURN_STATE) { |
512
|
|
|
$newContext = $config->context->getParent($i);// "pop" return state |
513
|
|
|
$returnState = $this->atn->states[$config->context->getReturnState($i)]; |
514
|
|
|
$cfg = new LexerATNConfig($config, $returnState, $newContext); |
515
|
|
|
$currentAltReachedAcceptState = $this->closure( |
516
|
|
|
$input, |
517
|
|
|
$cfg, |
518
|
|
|
$configs, |
519
|
|
|
$currentAltReachedAcceptState, |
520
|
|
|
$speculative, |
521
|
|
|
$treatEofAsEpsilon |
522
|
|
|
); |
523
|
|
|
} |
524
|
|
|
} |
525
|
|
|
} |
526
|
|
|
|
527
|
|
|
return $currentAltReachedAcceptState; |
528
|
|
|
} |
529
|
|
|
|
530
|
|
|
// optimization |
531
|
4 |
|
if (!$config->state->epsilonOnlyTransitions) { |
532
|
3 |
|
if (!$currentAltReachedAcceptState || !$config->isPassedThroughNonGreedyDecision()) { |
533
|
3 |
|
$configs->add($config); |
534
|
|
|
} |
535
|
|
|
} |
536
|
|
|
|
537
|
4 |
|
foreach ($config->state->getTransitions() as $trans) { |
538
|
4 |
|
$cfg = $this->getEpsilonTarget($input, $config, $trans, $configs, $speculative, $treatEofAsEpsilon); |
539
|
|
|
|
540
|
4 |
|
if ($cfg !== null) { |
541
|
4 |
|
$currentAltReachedAcceptState = $this->closure( |
542
|
4 |
|
$input, |
543
|
|
|
$cfg, |
544
|
|
|
$configs, |
545
|
|
|
$currentAltReachedAcceptState, |
546
|
|
|
$speculative, |
547
|
|
|
$treatEofAsEpsilon |
548
|
|
|
); |
549
|
|
|
} |
550
|
|
|
} |
551
|
|
|
|
552
|
4 |
|
return $currentAltReachedAcceptState; |
553
|
|
|
} |
554
|
|
|
|
555
|
|
|
/** |
556
|
|
|
* side-effect: can alter configs.hasSemanticContext |
557
|
|
|
*/ |
558
|
4 |
|
protected function getEpsilonTarget( |
559
|
|
|
CharStream $input, |
560
|
|
|
LexerATNConfig $config, |
561
|
|
|
Transition $t, |
562
|
|
|
ATNConfigSet $configs, |
563
|
|
|
bool $speculative, |
564
|
|
|
bool $treatEofAsEpsilon |
565
|
|
|
) : ?LexerATNConfig { |
566
|
4 |
|
$cfg = null; |
567
|
|
|
|
568
|
4 |
|
switch ($t->getSerializationType()) { |
569
|
|
|
case Transition::RULE: |
570
|
|
|
if (!$t instanceof RuleTransition) { |
571
|
|
|
throw new \RuntimeException('Unexpected transition type.'); |
572
|
|
|
} |
573
|
|
|
|
574
|
|
|
$newContext = SingletonPredictionContext::create($config->context, $t->followState->stateNumber); |
575
|
|
|
$cfg = new LexerATNConfig($config, $t->target, $newContext); |
576
|
|
|
|
577
|
|
|
break; |
578
|
|
|
|
579
|
|
|
case Transition::PRECEDENCE: |
580
|
|
|
throw new \RuntimeException('Precedence predicates are not supported in lexers.'); |
581
|
|
|
|
582
|
|
|
case Transition::PREDICATE: |
583
|
|
|
// Track traversing semantic predicates. If we traverse, |
584
|
|
|
// we cannot add a DFA state for this "reach" computation |
585
|
|
|
// because the DFA would not test the predicate again in the |
586
|
|
|
// future. Rather than creating collections of semantic predicates |
587
|
|
|
// like v3 and testing them on prediction, v4 will test them on the |
588
|
|
|
// fly all the time using the ATN not the DFA. This is slower but |
589
|
|
|
// semantically it's not used that often. One of the key elements to |
590
|
|
|
// this predicate mechanism is not adding DFA states that see |
591
|
|
|
// predicates immediately afterwards in the ATN. For example, |
592
|
|
|
|
593
|
|
|
// a : ID {p1}? | ID {p2}? ; |
594
|
|
|
|
595
|
|
|
// should create the start state for rule 'a' (to save start state |
596
|
|
|
// competition), but should not create target of ID state. The |
597
|
|
|
// collection of ATN states the following ID references includes |
598
|
|
|
// states reached by traversing predicates. Since this is when we |
599
|
|
|
// test them, we cannot cash the DFA state target of ID. |
600
|
|
|
|
601
|
|
|
if (!$t instanceof PredicateTransition) { |
602
|
|
|
throw new \RuntimeException('Unexpected transition type.'); |
603
|
|
|
} |
604
|
|
|
|
605
|
|
|
if ($this->debug) { |
606
|
|
|
$this->log[] = \sprintf('EVAL rule %d:%d', $t->ruleIndex, $t->predIndex); |
607
|
|
|
} |
608
|
|
|
|
609
|
|
|
$configs->hasSemanticContext = true; |
610
|
|
|
|
611
|
|
|
if ($this->evaluatePredicate($input, $t->ruleIndex, $t->predIndex, $speculative)) { |
612
|
|
|
$cfg = new LexerATNConfig($config, $t->target); |
613
|
|
|
} |
614
|
|
|
|
615
|
|
|
break; |
616
|
|
|
|
617
|
|
|
case Transition::ACTION: |
618
|
2 |
|
if ($config->context === null || $config->context->hasEmptyPath()) { |
619
|
|
|
// execute actions anywhere in the start rule for a token. |
620
|
|
|
|
621
|
|
|
// TODO: if the entry rule is invoked recursively, some |
622
|
|
|
// actions may be executed during the recursive call. The |
623
|
|
|
// problem can appear when hasEmptyPath() is true but |
624
|
|
|
// isEmpty() is false. In this case, the config needs to be |
625
|
|
|
// split into two contexts - one with just the empty path |
626
|
|
|
// and another with everything but the empty path. |
627
|
|
|
// Unfortunately, the current algorithm does not allow |
628
|
|
|
// getEpsilonTarget to return two configurations, so |
629
|
|
|
// additional modifications are needed before we can support |
630
|
|
|
// the split operation. |
631
|
|
|
|
632
|
2 |
|
if (!$t instanceof ActionTransition) { |
633
|
|
|
throw new \RuntimeException('Unexpected transition type.'); |
634
|
|
|
} |
635
|
|
|
|
636
|
2 |
|
$lexerAction = $this->atn->lexerActions[$t->actionIndex]; |
637
|
|
|
|
638
|
2 |
|
$lexerActionExecutor = LexerActionExecutor::append($config->getLexerActionExecutor(), $lexerAction); |
639
|
|
|
|
640
|
2 |
|
$cfg = new LexerATNConfig($config, $t->target, null, $lexerActionExecutor); |
641
|
|
|
} else { |
642
|
|
|
// ignore actions in referenced rules |
643
|
|
|
$cfg = new LexerATNConfig($config, $t->target); |
644
|
|
|
} |
645
|
|
|
|
646
|
2 |
|
break; |
647
|
|
|
|
648
|
|
|
case Transition::EPSILON: |
649
|
4 |
|
$cfg = new LexerATNConfig($config, $t->target); |
650
|
|
|
|
651
|
4 |
|
break; |
652
|
|
|
|
653
|
|
|
case Transition::ATOM: |
654
|
|
|
case Transition::RANGE: |
655
|
|
|
case Transition::SET: |
656
|
3 |
|
if ($treatEofAsEpsilon) { |
657
|
|
|
if ($t->matches(Token::EOF, 0, Lexer::MAX_CHAR_VALUE)) { |
658
|
|
|
$cfg = new LexerATNConfig($config, $t->target); |
659
|
|
|
} |
660
|
|
|
} |
661
|
|
|
|
662
|
3 |
|
break; |
663
|
|
|
|
664
|
|
|
default: |
665
|
|
|
$cfg = null; |
666
|
|
|
} |
667
|
|
|
|
668
|
4 |
|
return $cfg; |
669
|
|
|
} |
670
|
|
|
|
671
|
|
|
/** |
672
|
|
|
* Evaluate a predicate specified in the lexer. |
673
|
|
|
* |
674
|
|
|
* If `speculative` is `true`, this method was called before |
675
|
|
|
* {@see LexerATNSimulator::consume()} for the matched character. This |
676
|
|
|
* method should call {@see LexerATNSimulator::consume()} before evaluating |
677
|
|
|
* the predicate to ensure position sensitive values, including |
678
|
|
|
* {@see Lexer::getText()}, {@see Lexer::getLine()}, and |
679
|
|
|
* {@see Lexer::getCharPositionInLine()}, properly reflect the current |
680
|
|
|
* lexer state. This method should restore `input` and the simulator |
681
|
|
|
* to the original state before returning (i.e. undo the actions made by |
682
|
|
|
* the call to {@see LexerATNSimulator::consume()}. |
683
|
|
|
* |
684
|
|
|
* @param CharStream $input The input stream. |
685
|
|
|
* @param int $ruleIndex The rule containing the predicate. |
686
|
|
|
* @param int $predIndex The index of the predicate within the rule. |
687
|
|
|
* @param bool $speculative `true` if the current index in `input` is |
688
|
|
|
* one character before the predicate's location. |
689
|
|
|
* |
690
|
|
|
* @return bool `true` If the specified predicate evaluates to `true`. |
691
|
|
|
*/ |
692
|
|
|
protected function evaluatePredicate(CharStream $input, int $ruleIndex, int $predIndex, bool $speculative) : bool |
693
|
|
|
{ |
694
|
|
|
if ($this->recog === null) { |
695
|
|
|
return true; |
696
|
|
|
} |
697
|
|
|
|
698
|
|
|
if (!$speculative) { |
699
|
|
|
return $this->recog->sempred(null, $ruleIndex, $predIndex); |
700
|
|
|
} |
701
|
|
|
|
702
|
|
|
$savedcolumn = $this->charPositionInLine; |
703
|
|
|
$savedLine = $this->line; |
704
|
|
|
$index = $input->getIndex(); |
705
|
|
|
$marker = $input->mark(); |
706
|
|
|
|
707
|
|
|
try { |
708
|
|
|
$this->consume($input); |
709
|
|
|
|
710
|
|
|
return $this->recog->sempred(null, $ruleIndex, $predIndex); |
711
|
|
|
} finally { |
712
|
|
|
$this->charPositionInLine = $savedcolumn; |
713
|
|
|
$this->line = $savedLine; |
714
|
|
|
$input->seek($index); |
715
|
|
|
$input->release($marker); |
716
|
|
|
} |
717
|
|
|
} |
718
|
|
|
|
719
|
6 |
|
protected function captureSimState(SimState $settings, CharStream $input, DFAState $dfaState) : void |
720
|
|
|
{ |
721
|
6 |
|
$settings->setIndex($input->getIndex()); |
722
|
6 |
|
$settings->setLine($this->line); |
723
|
6 |
|
$settings->setCharPos($this->charPositionInLine); |
724
|
6 |
|
$settings->setDfaState($dfaState); |
725
|
6 |
|
} |
726
|
|
|
|
727
|
4 |
|
protected function addDFAEdgeATNConfigSet(DFAState $from, int $t, ATNConfigSet $configs) : DFAState |
728
|
|
|
{ |
729
|
|
|
/* leading to this call, ATNConfigSet.hasSemanticContext is used as a |
730
|
|
|
* marker indicating dynamic predicate evaluation makes this edge |
731
|
|
|
* dependent on the specific input sequence, so the static edge in the |
732
|
|
|
* DFA should be omitted. The target DFAState is still created since |
733
|
|
|
* execATN has the ability to resynchronize with the DFA state cache |
734
|
|
|
* following the predicate evaluation step. |
735
|
|
|
* |
736
|
|
|
* TJP notes: next time through the DFA, we see a pred again and eval. |
737
|
|
|
* If that gets us to a previously created (but dangling) DFA |
738
|
|
|
* state, we can continue in pure DFA mode from there. |
739
|
|
|
*/ |
740
|
4 |
|
$suppressEdge = $configs->hasSemanticContext; |
741
|
4 |
|
$configs->hasSemanticContext = false; |
742
|
|
|
|
743
|
4 |
|
$to = $this->addDFAState($configs); |
744
|
|
|
|
745
|
4 |
|
if ($suppressEdge) { |
746
|
|
|
return $to; |
747
|
|
|
} |
748
|
|
|
|
749
|
4 |
|
$this->addDFAEdge($from, $t, $to); |
750
|
|
|
|
751
|
4 |
|
return $to; |
752
|
|
|
} |
753
|
|
|
|
754
|
7 |
|
protected function addDFAEdge(DFAState $from, int $t, ?DFAState $to) : void |
755
|
|
|
{ |
756
|
|
|
// add the edge |
757
|
7 |
|
if ($t < self::MIN_DFA_EDGE || $t > self::MAX_DFA_EDGE) { |
758
|
|
|
// Only track edges within the DFA bounds |
759
|
7 |
|
return; |
760
|
|
|
} |
761
|
|
|
|
762
|
5 |
|
if ($this->debug) { |
763
|
|
|
$this->log[] = \sprintf('EDGE %s->%s upon %d', $from, $to, $t); |
764
|
|
|
} |
765
|
|
|
|
766
|
5 |
|
if ($from->edges === null) { |
767
|
|
|
// make room for tokens 1..n and -1 masquerading as index 0 |
768
|
4 |
|
$from->edges = new \SplFixedArray(self::MAX_DFA_EDGE - self::MIN_DFA_EDGE + 1); |
769
|
|
|
} |
770
|
|
|
|
771
|
5 |
|
$from->edges[$t - self::MIN_DFA_EDGE] = $to; // connect |
772
|
5 |
|
} |
773
|
|
|
|
774
|
|
|
/** |
775
|
|
|
* Add a new DFA state if there isn't one with this set of configurations |
776
|
|
|
* already. This method also detects the first configuration containing |
777
|
|
|
* an ATN rule stop state. Later, when traversing the DFA, we will know |
778
|
|
|
* which rule to accept. |
779
|
|
|
*/ |
780
|
4 |
|
protected function addDFAState(ATNConfigSet $configs) : DFAState |
781
|
|
|
{ |
782
|
4 |
|
if ($configs->hasSemanticContext) { |
783
|
|
|
throw new \RuntimeException('ATN Config Set cannot have semantic context.'); |
784
|
|
|
} |
785
|
|
|
|
786
|
4 |
|
$proposed = new DFAState($configs); |
787
|
|
|
|
788
|
4 |
|
$firstConfigWithRuleStopState = null; |
789
|
|
|
|
790
|
4 |
|
foreach ($configs->elements() as $config) { |
791
|
4 |
|
if ($config->state instanceof RuleStopState) { |
792
|
4 |
|
$firstConfigWithRuleStopState = $config; |
793
|
|
|
|
794
|
4 |
|
break; |
795
|
|
|
} |
796
|
|
|
} |
797
|
|
|
|
798
|
4 |
|
if ($firstConfigWithRuleStopState !== null) { |
799
|
4 |
|
if (!$firstConfigWithRuleStopState instanceof LexerATNConfig) { |
800
|
|
|
throw new \RuntimeException('Unexpected ATN config type.'); |
801
|
|
|
} |
802
|
|
|
|
803
|
4 |
|
$proposed->isAcceptState = true; |
804
|
4 |
|
$proposed->lexerActionExecutor = $firstConfigWithRuleStopState->getLexerActionExecutor(); |
805
|
|
|
|
806
|
4 |
|
$prediction = $this->atn->ruleToTokenType[$firstConfigWithRuleStopState->state->ruleIndex]; |
807
|
|
|
|
808
|
4 |
|
$proposed->prediction = $prediction ?? 0; |
809
|
|
|
} |
810
|
|
|
|
811
|
4 |
|
$dfa = $this->decisionToDFA[$this->mode]; |
812
|
|
|
|
813
|
4 |
|
$existing = $dfa->states->get($proposed); |
814
|
|
|
|
815
|
4 |
|
if ($existing !== null && $existing instanceof DFAState) { |
816
|
1 |
|
return $existing; |
817
|
|
|
} |
818
|
|
|
|
819
|
4 |
|
$newState = $proposed; |
820
|
4 |
|
$newState->stateNumber = $dfa->states->count(); |
821
|
4 |
|
$configs->setReadonly(true); |
822
|
4 |
|
$newState->configs = $configs; |
823
|
4 |
|
$dfa->states->add($newState); |
824
|
|
|
|
825
|
4 |
|
return $newState; |
826
|
|
|
} |
827
|
|
|
|
828
|
|
|
public function getDFA(int $mode) : DFA |
829
|
|
|
{ |
830
|
|
|
return $this->decisionToDFA[$mode]; |
831
|
|
|
} |
832
|
|
|
|
833
|
|
|
/** |
834
|
|
|
* Get the text matched so far for the current token. |
835
|
|
|
*/ |
836
|
|
|
public function getText(CharStream $input) : string |
837
|
|
|
{ |
838
|
|
|
// index is first lookahead char, don't include. |
839
|
|
|
return $input->getText($this->startIndex, $input->getIndex() - 1); |
840
|
|
|
} |
841
|
|
|
|
842
|
6 |
|
public function consume(CharStream $input) : void |
843
|
|
|
{ |
844
|
6 |
|
$curChar = $input->LA(1); |
845
|
|
|
|
846
|
6 |
|
if ($curChar === self::NEW_LINE_CODE) { |
847
|
2 |
|
$this->line++; |
848
|
2 |
|
$this->charPositionInLine = 0; |
849
|
|
|
} else { |
850
|
6 |
|
$this->charPositionInLine++; |
851
|
|
|
} |
852
|
|
|
|
853
|
6 |
|
$input->consume(); |
854
|
6 |
|
} |
855
|
|
|
|
856
|
|
|
public function getTokenName(int $t) : ?string |
857
|
|
|
{ |
858
|
|
|
if ($t === -1) { |
859
|
|
|
return 'EOF'; |
860
|
|
|
} |
861
|
|
|
|
862
|
|
|
return \sprintf('\'%s\'', StringUtils::char($t)); |
863
|
|
|
} |
864
|
|
|
} |
865
|
|
|
|