|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
declare(strict_types=1); |
|
4
|
|
|
|
|
5
|
|
|
namespace Antlr\Antlr4\Runtime\Atn; |
|
6
|
|
|
|
|
7
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\ATNState; |
|
8
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\RuleStopState; |
|
9
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\ActionTransition; |
|
10
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\PredicateTransition; |
|
11
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; |
|
12
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\Transition; |
|
13
|
|
|
use Antlr\Antlr4\Runtime\CharStream; |
|
14
|
|
|
use Antlr\Antlr4\Runtime\Dfa\DFA; |
|
15
|
|
|
use Antlr\Antlr4\Runtime\Dfa\DFAState; |
|
16
|
|
|
use Antlr\Antlr4\Runtime\Error\Exceptions\LexerNoViableAltException; |
|
17
|
|
|
use Antlr\Antlr4\Runtime\IntStream; |
|
18
|
|
|
use Antlr\Antlr4\Runtime\Lexer; |
|
19
|
|
|
use Antlr\Antlr4\Runtime\PredictionContexts\PredictionContext; |
|
20
|
|
|
use Antlr\Antlr4\Runtime\PredictionContexts\PredictionContextCache; |
|
21
|
|
|
use Antlr\Antlr4\Runtime\PredictionContexts\SingletonPredictionContext; |
|
22
|
|
|
use Antlr\Antlr4\Runtime\Token; |
|
23
|
|
|
use Antlr\Antlr4\Runtime\Utils\StringUtils; |
|
24
|
|
|
|
|
25
|
|
|
class LexerATNSimulator extends ATNSimulator |
|
26
|
|
|
{ |
|
27
|
|
|
public const MIN_DFA_EDGE = 0; |
|
28
|
|
|
public const MAX_DFA_EDGE = 127; // forces unicode to stay in ATN |
|
29
|
|
|
private const NEW_LINE_CODE = 10; |
|
30
|
|
|
|
|
31
|
|
|
/** @var array<string> */ |
|
32
|
|
|
public $log = []; |
|
33
|
|
|
|
|
34
|
|
|
/** @var bool */ |
|
35
|
|
|
public $debug = false; |
|
36
|
|
|
|
|
37
|
|
|
/** @var Lexer|null */ |
|
38
|
|
|
protected $recog; |
|
39
|
|
|
|
|
40
|
|
|
/** |
|
41
|
|
|
* The current token's starting index into the character stream. |
|
42
|
|
|
* Shared across DFA to ATN simulation in case the ATN fails and the |
|
43
|
|
|
* DFA did not have a previous accept state. In this case, we use the |
|
44
|
|
|
* ATN-generated exception object. |
|
45
|
|
|
* |
|
46
|
|
|
* @var int |
|
47
|
|
|
*/ |
|
48
|
|
|
protected $startIndex = -1; |
|
49
|
|
|
|
|
50
|
|
|
/** |
|
51
|
|
|
* The line number 1..n within the input. |
|
52
|
|
|
* |
|
53
|
|
|
* @var int |
|
54
|
|
|
*/ |
|
55
|
|
|
protected $line = 1; |
|
56
|
|
|
|
|
57
|
|
|
/** |
|
58
|
|
|
* The index of the character relative to the beginning of the line 0..n-1. |
|
59
|
|
|
* |
|
60
|
|
|
* @var int |
|
61
|
|
|
*/ |
|
62
|
|
|
protected $charPositionInLine = 0; |
|
63
|
|
|
|
|
64
|
|
|
/** @var array<DFA> */ |
|
65
|
|
|
public $decisionToDFA = []; |
|
66
|
|
|
|
|
67
|
|
|
/** @var int */ |
|
68
|
|
|
protected $mode = Lexer::DEFAULT_MODE; |
|
69
|
|
|
|
|
70
|
|
|
/** |
|
71
|
|
|
* Used during DFA/ATN exec to record the most recent accept configuration info. |
|
72
|
|
|
* |
|
73
|
|
|
* @var SimState |
|
74
|
|
|
*/ |
|
75
|
|
|
protected $prevAccept; |
|
76
|
|
|
|
|
77
|
|
|
/** |
|
78
|
|
|
* @param array<DFA> $decisionToDFA |
|
79
|
|
|
*/ |
|
80
|
7 |
|
public function __construct( |
|
81
|
|
|
?Lexer $recog, |
|
82
|
|
|
ATN $atn, |
|
83
|
|
|
array $decisionToDFA, |
|
84
|
|
|
PredictionContextCache $sharedContextCache |
|
85
|
|
|
) { |
|
86
|
7 |
|
parent::__construct($atn, $sharedContextCache); |
|
87
|
|
|
|
|
88
|
7 |
|
$this->decisionToDFA = $decisionToDFA; |
|
89
|
7 |
|
$this->recog = $recog; |
|
90
|
7 |
|
$this->prevAccept = new SimState(); |
|
91
|
7 |
|
} |
|
92
|
|
|
|
|
93
|
|
|
public function copyState(LexerATNSimulator $simulator) : void |
|
94
|
|
|
{ |
|
95
|
|
|
$this->charPositionInLine = $simulator->charPositionInLine; |
|
96
|
|
|
$this->line = $simulator->line; |
|
97
|
|
|
$this->mode = $simulator->mode; |
|
98
|
|
|
$this->startIndex = $simulator->startIndex; |
|
99
|
|
|
} |
|
100
|
|
|
|
|
101
|
7 |
|
public function getLine() : int |
|
102
|
|
|
{ |
|
103
|
7 |
|
return $this->line; |
|
104
|
|
|
} |
|
105
|
|
|
|
|
106
|
|
|
public function setLine(int $line) : void |
|
107
|
|
|
{ |
|
108
|
|
|
$this->line = $line; |
|
109
|
|
|
} |
|
110
|
|
|
|
|
111
|
7 |
|
public function getCharPositionInLine() : int |
|
112
|
|
|
{ |
|
113
|
7 |
|
return $this->charPositionInLine; |
|
114
|
|
|
} |
|
115
|
|
|
|
|
116
|
|
|
public function setCharPositionInLine(int $charPositionInLine) : void |
|
117
|
|
|
{ |
|
118
|
|
|
$this->charPositionInLine = $charPositionInLine; |
|
119
|
|
|
} |
|
120
|
|
|
|
|
121
|
7 |
|
public function match(CharStream $input, int $mode) : int |
|
122
|
|
|
{ |
|
123
|
7 |
|
static $match_calls; |
|
124
|
|
|
|
|
125
|
7 |
|
if ($match_calls === null) { |
|
126
|
1 |
|
$match_calls = 0; |
|
127
|
|
|
} |
|
128
|
|
|
|
|
129
|
7 |
|
$match_calls++; |
|
130
|
|
|
|
|
131
|
7 |
|
$this->mode = $mode; |
|
132
|
7 |
|
$mark = $input->mark(); |
|
133
|
|
|
|
|
134
|
|
|
try { |
|
135
|
7 |
|
$this->startIndex = $input->getIndex(); |
|
136
|
7 |
|
$this->prevAccept->reset(); |
|
137
|
|
|
|
|
138
|
7 |
|
$dfa = $this->decisionToDFA[$mode]; |
|
139
|
|
|
|
|
140
|
7 |
|
if ($dfa->s0 === null) { |
|
141
|
1 |
|
return $this->matchATN($input); |
|
142
|
|
|
} |
|
143
|
7 |
|
return $this->execATN($input, $dfa->s0); |
|
144
|
|
|
} finally { |
|
145
|
7 |
|
$input->release($mark); |
|
146
|
|
|
} |
|
147
|
|
|
} |
|
148
|
|
|
|
|
149
|
|
|
public function reset() : void |
|
150
|
|
|
{ |
|
151
|
|
|
$this->prevAccept->reset(); |
|
152
|
|
|
$this->startIndex = -1; |
|
153
|
|
|
$this->line = 1; |
|
154
|
|
|
$this->charPositionInLine = 0; |
|
155
|
|
|
$this->mode = Lexer::DEFAULT_MODE; |
|
156
|
|
|
} |
|
157
|
|
|
|
|
158
|
1 |
|
protected function matchATN(CharStream $input) : int |
|
159
|
|
|
{ |
|
160
|
1 |
|
$startState = $this->atn->modeToStartState[$this->mode]; |
|
161
|
|
|
|
|
162
|
1 |
|
if ($this->debug) { |
|
163
|
|
|
$this->log[] = \sprintf('matchATN mode %d start: %s', $this->mode, (string) $startState); |
|
164
|
|
|
} |
|
165
|
|
|
|
|
166
|
1 |
|
$old_mode = $this->mode; |
|
167
|
|
|
|
|
168
|
1 |
|
$s0_closure = $this->computeStartState($input, $startState); |
|
169
|
1 |
|
$suppressEdge = $s0_closure->hasSemanticContext; |
|
170
|
1 |
|
$s0_closure->hasSemanticContext = false; |
|
171
|
|
|
|
|
172
|
1 |
|
$next = $this->addDFAState($s0_closure); |
|
173
|
|
|
|
|
174
|
1 |
|
if (!$suppressEdge) { |
|
175
|
1 |
|
$this->decisionToDFA[$this->mode]->s0 = $next; |
|
176
|
|
|
} |
|
177
|
|
|
|
|
178
|
1 |
|
$predict = $this->execATN($input, $next); |
|
179
|
|
|
|
|
180
|
1 |
|
if ($this->debug) { |
|
181
|
|
|
$this->log[] = \sprintf('DFA after matchATN: %s', $this->decisionToDFA[$old_mode]->toLexerString()); |
|
182
|
|
|
} |
|
183
|
|
|
|
|
184
|
1 |
|
return $predict; |
|
185
|
|
|
} |
|
186
|
|
|
|
|
187
|
7 |
|
protected function execATN(CharStream $input, DFAState $ds0) : int |
|
188
|
|
|
{ |
|
189
|
7 |
|
if ($this->debug) { |
|
190
|
|
|
$this->log[] = \sprintf('start state closure=%s', (string) $ds0->configs); |
|
191
|
|
|
} |
|
192
|
|
|
|
|
193
|
7 |
|
if ($ds0->isAcceptState) { |
|
194
|
|
|
// allow zero-length tokens |
|
195
|
|
|
$this->captureSimState($this->prevAccept, $input, $ds0); |
|
196
|
|
|
} |
|
197
|
|
|
|
|
198
|
7 |
|
$t = $input->LA(1); |
|
199
|
7 |
|
$s = $ds0; // s is current/from DFA state |
|
200
|
|
|
|
|
201
|
7 |
|
while (true) { |
|
202
|
7 |
|
if ($this->debug) { |
|
203
|
|
|
$this->log[] = \sprintf('execATN loop starting closure: %s', (string) $s->configs); |
|
204
|
|
|
} |
|
205
|
|
|
|
|
206
|
|
|
// As we move src->trg, src->trg, we keep track of the previous trg to |
|
207
|
|
|
// avoid looking up the DFA state again, which is expensive. |
|
208
|
|
|
// If the previous target was already part of the DFA, we might |
|
209
|
|
|
// be able to avoid doing a reach operation upon t. If s!=null, |
|
210
|
|
|
// it means that semantic predicates didn't prevent us from |
|
211
|
|
|
// creating a DFA state. Once we know s!=null, we check to see if |
|
212
|
|
|
// the DFA state has an edge already for t. If so, we can just reuse |
|
213
|
|
|
// it's configuration set; there's no point in re-computing it. |
|
214
|
|
|
// This is kind of like doing DFA simulation within the ATN |
|
215
|
|
|
// simulation because DFA simulation is really just a way to avoid |
|
216
|
|
|
// computing reach/closure sets. Technically, once we know that |
|
217
|
|
|
// we have a previously added DFA state, we could jump over to |
|
218
|
|
|
// the DFA simulator. But, that would mean popping back and forth |
|
219
|
|
|
// a lot and making things more complicated algorithmically. |
|
220
|
|
|
// This optimization makes a lot of sense for loops within DFA. |
|
221
|
|
|
// A character will take us back to an existing DFA state |
|
222
|
|
|
// that already has lots of edges out of it. e.g., .* in comments. |
|
223
|
|
|
// print("Target for:" + str(s) + " and:" + str(t)) |
|
224
|
7 |
|
$target = $this->getExistingTargetState($s, $t); |
|
225
|
|
|
|
|
226
|
7 |
|
if ($target === null) { |
|
227
|
7 |
|
$target = $this->computeTargetState($input, $s, $t); |
|
228
|
|
|
} |
|
229
|
|
|
|
|
230
|
7 |
|
if ($target === ATNSimulator::error()) { |
|
231
|
7 |
|
break; |
|
232
|
|
|
} |
|
233
|
|
|
|
|
234
|
|
|
// If this is a consumable input element, make sure to consume before |
|
235
|
|
|
// capturing the accept state so the input index, line, and char |
|
236
|
|
|
// position accurately reflect the state of the interpreter at the |
|
237
|
|
|
// end of the token. |
|
238
|
6 |
|
if ($t !== Token::EOF) { |
|
239
|
6 |
|
$this->consume($input); |
|
240
|
|
|
} |
|
241
|
|
|
|
|
242
|
6 |
|
if ($target->isAcceptState) { |
|
243
|
6 |
|
$this->captureSimState($this->prevAccept, $input, $target); |
|
244
|
6 |
|
if ($t === Token::EOF) { |
|
245
|
|
|
break; |
|
246
|
|
|
} |
|
247
|
|
|
} |
|
248
|
|
|
|
|
249
|
6 |
|
$t = $input->LA(1); |
|
250
|
6 |
|
$s = $target; // flip; current DFA target becomes new src/from state |
|
251
|
|
|
} |
|
252
|
|
|
|
|
253
|
7 |
|
return $this->failOrAccept($this->prevAccept, $input, $s->configs, $t); |
|
254
|
|
|
} |
|
255
|
|
|
|
|
256
|
|
|
/** |
|
257
|
|
|
* Get an existing target state for an edge in the DFA. If the target state |
|
258
|
|
|
* for the edge has not yet been computed or is otherwise not available, |
|
259
|
|
|
* this method returns `null`. |
|
260
|
|
|
* |
|
261
|
|
|
* @param DFAState $s The current DFA state |
|
262
|
|
|
* @param int $t The next input symbol |
|
263
|
|
|
* |
|
264
|
|
|
* @return DFAState|null The existing target DFA state for the given input symbol |
|
265
|
|
|
* `t`, or `null` if the target state for this edg |
|
266
|
|
|
* is not already cached. |
|
267
|
|
|
*/ |
|
268
|
7 |
|
protected function getExistingTargetState(DFAState $s, int $t) : ?DFAState |
|
269
|
|
|
{ |
|
270
|
7 |
|
if ($s->edges === null || $t < self::MIN_DFA_EDGE || $t > self::MAX_DFA_EDGE) { |
|
271
|
7 |
|
return null; |
|
272
|
|
|
} |
|
273
|
|
|
|
|
274
|
6 |
|
$target = $s->edges[$t - self::MIN_DFA_EDGE] ?? null; |
|
275
|
|
|
|
|
276
|
6 |
|
if ($this->debug && $target !== null) { |
|
277
|
|
|
$this->log[] = \sprintf('reuse state %d edge to %d', $s->stateNumber, $target->stateNumber); |
|
278
|
|
|
} |
|
279
|
|
|
|
|
280
|
6 |
|
return $target; |
|
281
|
|
|
} |
|
282
|
|
|
|
|
283
|
|
|
/** |
|
284
|
|
|
* Compute a target state for an edge in the DFA, and attempt to add the |
|
285
|
|
|
* computed state and corresponding edge to the DFA. |
|
286
|
|
|
* |
|
287
|
|
|
* @param CharStream $input The input stream |
|
288
|
|
|
* @param DFAState $s The current DFA state |
|
289
|
|
|
* @param int $t The next input symbol |
|
290
|
|
|
* |
|
291
|
|
|
* @return DFAState The computed target DFA state for the given input symbol |
|
292
|
|
|
* `t`. If `t` does not lead to a valid DFA state, this |
|
293
|
|
|
* method returns {@see LexerATNSimulator::ERROR}. |
|
294
|
|
|
*/ |
|
295
|
7 |
|
protected function computeTargetState(CharStream $input, DFAState $s, int $t) : DFAState |
|
296
|
|
|
{ |
|
297
|
7 |
|
$reach = new OrderedATNConfigSet(); |
|
298
|
|
|
|
|
299
|
|
|
// if we don't find an existing DFA state |
|
300
|
|
|
// Fill reach starting from closure, following t transitions |
|
301
|
7 |
|
$this->getReachableConfigSet($input, $s->configs, $reach, $t); |
|
302
|
|
|
|
|
303
|
7 |
|
if (\count($reach->elements()) === 0) { |
|
304
|
|
|
// we got nowhere on t from s |
|
305
|
7 |
|
if (!$reach->hasSemanticContext) { |
|
306
|
|
|
// we got nowhere on t, don't throw out this knowledge; it'd |
|
307
|
|
|
// cause a failover from DFA later. |
|
308
|
7 |
|
$this->addDFAEdge($s, $t, ATNSimulator::error()); |
|
309
|
|
|
} |
|
310
|
|
|
|
|
311
|
|
|
// stop when we can't match any more char |
|
312
|
7 |
|
return ATNSimulator::error(); |
|
313
|
|
|
} |
|
314
|
|
|
|
|
315
|
|
|
// Add an edge from s to target DFA found/created for reach |
|
316
|
4 |
|
return $this->addDFAEdgeATNConfigSet($s, $t, $reach) ?? ATNSimulator::error(); |
|
317
|
|
|
} |
|
318
|
|
|
|
|
319
|
7 |
|
protected function failOrAccept(SimState $prevAccept, CharStream $input, ATNConfigSet $reach, int $t) : int |
|
320
|
|
|
{ |
|
321
|
7 |
|
if ($this->prevAccept->getDfaState() !== null) { |
|
322
|
6 |
|
$dfaState = $prevAccept->getDfaState(); |
|
323
|
|
|
|
|
324
|
6 |
|
if ($dfaState === null) { |
|
325
|
|
|
throw new \RuntimeException('Unexpected null DFA State.'); |
|
326
|
|
|
} |
|
327
|
|
|
|
|
328
|
6 |
|
$lexerActionExecutor = $dfaState->lexerActionExecutor; |
|
329
|
|
|
|
|
330
|
6 |
|
$this->accept( |
|
331
|
6 |
|
$input, |
|
332
|
|
|
$lexerActionExecutor, |
|
333
|
6 |
|
$this->startIndex, |
|
334
|
6 |
|
$prevAccept->getIndex(), |
|
335
|
6 |
|
$prevAccept->getLine(), |
|
336
|
6 |
|
$prevAccept->getCharPos() |
|
337
|
|
|
); |
|
338
|
|
|
|
|
339
|
6 |
|
return $dfaState->prediction; |
|
340
|
|
|
} |
|
341
|
|
|
|
|
342
|
|
|
// if no accept and EOF is first char, return EOF |
|
343
|
1 |
|
if ($t === IntStream::EOF && $input->getIndex() === $this->startIndex) { |
|
344
|
1 |
|
return Token::EOF; |
|
345
|
|
|
} |
|
346
|
|
|
|
|
347
|
|
|
if ($this->recog === null) { |
|
348
|
|
|
throw new \RuntimeException('Unexpected null recognizer.'); |
|
349
|
|
|
} |
|
350
|
|
|
|
|
351
|
|
|
throw new LexerNoViableAltException($this->recog, $input, $this->startIndex, $reach); |
|
352
|
|
|
} |
|
353
|
|
|
|
|
354
|
|
|
/** |
|
355
|
|
|
* Given a starting configuration set, figure out all ATN configurations |
|
356
|
|
|
* we can reach upon input `t`. Parameter `reach` is a return parameter. |
|
357
|
|
|
*/ |
|
358
|
7 |
|
protected function getReachableConfigSet( |
|
359
|
|
|
CharStream $input, |
|
360
|
|
|
ATNConfigSet $closure, |
|
361
|
|
|
ATNConfigSet $reach, |
|
362
|
|
|
int $t |
|
363
|
|
|
) : void { |
|
364
|
|
|
// this is used to skip processing for configs which have a lower priority |
|
365
|
|
|
// than a config that already reached an accept state for the same rule |
|
366
|
7 |
|
$skipAlt = ATN::INVALID_ALT_NUMBER; |
|
367
|
|
|
|
|
368
|
7 |
|
foreach ($closure->elements() as $cfg) { |
|
369
|
7 |
|
if (!$cfg instanceof LexerATNConfig) { |
|
370
|
|
|
throw new \RuntimeException('Unexpected config type.'); |
|
371
|
|
|
} |
|
372
|
|
|
|
|
373
|
7 |
|
$currentAltReachedAcceptState = ($cfg->alt === $skipAlt); |
|
374
|
|
|
|
|
375
|
7 |
|
if ($currentAltReachedAcceptState && $cfg->isPassedThroughNonGreedyDecision()) { |
|
376
|
|
|
continue; |
|
377
|
|
|
} |
|
378
|
|
|
|
|
379
|
7 |
|
if ($this->debug) { |
|
380
|
|
|
$this->log[] = \sprintf( |
|
381
|
|
|
"testing %s at %s\n", |
|
382
|
|
|
$this->getTokenName($t), |
|
383
|
|
|
$cfg->toString(true) |
|
384
|
|
|
); |
|
385
|
|
|
} |
|
386
|
|
|
|
|
387
|
7 |
|
foreach ($cfg->state->getTransitions() as $trans) { |
|
388
|
6 |
|
$target = $this->getReachableTarget($trans, $t); |
|
389
|
|
|
|
|
390
|
6 |
|
if ($target !== null) { |
|
391
|
4 |
|
$lexerExecutor = $cfg->getLexerActionExecutor(); |
|
392
|
|
|
|
|
393
|
4 |
|
if ($lexerExecutor !== null) { |
|
394
|
|
|
$lexerExecutor = $lexerExecutor->fixOffsetBeforeMatch($input->getIndex() - $this->startIndex); |
|
395
|
|
|
} |
|
396
|
|
|
|
|
397
|
4 |
|
$treatEofAsEpsilon = ($t === Token::EOF); |
|
398
|
4 |
|
$config = new LexerATNConfig($cfg, $target, null, $lexerExecutor); |
|
399
|
|
|
|
|
400
|
4 |
|
if ($this->closure( |
|
401
|
4 |
|
$input, |
|
402
|
|
|
$config, |
|
403
|
|
|
$reach, |
|
404
|
|
|
$currentAltReachedAcceptState, |
|
405
|
4 |
|
true, |
|
406
|
|
|
$treatEofAsEpsilon |
|
407
|
|
|
)) { |
|
408
|
|
|
// any remaining configs for this alt have a lower priority |
|
409
|
|
|
// than the one that just reached an accept state. |
|
410
|
4 |
|
$skipAlt = $cfg->alt; |
|
411
|
|
|
} |
|
412
|
|
|
} |
|
413
|
|
|
} |
|
414
|
|
|
} |
|
415
|
7 |
|
} |
|
416
|
|
|
|
|
417
|
6 |
|
protected function accept( |
|
418
|
|
|
CharStream $input, |
|
419
|
|
|
?LexerActionExecutor $lexerActionExecutor, |
|
420
|
|
|
int $startIndex, |
|
421
|
|
|
int $index, |
|
422
|
|
|
int $line, |
|
423
|
|
|
int $charPos |
|
424
|
|
|
) : void { |
|
425
|
6 |
|
if ($this->debug) { |
|
426
|
|
|
$this->log[] = \sprintf('ACTION %s', (string) $lexerActionExecutor) . \PHP_EOL; |
|
427
|
|
|
} |
|
428
|
|
|
|
|
429
|
|
|
// seek to after last char in token |
|
430
|
6 |
|
$input->seek($index); |
|
431
|
6 |
|
$this->line = $line; |
|
432
|
6 |
|
$this->charPositionInLine = $charPos; |
|
433
|
|
|
|
|
434
|
6 |
|
if ($lexerActionExecutor !== null && $this->recog) { |
|
435
|
5 |
|
$lexerActionExecutor->execute($this->recog, $input, $startIndex); |
|
436
|
|
|
} |
|
437
|
6 |
|
} |
|
438
|
|
|
|
|
439
|
6 |
|
protected function getReachableTarget(Transition $trans, int $t) : ?ATNState |
|
440
|
|
|
{ |
|
441
|
6 |
|
if ($trans->matches($t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)) { |
|
442
|
4 |
|
return $trans->target; |
|
443
|
|
|
} |
|
444
|
|
|
|
|
445
|
6 |
|
return null; |
|
446
|
|
|
} |
|
447
|
|
|
|
|
448
|
1 |
|
protected function computeStartState(CharStream $input, ATNState $p) : OrderedATNConfigSet |
|
449
|
|
|
{ |
|
450
|
1 |
|
$initialContext = PredictionContext::empty(); |
|
451
|
1 |
|
$configs = new OrderedATNConfigSet(); |
|
452
|
|
|
|
|
453
|
1 |
|
foreach ($p->getTransitions() as $i => $t) { |
|
454
|
1 |
|
$target = $t->target; |
|
455
|
1 |
|
$cfg = new LexerATNConfig(null, $target, $initialContext, null, $i + 1); |
|
456
|
1 |
|
$this->closure($input, $cfg, $configs, false, false, false); |
|
457
|
|
|
} |
|
458
|
|
|
|
|
459
|
1 |
|
return $configs; |
|
460
|
|
|
} |
|
461
|
|
|
|
|
462
|
|
|
/** |
|
463
|
|
|
* Since the alternatives within any lexer decision are ordered by |
|
464
|
|
|
* preference, this method stops pursuing the closure as soon as an accept |
|
465
|
|
|
* state is reached. After the first accept state is reached by depth-first |
|
466
|
|
|
* search from `config`, all other (potentially reachable) states for |
|
467
|
|
|
* this rule would have a lower priority. |
|
468
|
|
|
* |
|
469
|
|
|
* @return bool `true` if an accept state is reached, otherwise `false`. |
|
470
|
|
|
*/ |
|
471
|
4 |
|
protected function closure( |
|
472
|
|
|
CharStream $input, |
|
473
|
|
|
LexerATNConfig $config, |
|
474
|
|
|
ATNConfigSet $configs, |
|
475
|
|
|
bool $currentAltReachedAcceptState, |
|
476
|
|
|
bool $speculative, |
|
477
|
|
|
bool $treatEofAsEpsilon |
|
478
|
|
|
) : bool { |
|
479
|
4 |
|
$cfg = null; |
|
480
|
|
|
|
|
481
|
4 |
|
if ($this->debug) { |
|
482
|
|
|
$this->log[] = \sprintf('closure(%s)', $config->toString(true)); |
|
483
|
|
|
} |
|
484
|
|
|
|
|
485
|
4 |
|
if ($config->state instanceof States\RuleStopState) { |
|
486
|
4 |
|
if ($this->debug) { |
|
487
|
|
|
if ($this->recog) { |
|
488
|
|
|
$this->log[] = \sprintf( |
|
489
|
|
|
"closure at %s rule stop %s\n", |
|
490
|
|
|
$this->recog->getRuleNames()[$config->state->ruleIndex], |
|
491
|
|
|
$config |
|
492
|
|
|
); |
|
493
|
|
|
} else { |
|
494
|
|
|
$this->log[] = \sprintf("closure at rule stop %s\n", $config); |
|
495
|
|
|
} |
|
496
|
|
|
} |
|
497
|
|
|
|
|
498
|
4 |
|
if ($config->context === null || $config->context->hasEmptyPath()) { |
|
499
|
4 |
|
if ($config->context === null || $config->context->isEmpty()) { |
|
500
|
4 |
|
$configs->add($config); |
|
501
|
|
|
|
|
502
|
4 |
|
return true; |
|
503
|
|
|
} |
|
504
|
|
|
|
|
505
|
|
|
$configs->add(new LexerATNConfig($config, $config->state, PredictionContext::empty())); |
|
506
|
|
|
$currentAltReachedAcceptState = true; |
|
507
|
|
|
} |
|
508
|
|
|
|
|
509
|
|
|
if ($config->context !== null && !$config->context->isEmpty()) { |
|
510
|
|
|
for ($i = 0; $i < $config->context->getLength(); $i++) { |
|
511
|
|
|
if ($config->context->getReturnState($i) !== PredictionContext::EMPTY_RETURN_STATE) { |
|
512
|
|
|
$newContext = $config->context->getParent($i);// "pop" return state |
|
513
|
|
|
$returnState = $this->atn->states[$config->context->getReturnState($i)]; |
|
514
|
|
|
$cfg = new LexerATNConfig($config, $returnState, $newContext); |
|
515
|
|
|
$currentAltReachedAcceptState = $this->closure( |
|
516
|
|
|
$input, |
|
517
|
|
|
$cfg, |
|
518
|
|
|
$configs, |
|
519
|
|
|
$currentAltReachedAcceptState, |
|
520
|
|
|
$speculative, |
|
521
|
|
|
$treatEofAsEpsilon |
|
522
|
|
|
); |
|
523
|
|
|
} |
|
524
|
|
|
} |
|
525
|
|
|
} |
|
526
|
|
|
|
|
527
|
|
|
return $currentAltReachedAcceptState; |
|
528
|
|
|
} |
|
529
|
|
|
|
|
530
|
|
|
// optimization |
|
531
|
4 |
|
if (!$config->state->epsilonOnlyTransitions) { |
|
532
|
3 |
|
if (!$currentAltReachedAcceptState || !$config->isPassedThroughNonGreedyDecision()) { |
|
533
|
3 |
|
$configs->add($config); |
|
534
|
|
|
} |
|
535
|
|
|
} |
|
536
|
|
|
|
|
537
|
4 |
|
foreach ($config->state->getTransitions() as $trans) { |
|
538
|
4 |
|
$cfg = $this->getEpsilonTarget($input, $config, $trans, $configs, $speculative, $treatEofAsEpsilon); |
|
539
|
|
|
|
|
540
|
4 |
|
if ($cfg !== null) { |
|
541
|
4 |
|
$currentAltReachedAcceptState = $this->closure( |
|
542
|
4 |
|
$input, |
|
543
|
|
|
$cfg, |
|
544
|
|
|
$configs, |
|
545
|
|
|
$currentAltReachedAcceptState, |
|
546
|
|
|
$speculative, |
|
547
|
|
|
$treatEofAsEpsilon |
|
548
|
|
|
); |
|
549
|
|
|
} |
|
550
|
|
|
} |
|
551
|
|
|
|
|
552
|
4 |
|
return $currentAltReachedAcceptState; |
|
553
|
|
|
} |
|
554
|
|
|
|
|
555
|
|
|
/** |
|
556
|
|
|
* side-effect: can alter configs.hasSemanticContext |
|
557
|
|
|
*/ |
|
558
|
4 |
|
protected function getEpsilonTarget( |
|
559
|
|
|
CharStream $input, |
|
560
|
|
|
LexerATNConfig $config, |
|
561
|
|
|
Transition $t, |
|
562
|
|
|
ATNConfigSet $configs, |
|
563
|
|
|
bool $speculative, |
|
564
|
|
|
bool $treatEofAsEpsilon |
|
565
|
|
|
) : ?LexerATNConfig { |
|
566
|
4 |
|
$cfg = null; |
|
567
|
|
|
|
|
568
|
4 |
|
switch ($t->getSerializationType()) { |
|
569
|
|
|
case Transition::RULE: |
|
570
|
|
|
if (!$t instanceof RuleTransition) { |
|
571
|
|
|
throw new \RuntimeException('Unexpected transition type.'); |
|
572
|
|
|
} |
|
573
|
|
|
|
|
574
|
|
|
$newContext = SingletonPredictionContext::create($config->context, $t->followState->stateNumber); |
|
575
|
|
|
$cfg = new LexerATNConfig($config, $t->target, $newContext); |
|
576
|
|
|
|
|
577
|
|
|
break; |
|
578
|
|
|
|
|
579
|
|
|
case Transition::PRECEDENCE: |
|
580
|
|
|
throw new \RuntimeException('Precedence predicates are not supported in lexers.'); |
|
581
|
|
|
|
|
582
|
|
|
case Transition::PREDICATE: |
|
583
|
|
|
// Track traversing semantic predicates. If we traverse, |
|
584
|
|
|
// we cannot add a DFA state for this "reach" computation |
|
585
|
|
|
// because the DFA would not test the predicate again in the |
|
586
|
|
|
// future. Rather than creating collections of semantic predicates |
|
587
|
|
|
// like v3 and testing them on prediction, v4 will test them on the |
|
588
|
|
|
// fly all the time using the ATN not the DFA. This is slower but |
|
589
|
|
|
// semantically it's not used that often. One of the key elements to |
|
590
|
|
|
// this predicate mechanism is not adding DFA states that see |
|
591
|
|
|
// predicates immediately afterwards in the ATN. For example, |
|
592
|
|
|
|
|
593
|
|
|
// a : ID {p1}? | ID {p2}? ; |
|
594
|
|
|
|
|
595
|
|
|
// should create the start state for rule 'a' (to save start state |
|
596
|
|
|
// competition), but should not create target of ID state. The |
|
597
|
|
|
// collection of ATN states the following ID references includes |
|
598
|
|
|
// states reached by traversing predicates. Since this is when we |
|
599
|
|
|
// test them, we cannot cash the DFA state target of ID. |
|
600
|
|
|
|
|
601
|
|
|
if (!$t instanceof PredicateTransition) { |
|
602
|
|
|
throw new \RuntimeException('Unexpected transition type.'); |
|
603
|
|
|
} |
|
604
|
|
|
|
|
605
|
|
|
if ($this->debug) { |
|
606
|
|
|
$this->log[] = \sprintf('EVAL rule %d:%d', $t->ruleIndex, $t->predIndex); |
|
607
|
|
|
} |
|
608
|
|
|
|
|
609
|
|
|
$configs->hasSemanticContext = true; |
|
610
|
|
|
|
|
611
|
|
|
if ($this->evaluatePredicate($input, $t->ruleIndex, $t->predIndex, $speculative)) { |
|
612
|
|
|
$cfg = new LexerATNConfig($config, $t->target); |
|
613
|
|
|
} |
|
614
|
|
|
|
|
615
|
|
|
break; |
|
616
|
|
|
|
|
617
|
|
|
case Transition::ACTION: |
|
618
|
2 |
|
if ($config->context === null || $config->context->hasEmptyPath()) { |
|
619
|
|
|
// execute actions anywhere in the start rule for a token. |
|
620
|
|
|
|
|
621
|
|
|
// TODO: if the entry rule is invoked recursively, some |
|
622
|
|
|
// actions may be executed during the recursive call. The |
|
623
|
|
|
// problem can appear when hasEmptyPath() is true but |
|
624
|
|
|
// isEmpty() is false. In this case, the config needs to be |
|
625
|
|
|
// split into two contexts - one with just the empty path |
|
626
|
|
|
// and another with everything but the empty path. |
|
627
|
|
|
// Unfortunately, the current algorithm does not allow |
|
628
|
|
|
// getEpsilonTarget to return two configurations, so |
|
629
|
|
|
// additional modifications are needed before we can support |
|
630
|
|
|
// the split operation. |
|
631
|
|
|
|
|
632
|
2 |
|
if (!$t instanceof ActionTransition) { |
|
633
|
|
|
throw new \RuntimeException('Unexpected transition type.'); |
|
634
|
|
|
} |
|
635
|
|
|
|
|
636
|
2 |
|
$lexerAction = $this->atn->lexerActions[$t->actionIndex]; |
|
637
|
|
|
|
|
638
|
2 |
|
$lexerActionExecutor = LexerActionExecutor::append($config->getLexerActionExecutor(), $lexerAction); |
|
639
|
|
|
|
|
640
|
2 |
|
$cfg = new LexerATNConfig($config, $t->target, null, $lexerActionExecutor); |
|
641
|
|
|
} else { |
|
642
|
|
|
// ignore actions in referenced rules |
|
643
|
|
|
$cfg = new LexerATNConfig($config, $t->target); |
|
644
|
|
|
} |
|
645
|
|
|
|
|
646
|
2 |
|
break; |
|
647
|
|
|
|
|
648
|
|
|
case Transition::EPSILON: |
|
649
|
4 |
|
$cfg = new LexerATNConfig($config, $t->target); |
|
650
|
|
|
|
|
651
|
4 |
|
break; |
|
652
|
|
|
|
|
653
|
|
|
case Transition::ATOM: |
|
654
|
|
|
case Transition::RANGE: |
|
655
|
|
|
case Transition::SET: |
|
656
|
3 |
|
if ($treatEofAsEpsilon) { |
|
657
|
|
|
if ($t->matches(Token::EOF, 0, Lexer::MAX_CHAR_VALUE)) { |
|
658
|
|
|
$cfg = new LexerATNConfig($config, $t->target); |
|
659
|
|
|
} |
|
660
|
|
|
} |
|
661
|
|
|
|
|
662
|
3 |
|
break; |
|
663
|
|
|
|
|
664
|
|
|
default: |
|
665
|
|
|
$cfg = null; |
|
666
|
|
|
} |
|
667
|
|
|
|
|
668
|
4 |
|
return $cfg; |
|
669
|
|
|
} |
|
670
|
|
|
|
|
671
|
|
|
/** |
|
672
|
|
|
* Evaluate a predicate specified in the lexer. |
|
673
|
|
|
* |
|
674
|
|
|
* If `speculative` is `true`, this method was called before |
|
675
|
|
|
* {@see LexerATNSimulator::consume()} for the matched character. This |
|
676
|
|
|
* method should call {@see LexerATNSimulator::consume()} before evaluating |
|
677
|
|
|
* the predicate to ensure position sensitive values, including |
|
678
|
|
|
* {@see Lexer::getText()}, {@see Lexer::getLine()}, and |
|
679
|
|
|
* {@see Lexer::getCharPositionInLine()}, properly reflect the current |
|
680
|
|
|
* lexer state. This method should restore `input` and the simulator |
|
681
|
|
|
* to the original state before returning (i.e. undo the actions made by |
|
682
|
|
|
* the call to {@see LexerATNSimulator::consume()}. |
|
683
|
|
|
* |
|
684
|
|
|
* @param CharStream $input The input stream. |
|
685
|
|
|
* @param int $ruleIndex The rule containing the predicate. |
|
686
|
|
|
* @param int $predIndex The index of the predicate within the rule. |
|
687
|
|
|
* @param bool $speculative `true` if the current index in `input` is |
|
688
|
|
|
* one character before the predicate's location. |
|
689
|
|
|
* |
|
690
|
|
|
* @return bool `true` If the specified predicate evaluates to `true`. |
|
691
|
|
|
*/ |
|
692
|
|
|
protected function evaluatePredicate(CharStream $input, int $ruleIndex, int $predIndex, bool $speculative) : bool |
|
693
|
|
|
{ |
|
694
|
|
|
if ($this->recog === null) { |
|
695
|
|
|
return true; |
|
696
|
|
|
} |
|
697
|
|
|
|
|
698
|
|
|
if (!$speculative) { |
|
699
|
|
|
return $this->recog->sempred(null, $ruleIndex, $predIndex); |
|
700
|
|
|
} |
|
701
|
|
|
|
|
702
|
|
|
$savedcolumn = $this->charPositionInLine; |
|
703
|
|
|
$savedLine = $this->line; |
|
704
|
|
|
$index = $input->getIndex(); |
|
705
|
|
|
$marker = $input->mark(); |
|
706
|
|
|
|
|
707
|
|
|
try { |
|
708
|
|
|
$this->consume($input); |
|
709
|
|
|
|
|
710
|
|
|
return $this->recog->sempred(null, $ruleIndex, $predIndex); |
|
711
|
|
|
} finally { |
|
712
|
|
|
$this->charPositionInLine = $savedcolumn; |
|
713
|
|
|
$this->line = $savedLine; |
|
714
|
|
|
$input->seek($index); |
|
715
|
|
|
$input->release($marker); |
|
716
|
|
|
} |
|
717
|
|
|
} |
|
718
|
|
|
|
|
719
|
6 |
|
protected function captureSimState(SimState $settings, CharStream $input, DFAState $dfaState) : void |
|
720
|
|
|
{ |
|
721
|
6 |
|
$settings->setIndex($input->getIndex()); |
|
722
|
6 |
|
$settings->setLine($this->line); |
|
723
|
6 |
|
$settings->setCharPos($this->charPositionInLine); |
|
724
|
6 |
|
$settings->setDfaState($dfaState); |
|
725
|
6 |
|
} |
|
726
|
|
|
|
|
727
|
4 |
|
protected function addDFAEdgeATNConfigSet(DFAState $from, int $t, ATNConfigSet $configs) : DFAState |
|
728
|
|
|
{ |
|
729
|
|
|
/* leading to this call, ATNConfigSet.hasSemanticContext is used as a |
|
730
|
|
|
* marker indicating dynamic predicate evaluation makes this edge |
|
731
|
|
|
* dependent on the specific input sequence, so the static edge in the |
|
732
|
|
|
* DFA should be omitted. The target DFAState is still created since |
|
733
|
|
|
* execATN has the ability to resynchronize with the DFA state cache |
|
734
|
|
|
* following the predicate evaluation step. |
|
735
|
|
|
* |
|
736
|
|
|
* TJP notes: next time through the DFA, we see a pred again and eval. |
|
737
|
|
|
* If that gets us to a previously created (but dangling) DFA |
|
738
|
|
|
* state, we can continue in pure DFA mode from there. |
|
739
|
|
|
*/ |
|
740
|
4 |
|
$suppressEdge = $configs->hasSemanticContext; |
|
741
|
4 |
|
$configs->hasSemanticContext = false; |
|
742
|
|
|
|
|
743
|
4 |
|
$to = $this->addDFAState($configs); |
|
744
|
|
|
|
|
745
|
4 |
|
if ($suppressEdge) { |
|
746
|
|
|
return $to; |
|
747
|
|
|
} |
|
748
|
|
|
|
|
749
|
4 |
|
$this->addDFAEdge($from, $t, $to); |
|
750
|
|
|
|
|
751
|
4 |
|
return $to; |
|
752
|
|
|
} |
|
753
|
|
|
|
|
754
|
7 |
|
protected function addDFAEdge(DFAState $from, int $t, ?DFAState $to) : void |
|
755
|
|
|
{ |
|
756
|
|
|
// add the edge |
|
757
|
7 |
|
if ($t < self::MIN_DFA_EDGE || $t > self::MAX_DFA_EDGE) { |
|
758
|
|
|
// Only track edges within the DFA bounds |
|
759
|
7 |
|
return; |
|
760
|
|
|
} |
|
761
|
|
|
|
|
762
|
5 |
|
if ($this->debug) { |
|
763
|
|
|
$this->log[] = \sprintf('EDGE %s->%s upon %d', $from, $to, $t); |
|
764
|
|
|
} |
|
765
|
|
|
|
|
766
|
5 |
|
if ($from->edges === null) { |
|
767
|
|
|
// make room for tokens 1..n and -1 masquerading as index 0 |
|
768
|
4 |
|
$from->edges = new \SplFixedArray(self::MAX_DFA_EDGE - self::MIN_DFA_EDGE + 1); |
|
769
|
|
|
} |
|
770
|
|
|
|
|
771
|
5 |
|
$from->edges[$t - self::MIN_DFA_EDGE] = $to; // connect |
|
772
|
5 |
|
} |
|
773
|
|
|
|
|
774
|
|
|
/** |
|
775
|
|
|
* Add a new DFA state if there isn't one with this set of configurations |
|
776
|
|
|
* already. This method also detects the first configuration containing |
|
777
|
|
|
* an ATN rule stop state. Later, when traversing the DFA, we will know |
|
778
|
|
|
* which rule to accept. |
|
779
|
|
|
*/ |
|
780
|
4 |
|
protected function addDFAState(ATNConfigSet $configs) : DFAState |
|
781
|
|
|
{ |
|
782
|
4 |
|
if ($configs->hasSemanticContext) { |
|
783
|
|
|
throw new \RuntimeException('ATN Config Set cannot have semantic context.'); |
|
784
|
|
|
} |
|
785
|
|
|
|
|
786
|
4 |
|
$proposed = new DFAState($configs); |
|
787
|
|
|
|
|
788
|
4 |
|
$firstConfigWithRuleStopState = null; |
|
789
|
|
|
|
|
790
|
4 |
|
foreach ($configs->elements() as $config) { |
|
791
|
4 |
|
if ($config->state instanceof RuleStopState) { |
|
792
|
4 |
|
$firstConfigWithRuleStopState = $config; |
|
793
|
|
|
|
|
794
|
4 |
|
break; |
|
795
|
|
|
} |
|
796
|
|
|
} |
|
797
|
|
|
|
|
798
|
4 |
|
if ($firstConfigWithRuleStopState !== null) { |
|
799
|
4 |
|
if (!$firstConfigWithRuleStopState instanceof LexerATNConfig) { |
|
800
|
|
|
throw new \RuntimeException('Unexpected ATN config type.'); |
|
801
|
|
|
} |
|
802
|
|
|
|
|
803
|
4 |
|
$proposed->isAcceptState = true; |
|
804
|
4 |
|
$proposed->lexerActionExecutor = $firstConfigWithRuleStopState->getLexerActionExecutor(); |
|
805
|
|
|
|
|
806
|
4 |
|
$prediction = $this->atn->ruleToTokenType[$firstConfigWithRuleStopState->state->ruleIndex]; |
|
807
|
|
|
|
|
808
|
4 |
|
$proposed->prediction = $prediction ?? 0; |
|
809
|
|
|
} |
|
810
|
|
|
|
|
811
|
4 |
|
$dfa = $this->decisionToDFA[$this->mode]; |
|
812
|
|
|
|
|
813
|
4 |
|
$existing = $dfa->states->get($proposed); |
|
814
|
|
|
|
|
815
|
4 |
|
if ($existing !== null && $existing instanceof DFAState) { |
|
816
|
1 |
|
return $existing; |
|
817
|
|
|
} |
|
818
|
|
|
|
|
819
|
4 |
|
$newState = $proposed; |
|
820
|
4 |
|
$newState->stateNumber = $dfa->states->count(); |
|
821
|
4 |
|
$configs->setReadonly(true); |
|
822
|
4 |
|
$newState->configs = $configs; |
|
823
|
4 |
|
$dfa->states->add($newState); |
|
824
|
|
|
|
|
825
|
4 |
|
return $newState; |
|
826
|
|
|
} |
|
827
|
|
|
|
|
828
|
|
|
public function getDFA(int $mode) : DFA |
|
829
|
|
|
{ |
|
830
|
|
|
return $this->decisionToDFA[$mode]; |
|
831
|
|
|
} |
|
832
|
|
|
|
|
833
|
|
|
/** |
|
834
|
|
|
* Get the text matched so far for the current token. |
|
835
|
|
|
*/ |
|
836
|
|
|
public function getText(CharStream $input) : string |
|
837
|
|
|
{ |
|
838
|
|
|
// index is first lookahead char, don't include. |
|
839
|
|
|
return $input->getText($this->startIndex, $input->getIndex() - 1); |
|
840
|
|
|
} |
|
841
|
|
|
|
|
842
|
6 |
|
public function consume(CharStream $input) : void |
|
843
|
|
|
{ |
|
844
|
6 |
|
$curChar = $input->LA(1); |
|
845
|
|
|
|
|
846
|
6 |
|
if ($curChar === self::NEW_LINE_CODE) { |
|
847
|
2 |
|
$this->line++; |
|
848
|
2 |
|
$this->charPositionInLine = 0; |
|
849
|
|
|
} else { |
|
850
|
6 |
|
$this->charPositionInLine++; |
|
851
|
|
|
} |
|
852
|
|
|
|
|
853
|
6 |
|
$input->consume(); |
|
854
|
6 |
|
} |
|
855
|
|
|
|
|
856
|
|
|
public function getTokenName(int $t) : ?string |
|
857
|
|
|
{ |
|
858
|
|
|
if ($t === -1) { |
|
859
|
|
|
return 'EOF'; |
|
860
|
|
|
} |
|
861
|
|
|
|
|
862
|
|
|
return \sprintf('\'%s\'', StringUtils::char($t)); |
|
863
|
|
|
} |
|
864
|
|
|
} |
|
865
|
|
|
|