1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace Antlr\Antlr4\Runtime\Atn; |
6
|
|
|
|
7
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerAction; |
8
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerActionType; |
9
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerChannelAction; |
10
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerCustomAction; |
11
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerModeAction; |
12
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerMoreAction; |
13
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerPopModeAction; |
14
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerPushModeAction; |
15
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerSkipAction; |
16
|
|
|
use Antlr\Antlr4\Runtime\Atn\Actions\LexerTypeAction; |
17
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\ATNState; |
18
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\BasicBlockStartState; |
19
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\BasicState; |
20
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\BlockEndState; |
21
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\BlockStartState; |
22
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\DecisionState; |
23
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\LoopEndState; |
24
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\PlusBlockStartState; |
25
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\PlusLoopbackState; |
26
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\RuleStartState; |
27
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\RuleStopState; |
28
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\StarBlockStartState; |
29
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\StarLoopbackState; |
30
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\StarLoopEntryState; |
31
|
|
|
use Antlr\Antlr4\Runtime\Atn\States\TokensStartState; |
32
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\ActionTransition; |
33
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\AtomTransition; |
34
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\EpsilonTransition; |
35
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\NotSetTransition; |
36
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\PrecedencePredicateTransition; |
37
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\PredicateTransition; |
38
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\RangeTransition; |
39
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\RuleTransition; |
40
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\SetTransition; |
41
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\Transition; |
42
|
|
|
use Antlr\Antlr4\Runtime\Atn\Transitions\WildcardTransition; |
43
|
|
|
use Antlr\Antlr4\Runtime\IntervalSet; |
44
|
|
|
use Antlr\Antlr4\Runtime\Token; |
45
|
|
|
use Antlr\Antlr4\Runtime\Utils\StringUtils; |
46
|
|
|
|
47
|
|
|
final class ATNDeserializer |
48
|
|
|
{ |
49
|
|
|
/** |
50
|
|
|
* This value should never change. Updates following this version are |
51
|
|
|
* reflected as change in the unique ID SERIALIZED_UUID. |
52
|
|
|
*/ |
53
|
|
|
public const SERIALIZED_VERSION = 3; |
54
|
|
|
|
55
|
|
|
/** |
56
|
|
|
* This is the earliest supported serialized UUID. |
57
|
|
|
* Stick to serialized version for now, we don't need a UUID instance. |
58
|
|
|
*/ |
59
|
|
|
private const BASE_SERIALIZED_UUID = 'AADB8D7E-AEEF-4415-AD2B-8204D6CF042E'; |
60
|
|
|
|
61
|
|
|
/** |
62
|
|
|
* This UUID indicates the serialized ATN contains two sets of IntervalSets, |
63
|
|
|
* where the second set's values are encoded as 32-bit integers to support |
64
|
|
|
* the full Unicode SMP range up to U+10FFFF. |
65
|
|
|
*/ |
66
|
|
|
private const ADDED_UNICODE_SMP = '59627784-3BE5-417A-B9EB-8131A7286089'; |
67
|
|
|
|
68
|
|
|
/** |
69
|
|
|
* This list contains all of the currently supported UUIDs, ordered by when |
70
|
|
|
* the feature first appeared in this branch. |
71
|
|
|
*/ |
72
|
|
|
private const SUPPORTED_UUIDS = [ |
73
|
|
|
self::BASE_SERIALIZED_UUID, |
74
|
|
|
self::ADDED_UNICODE_SMP, |
75
|
|
|
]; |
76
|
|
|
|
77
|
|
|
/** |
78
|
|
|
* This is the current serialized UUID. |
79
|
|
|
*/ |
80
|
|
|
private const SERIALIZED_UUID = self::ADDED_UNICODE_SMP; |
81
|
|
|
|
82
|
|
|
/** @var ATNDeserializationOptions */ |
83
|
|
|
private $deserializationOptions; |
84
|
|
|
|
85
|
|
|
/** @var array<int> */ |
86
|
|
|
private $data = []; |
87
|
|
|
|
88
|
|
|
/** @var int */ |
89
|
|
|
private $pos = 0; |
90
|
|
|
|
91
|
|
|
/** @var string */ |
92
|
|
|
private $uuid = ''; |
93
|
|
|
|
94
|
|
|
/** @var array<int, callable|null>|null */ |
95
|
|
|
private $stateFactories; |
|
|
|
|
96
|
|
|
|
97
|
|
|
/** @var array<int, callable|null>|null */ |
98
|
|
|
private $actionFactories; |
|
|
|
|
99
|
|
|
|
100
|
1 |
|
/**
 * Creates a deserializer.
 *
 * @param ATNDeserializationOptions|null $options Options controlling deserialization; when
 *                                                `null`, the result of
 *                                                {@see ATNDeserializationOptions::defaultOptions()}
 *                                                is used.
 */
public function __construct(?ATNDeserializationOptions $options = null)
{
    if ($options === null) {
        $options = ATNDeserializationOptions::defaultOptions();
    }

    $this->deserializationOptions = $options;
}
104
|
|
|
|
105
|
|
|
/**
 * Determines if a particular serialized representation of an ATN supports
 * a particular feature, identified by the UUID used for serializing the ATN
 * at the time the feature was first introduced.
 *
 * Both UUIDs are looked up in {@see self::SUPPORTED_UUIDS}; a feature is
 * supported when the actual UUID appears at the same position as, or later
 * than, the feature UUID.
 *
 * @param string $feature    The UUID marking the first time the feature was
 *                           supported in the serialized ATN.
 * @param string $actualUuid The UUID of the actual serialized ATN which is
 *                           currently being deserialized.
 *
 * @return bool `true` if the `actualUuid` value represents a serialized
 *              ATN at or after the feature identified by `feature` was
 *              introduced; otherwise, `false` (including when either UUID
 *              is not in the supported list).
 */
protected function isFeatureSupported(string $feature, string $actualUuid) : bool
{
    $featureIndex = \array_search($feature, self::SUPPORTED_UUIDS, true);
    $actualUuidIndex = \array_search($actualUuid, self::SUPPORTED_UUIDS, true);

    // array_search() yields false for an unknown UUID. Comparing false with an
    // integer index converts both operands to bool, so `false >= 0` would be
    // true; reject unknown UUIDs explicitly instead of relying on `>=`.
    if ($featureIndex === false || $actualUuidIndex === false) {
        return false;
    }

    return $actualUuidIndex >= $featureIndex;
}
131
|
|
|
|
132
|
1 |
|
/**
 * Builds an {@see ATN} from its serialized string form.
 *
 * The sections are consumed in a fixed order: version, UUID, ATN header,
 * states, rules, modes, interval sets, edges, decisions and lexer actions.
 * Precedence decisions are then marked and the ATN is verified. For parser
 * grammars, rule bypass transitions are generated (and the ATN re-verified)
 * when the deserialization options request it.
 *
 * @param string $data The serialized ATN.
 *
 * @return ATN The deserialized ATN.
 *
 * @throws \InvalidArgumentException When the serialized version or UUID is
 *                                   not supported.
 */
public function deserialize(string $data) : ATN
{
    $this->reset($data);
    $this->checkVersion();
    $this->checkUUID();

    $atn = $this->readATN();
    $this->readStates($atn);
    $this->readRules($atn);
    $this->readModes($atn);

    $sets = [];

    // First, deserialize sets with 16-bit arguments <= U+FFFF.
    $this->readSets($sets, \Closure::fromCallable([$this, 'readInt']));

    // Next, if the ATN was serialized with the Unicode SMP feature,
    // deserialize sets with 32-bit arguments <= U+10FFFF.
    if ($this->isFeatureSupported(self::ADDED_UNICODE_SMP, $this->uuid)) {
        $this->readSets($sets, \Closure::fromCallable([$this, 'readInt32']));
    }

    $this->readEdges($atn, $sets);
    $this->readDecisions($atn);
    $this->readLexerActions($atn);
    $this->markPrecedenceDecisions($atn);
    $this->verifyATN($atn);

    $needsBypassTransitions = $atn->grammarType === ATN::ATN_TYPE_PARSER
        && $this->deserializationOptions->isGenerateRuleBypassTransitions();

    if ($needsBypassTransitions) {
        $this->generateRuleBypassTransitions($atn);

        // The ATN was modified, so verify it again.
        $this->verifyATN($atn);
    }

    return $atn;
}
172
|
|
|
|
173
|
1 |
|
/**
 * Loads a serialized ATN into the internal integer buffer and rewinds the
 * read position.
 *
 * The string is split into UTF-8 characters and each one is converted to its
 * code point. The first value is stored unchanged; every later value is
 * shifted back by 2, with the code points 0 and 1 mapping to 65533 and 65534
 * respectively.
 *
 * @param string $data The serialized ATN.
 *
 * @throws \InvalidArgumentException When the data cannot be split into
 *                                   characters or contains no characters.
 */
private function reset(string $data) : void
{
    $characters = \preg_split('//u', $data, -1, \PREG_SPLIT_NO_EMPTY);

    // Previously a split failure returned silently and left the buffer of an
    // earlier run in place; fail loudly instead of reading stale data.
    if ($characters === false) {
        throw new \InvalidArgumentException(
            'Could not deserialize ATN: unable to split the serialized data into UTF-8 characters.'
        );
    }

    if ($characters === []) {
        throw new \InvalidArgumentException('Could not deserialize ATN: the serialized data is empty.');
    }

    $decoded = [StringUtils::codePoint($characters[0])];

    for ($i = 1, $length = \count($characters); $i < $length; $i++) {
        $code = StringUtils::codePoint($characters[$i]);
        $decoded[] = $code > 1 ? $code - 2 : $code + 65533;
    }

    $this->data = $decoded;
    $this->pos = 0;
}
189
|
|
|
|
190
|
|
|
|
191
|
1 |
|
/**
 * Reads the serialized version number and rejects anything other than
 * {@see self::SERIALIZED_VERSION}.
 *
 * @throws \InvalidArgumentException When the version does not match.
 */
private function checkVersion() : void
{
    $version = $this->readInt();

    if ($version === self::SERIALIZED_VERSION) {
        return;
    }

    $message = \sprintf(
        'Could not deserialize ATN with version %d (expected %d).',
        $version,
        self::SERIALIZED_VERSION
    );

    throw new \InvalidArgumentException($message);
}
203
|
|
|
|
204
|
1 |
|
/**
 * Reads the serialized UUID, checks it against {@see self::SUPPORTED_UUIDS}
 * and remembers it for later feature checks.
 *
 * @throws \InvalidArgumentException When the UUID is not supported.
 */
private function checkUUID() : void
{
    $uuid = $this->readUUID();
    $isSupported = \in_array($uuid, self::SUPPORTED_UUIDS, true);

    if (!$isSupported) {
        $message = \sprintf(
            'Could not deserialize ATN with UUID: %s (expected %s or a legacy UUID).',
            $uuid,
            self::SERIALIZED_UUID
        );

        throw new \InvalidArgumentException($message);
    }

    $this->uuid = $uuid;
}
218
|
|
|
|
219
|
1 |
|
/**
 * Reads the ATN header (grammar type, then maximum token type) and creates
 * the ATN instance from these two values.
 */
private function readATN() : ATN
{
    // Arguments are evaluated left to right, so the grammar type is read first.
    return new ATN($this->readInt(), $this->readInt());
}
226
|
|
|
|
227
|
1 |
|
/**
 * Reads the state section and registers every state with the ATN.
 *
 * States of the invalid type are registered as `null` placeholders. Loop end
 * states and block start states carry the number of a related state; since
 * that state may not have been created yet, these links are resolved only
 * after all states were read. The section ends with the lists of non-greedy
 * decision states and of left-recursive rule start states.
 *
 * @throws \RuntimeException When a referenced state does not have the
 *                           expected class.
 */
private function readStates(ATN $atn) : void
{
    $pendingLoopBacks = [];
    $pendingBlockEnds = [];
    $stateCount = $this->readInt();

    for ($index = 0; $index < $stateCount; $index++) {
        $stateType = $this->readInt();

        // ignore bad type of states
        if ($stateType === ATNState::INVALID_TYPE) {
            $atn->addState(null);

            continue;
        }

        $ruleIndex = $this->readInt();

        // 0xFFFF is the serialized form of "no rule" (-1).
        if ($ruleIndex === 0xFFFF) {
            $ruleIndex = -1;
        }

        $state = $this->stateFactory($stateType, $ruleIndex);

        if ($stateType === ATNState::LOOP_END) {
            // special case
            $loopBackNumber = $this->readInt();

            if (!$state instanceof LoopEndState) {
                throw new \RuntimeException('Unexpected ATN State');
            }

            $pendingLoopBacks[] = [$state, $loopBackNumber];
        } elseif ($state instanceof BlockStartState) {
            $pendingBlockEnds[] = [$state, $this->readInt()];
        }

        $atn->addState($state);
    }

    // Every state instance exists now, so the delayed loop back and end state
    // references can be resolved.
    foreach ($pendingLoopBacks as [$loopEnd, $stateNumber]) {
        $loopEnd->loopBackState = $atn->states[$stateNumber];
    }

    foreach ($pendingBlockEnds as [$blockStart, $stateNumber]) {
        $blockEnd = $atn->states[$stateNumber];

        if (!$blockEnd instanceof BlockEndState) {
            throw new \RuntimeException('Unexpected ATN State');
        }

        $blockStart->endState = $blockEnd;
    }

    $nonGreedyCount = $this->readInt();

    for ($n = 0; $n < $nonGreedyCount; $n++) {
        $decisionState = $atn->states[$this->readInt()];

        if (!$decisionState instanceof DecisionState) {
            throw new \RuntimeException('Unexpected ATN State');
        }

        $decisionState->nonGreedy = true;
    }

    $precedenceCount = $this->readInt();

    for ($n = 0; $n < $precedenceCount; $n++) {
        $ruleStartState = $atn->states[$this->readInt()];

        if (!$ruleStartState instanceof RuleStartState) {
            throw new \RuntimeException('Unexpected ATN State');
        }

        $ruleStartState->isLeftRecursiveRule = true;
    }
}
309
|
|
|
|
310
|
1 |
|
/**
 * Reads the rule section: the start state of every rule and, for lexer
 * grammars, the token type of every rule. Afterwards each rule stop state
 * found in the ATN is recorded for its rule and linked from the rule's
 * start state.
 *
 * @throws \RuntimeException When a rule's start state number does not refer
 *                           to a {@see RuleStartState}.
 */
private function readRules(ATN $atn) : void
{
    $ruleCount = $this->readInt();
    $isLexer = $atn->grammarType === ATN::ATN_TYPE_LEXER;

    $atn->ruleToTokenType = [];
    $atn->ruleToStartState = [];

    for ($rule = 0; $rule < $ruleCount; $rule++) {
        $startState = $atn->states[$this->readInt()];

        if (!$startState instanceof RuleStartState) {
            throw new \RuntimeException('Unexpected ATN State');
        }

        $atn->ruleToStartState[$rule] = $startState;

        if (!$isLexer) {
            continue;
        }

        $tokenType = $this->readInt();

        // 0xFFFF is the serialized form of EOF.
        $atn->ruleToTokenType[$rule] = $tokenType === 0xFFFF ? Token::EOF : $tokenType;
    }

    $atn->ruleToStopState = [];

    foreach ($atn->states as $candidate) {
        if ($candidate instanceof RuleStopState) {
            $atn->ruleToStopState[$candidate->ruleIndex] = $candidate;
            $atn->ruleToStartState[$candidate->ruleIndex]->stopState = $candidate;
        }
    }
}
347
|
|
|
|
348
|
1 |
|
/**
 * Reads the mode section and appends the start state of every mode to the
 * ATN's mode list.
 *
 * @throws \RuntimeException When a mode's state number does not refer to a
 *                           {@see TokensStartState}.
 */
private function readModes(ATN $atn) : void
{
    $remaining = $this->readInt();

    while ($remaining-- > 0) {
        $startState = $atn->states[$this->readInt()];

        if (!$startState instanceof TokensStartState) {
            throw new \RuntimeException('Unexpected ATN State');
        }

        $atn->modeToStartState[] = $startState;
    }
}
362
|
|
|
|
363
|
|
|
/**
 * Reads a group of interval sets and appends them to `$sets`.
 *
 * Each serialized set consists of the number of intervals, a flag telling
 * whether the set contains EOF (added as -1), and then one pair of bounds
 * per interval. The bounds are read through `$readUnicode`, so the caller
 * decides how wide each bound is.
 *
 * @param array<IntervalSet> $sets        Receives the newly read sets.
 * @param callable           $readUnicode Returns the next interval bound.
 */
private function readSets(array &$sets, callable $readUnicode) : void
{
    $setCount = $this->readInt();

    for ($setIndex = 0; $setIndex < $setCount; $setIndex++) {
        $intervalCount = $this->readInt();
        $containsEof = $this->readInt() !== 0;

        $set = new IntervalSet();

        if ($containsEof) {
            $set->addOne(-1);
        }

        for ($interval = 0; $interval < $intervalCount; $interval++) {
            $lower = $readUnicode();
            $upper = $readUnicode();

            $set->addRange($lower, $upper);
        }

        $sets[] = $set;
    }
}
388
|
|
|
|
389
|
|
|
/**
 * Reads the serialized transitions, attaches them to their source states and
 * derives the links that are not serialized.
 *
 * Three passes are made:
 *  1. every serialized edge is created through the edge factory and added to
 *     its source state;
 *  2. for every rule transition, an epsilon transition from the stop state
 *     of the invoked rule to the transition's follow state is added;
 *  3. block end states are linked back to their block start state, and
 *     plus/star loop back states are registered on the loop start/entry
 *     states they target.
 *
 * @param array<IntervalSet> $sets The interval sets referenced by set and
 *                                 not-set transitions.
 *
 * @throws \RuntimeException When a block start state has no end state, or
 *                           its end state is already linked to a start state.
 */
private function readEdges(ATN $atn, array &$sets) : void
{
    $edgeCount = $this->readInt();

    for ($i = 0; $i < $edgeCount; $i++) {
        $src = $this->readInt();
        $trg = $this->readInt();
        $ttype = $this->readInt();
        $arg1 = $this->readInt();
        $arg2 = $this->readInt();
        $arg3 = $this->readInt();

        $transition = $this->edgeFactory($atn, $ttype, $src, $trg, $arg1, $arg2, $arg3, $sets);
        $atn->states[$src]->addTransition($transition);
    }

    // edges for rule stop states can be derived, so they aren't serialized
    foreach ($atn->states as $state) {
        // States of the invalid type are stored as null placeholders; they
        // have no transitions to inspect.
        if ($state === null) {
            continue;
        }

        foreach ($state->getTransitions() as $t) {
            if (!$t instanceof RuleTransition) {
                continue;
            }

            $ruleIndex = $t->target->ruleIndex;
            $outermostPrecedenceReturn = -1;

            if ($atn->ruleToStartState[$ruleIndex]->isLeftRecursiveRule && $t->precedence === 0) {
                $outermostPrecedenceReturn = $ruleIndex;
            }

            $atn->ruleToStopState[$ruleIndex]->addTransition(
                new EpsilonTransition($t->followState, $outermostPrecedenceReturn)
            );
        }
    }

    foreach ($atn->states as $state) {
        if ($state instanceof BlockStartState) {
            // we need to know the end state to set its start state
            if ($state->endState === null) {
                throw new \RuntimeException('Unexpected null EndState.');
            }

            // Block end states can only be associated to a single block start
            // state, so an already assigned start state is an error (the
            // message text is kept as-is for compatibility).
            if ($state->endState->startState !== null) {
                throw new \RuntimeException('Unexpected null StartState.');
            }

            $state->endState->startState = $state;
        }

        if ($state instanceof PlusLoopbackState) {
            foreach ($state->getTransitions() as $t) {
                $target = $t->target;

                if ($target instanceof PlusBlockStartState) {
                    $target->loopBackState = $state;
                }
            }
        } elseif ($state instanceof StarLoopbackState) {
            foreach ($state->getTransitions() as $t) {
                $target = $t->target;

                if ($target instanceof StarLoopEntryState) {
                    $target->loopBackState = $state;
                }
            }
        }
    }
}
461
|
|
|
|
462
|
1 |
|
/**
 * Reads the decision section: every entry is the number of a state, which is
 * appended to the ATN's decision list and given its position in that list
 * as decision number.
 */
private function readDecisions(ATN $atn) : void
{
    $decisionCount = $this->readInt();

    for ($decision = 0; $decision < $decisionCount; $decision++) {
        $stateNumber = $this->readInt();

        /** @var DecisionState $decisionState */
        $decisionState = $atn->states[$stateNumber];

        $atn->decisionToState[] = $decisionState;
        $decisionState->decision = $decision;
    }
}
476
|
|
|
|
477
|
1 |
|
/**
 * Reads the lexer action section. Nothing is read for non-lexer grammars.
 *
 * Every action is serialized as its type followed by two data values; a
 * data value of 0xFFFF is translated to -1 before the action is created.
 */
private function readLexerActions(ATN $atn) : void
{
    if ($atn->grammarType !== ATN::ATN_TYPE_LEXER) {
        return;
    }

    $actionCount = $this->readInt();
    $atn->lexerActions = [];

    for ($index = 0; $index < $actionCount; $index++) {
        $actionType = $this->readInt();

        $first = $this->readInt();
        $data1 = $first === 0xFFFF ? -1 : $first;

        $second = $this->readInt();
        $data2 = $second === 0xFFFF ? -1 : $second;

        $atn->lexerActions[$index] = $this->lexerActionFactory($actionType, $data1, $data2);
    }
}
502
|
|
|
|
503
|
|
|
/**
 * Assigns every rule a token type above the ATN's maximum token type
 * (`maxTokenType + ruleIndex + 1`) and then generates the bypass transition
 * of each rule.
 */
private function generateRuleBypassTransitions(ATN $atn) : void
{
    $ruleCount = \count($atn->ruleToStartState);

    // All token types are assigned before any bypass transition is built.
    $rule = 0;
    while ($rule < $ruleCount) {
        $atn->ruleToTokenType[$rule] = $atn->maxTokenType + $rule + 1;
        $rule++;
    }

    $rule = 0;
    while ($rule < $ruleCount) {
        $this->generateRuleBypassTransition($atn, $rule);
        $rule++;
    }
}
515
|
|
|
|
516
|
|
|
/**
 * Wraps the rule with the given index in a bypass block.
 *
 * A new block start/end pair is added to the ATN. The transitions that left
 * the rule start state are moved to the new block start, transitions that
 * targeted the rule's end state are redirected to the new block end, and an
 * extra alternative matching the rule's bypass token type is added to the
 * block.
 *
 * For left-recursive rules the end state is the star loop entry state
 * identified by {@see self::stateIsEndStateFor()}, and the first transition
 * of that state's loop back state is excluded from redirection.
 *
 * @param ATN $atn The ATN to modify.
 * @param int $idx The index of the rule to wrap.
 *
 * @throws \RuntimeException When the end state of the rule cannot be
 *                           determined.
 */
private function generateRuleBypassTransition(ATN $atn, int $idx) : void
{
    $bypassStart = new BasicBlockStartState();
    $bypassStart->ruleIndex = $idx;
    $atn->addState($bypassStart);

    $bypassStop = new BlockEndState();
    $bypassStop->ruleIndex = $idx;
    $atn->addState($bypassStop);

    $bypassStart->endState = $bypassStop;
    $atn->defineDecisionState($bypassStart);

    $bypassStop->startState = $bypassStart;

    $excludeTransition = null;

    if ($atn->ruleToStartState[$idx]->isLeftRecursiveRule) {
        // wrap from the beginning of the rule to the StarLoopEntryState
        $endState = null;

        foreach ($atn->states as $state) {
            // Skip null placeholders (invalid state types) and states that are
            // not the precedence loop entry of this rule.
            if ($state === null || $this->stateIsEndStateFor($state, $idx) === null) {
                continue;
            }

            // stateIsEndStateFor() only accepts StarLoopEntryState instances.
            // The previous check required a LoopEndState here, which made
            // every left-recursive rule fail.
            if ($state->loopBackState === null) {
                throw new \RuntimeException('Unexpected null loop back state.');
            }

            $endState = $state;
            $excludeTransition = $state->loopBackState->getTransition(0);

            break;
        }

        if ($excludeTransition === null) {
            throw new \RuntimeException('Couldn\'t identify final state of the precedence rule prefix section.');
        }
    } else {
        $endState = $atn->ruleToStopState[$idx];
    }

    // all non-excluded transitions that currently target end state need to target blockEnd instead
    // TODO (kept from the original code): looks like a bug
    foreach ($atn->states as $state) {
        if ($state === null) {
            continue;
        }

        foreach ($state->getTransitions() as $transition) {
            if ($excludeTransition !== null && $transition->equals($excludeTransition)) {
                continue;
            }

            if ($endState !== null && $transition->target->equals($endState)) {
                $transition->target = $bypassStop;
            }
        }
    }

    // All transitions leaving the rule start state need to leave blockStart
    // instead. They are moved one at a time, last transition first, until the
    // rule start state has none left; the previous loop never updated its
    // counter and did not remove the moved transition.
    $ruleStartState = $atn->ruleToStartState[$idx];

    while ($ruleStartState->getNumberOfTransitions() > 0) {
        $remaining = $ruleStartState->getTransitions();
        $bypassStart->addTransition(\array_pop($remaining));
        $ruleStartState->setTransitions($remaining);
    }

    // link the new states
    $ruleStartState->addTransition(new EpsilonTransition($bypassStart));

    if ($endState === null) {
        throw new \RuntimeException('Unexpected null end state.');
    }

    $bypassStop->addTransition(new EpsilonTransition($endState));

    $matchState = new BasicState();
    $atn->addState($matchState);
    $matchState->addTransition(new AtomTransition($bypassStop, $atn->ruleToTokenType[$idx] ?? 0));
    $bypassStart->addTransition(new EpsilonTransition($matchState));
}
598
|
|
|
|
599
|
|
|
/**
 * Tells whether the given state is the star loop entry state that ends the
 * prefix section of the rule with index `$idx`.
 *
 * That is the case when the state belongs to the rule, is a
 * {@see StarLoopEntryState}, and the target of its last transition is a
 * {@see LoopEndState} with epsilon-only transitions whose first transition
 * leads to a {@see RuleStopState}.
 *
 * @return ATNState|null The given state when it qualifies, `null` otherwise.
 */
private function stateIsEndStateFor(ATNState $state, int $idx) : ?ATNState
{
    if ($state->ruleIndex !== $idx || !$state instanceof StarLoopEntryState) {
        return null;
    }

    $lastTransition = $state->getTransition($state->getNumberOfTransitions() - 1);
    $candidate = $lastTransition->target;

    $qualifies = $candidate instanceof LoopEndState
        && $candidate->epsilonOnlyTransitions
        && $candidate->getTransition(0)->target instanceof RuleStopState;

    return $qualifies ? $state : null;
}
622
|
|
|
|
623
|
|
|
/**
 * Analyze the {@see StarLoopEntryState} states in the specified ATN to set
 * the {@see StarLoopEntryState::$isPrecedenceDecision} field to the correct
 * value.
 *
 * A star loop entry state is marked when its rule is left-recursive and the
 * target of its last transition is a {@see LoopEndState} with epsilon-only
 * transitions whose first transition leads to a {@see RuleStopState}.
 *
 * @param ATN $atn The ATN.
 */
private function markPrecedenceDecisions(ATN $atn) : void
{
    foreach ($atn->states as $state) {
        if (!$state instanceof StarLoopEntryState) {
            continue;
        }

        // Only the closure block of a precedence (left-recursive) rule decides
        // whether that rule should continue or complete.
        if (!$atn->ruleToStartState[$state->ruleIndex]->isLeftRecursiveRule) {
            continue;
        }

        $lastTransition = $state->getTransition($state->getNumberOfTransitions() - 1);
        $candidate = $lastTransition->target;

        if (!$candidate instanceof LoopEndState) {
            continue;
        }

        if ($candidate->epsilonOnlyTransitions
            && $candidate->getTransition(0)->target instanceof RuleStopState) {
            $state->isPrecedenceDecision = true;
        }
    }
}
652
|
|
|
|
653
|
1 |
|
/**
 * Checks structural assumptions about every state of the ATN.
 *
 * Nothing is checked when verification is disabled in the deserialization
 * options. Each assumption is passed to {@see self::checkCondition()}.
 *
 * @throws \InvalidArgumentException When a star loop entry state has a
 *                                   first transition that targets neither a
 *                                   star block start nor a loop end state,
 *                                   or when a condition check fails.
 */
private function verifyATN(ATN $atn) : void
{
    if (!$this->deserializationOptions->isVerifyATN()) {
        return;
    }

    // verify assumptions
    foreach ($atn->states as $state) {
        // States of the invalid type are stored as null placeholders and have
        // nothing to verify.
        if ($state === null) {
            continue;
        }

        $this->checkCondition($state->epsilonOnlyTransitions || $state->getNumberOfTransitions() <= 1);

        // The first matching case wins, so the more specific state classes
        // are listed before the classes they may extend.
        switch (true) {
            case $state instanceof PlusBlockStartState:
                $this->checkCondition($state->loopBackState !== null);

                break;

            case $state instanceof StarLoopEntryState:
                $this->checkCondition($state->loopBackState !== null);
                $this->checkCondition($state->getNumberOfTransitions() === 2);

                if ($state->getTransition(0)->target instanceof StarBlockStartState) {
                    $this->checkCondition($state->getTransition(1)->target instanceof LoopEndState);
                    $this->checkCondition(!$state->nonGreedy);
                } elseif ($state->getTransition(0)->target instanceof LoopEndState) {
                    $this->checkCondition($state->getTransition(1)->target instanceof StarBlockStartState);
                    $this->checkCondition($state->nonGreedy);
                } else {
                    throw new \InvalidArgumentException('IllegalState');
                }

                break;

            case $state instanceof StarLoopbackState:
                $this->checkCondition($state->getNumberOfTransitions() === 1);
                $this->checkCondition($state->getTransition(0)->target instanceof StarLoopEntryState);

                break;

            case $state instanceof LoopEndState:
                $this->checkCondition($state->loopBackState !== null);

                break;

            case $state instanceof RuleStartState:
                $this->checkCondition($state->stopState !== null);

                break;

            case $state instanceof BlockStartState:
                $this->checkCondition($state->endState !== null);

                break;

            case $state instanceof BlockEndState:
                $this->checkCondition($state->startState !== null);

                break;

            case $state instanceof DecisionState:
                $this->checkCondition($state->getNumberOfTransitions() <= 1 || $state->decision >= 0);

                break;

            default:
                $this->checkCondition($state->getNumberOfTransitions() <= 1 || $state instanceof RuleStopState);
        }
    }
}
721
|
|
|
|
722
|
1 |
|
/**
 * Throws when a verification condition does not hold.
 *
 * Previously only a `null` condition raised the exception, so a condition
 * that evaluated to `false` passed silently; now anything other than `true`
 * fails.
 *
 * @param bool|null $condition The result of the check.
 * @param string    $message   The exception message to use on failure.
 *
 * @throws \InvalidArgumentException When `$condition` is not `true`.
 */
private function checkCondition(?bool $condition, $message = 'IllegalState') : void
{
    if ($condition !== true) {
        throw new \InvalidArgumentException($message);
    }
}
728
|
|
|
|
729
|
1 |
|
/**
 * Returns the value at the current read position and advances the position
 * by one.
 */
private function readInt() : int
{
    $position = $this->pos++;

    return $this->data[$position];
}
733
|
|
|
|
734
|
|
|
/**
 * Reads two consecutive values and combines them into one integer: the
 * first value supplies the low 16 bits, the second is shifted left by 16.
 */
private function readInt32() : int
{
    $lowWord = $this->readInt();
    $highWord = $this->readInt();

    return ($highWord << 16) | $lowWord;
}
741
|
|
|
|
742
|
1 |
|
/**
 * Reads eight values and renders them as an upper-case UUID string in the
 * 8-4-4-4-12 layout.
 *
 * Every value contributes two bytes (low byte first); the resulting byte
 * sequence is reversed before it is converted to hexadecimal.
 */
private function readUUID() : string
{
    $bytes = [];

    for ($word = 0; $word < 8; $word++) {
        $value = $this->readInt();

        $bytes[] = $value & 0xFF;
        $bytes[] = ($value >> 8) & 0xFF;
    }

    $hex = '';

    foreach (\array_reverse($bytes) as $byte) {
        $hex .= \sprintf('%02X', $byte);
    }

    return \substr($hex, 0, 8) . '-'
        . \substr($hex, 8, 4) . '-'
        . \substr($hex, 12, 4) . '-'
        . \substr($hex, 16, 4) . '-'
        . \substr($hex, 20, 12);
}
756
|
|
|
|
757
|
|
|
/**
 * Creates the transition described by one serialized edge.
 *
 * The meaning of the three arguments depends on the transition type; for
 * range and atom transitions a non-zero `$arg3` replaces `$arg1` with EOF.
 *
 * @param ATN                $atn  The ATN whose states the edge refers to.
 * @param int                $type The transition type.
 * @param int                $src  The source state number (not used here).
 * @param int                $trg  The target state number.
 * @param int                $arg1 First type-specific argument.
 * @param int                $arg2 Second type-specific argument.
 * @param int                $arg3 Third type-specific argument.
 * @param array<IntervalSet> $sets The sets referenced by set and not-set
 *                                 transitions, indexed by `$arg1`.
 *
 * @throws \RuntimeException         When a rule transition does not refer to
 *                                   a rule start state.
 * @throws \InvalidArgumentException When the transition type is unknown.
 */
private function edgeFactory(
    ATN $atn,
    int $type,
    int $src,
    int $trg,
    int $arg1,
    int $arg2,
    int $arg3,
    array $sets
) : Transition {
    $target = $atn->states[$trg];
    $flag = $arg3 !== 0;

    switch ($type) {
        case Transition::EPSILON:
            return new EpsilonTransition($target);

        case Transition::RANGE:
            return new RangeTransition($target, $flag ? Token::EOF : $arg1, $arg2);

        case Transition::RULE:
            $ruleStart = $atn->states[$arg1];

            if ($ruleStart instanceof RuleStartState) {
                return new RuleTransition($ruleStart, $arg2, $arg3, $target);
            }

            throw new \RuntimeException('Unexpected transition type.');

        case Transition::PREDICATE:
            return new PredicateTransition($target, $arg1, $arg2, $flag);

        case Transition::PRECEDENCE:
            return new PrecedencePredicateTransition($target, $arg1);

        case Transition::ATOM:
            return new AtomTransition($target, $flag ? Token::EOF : $arg1);

        case Transition::ACTION:
            return new ActionTransition($target, $arg1, $arg2, $flag);

        case Transition::SET:
            return new SetTransition($target, $sets[$arg1]);

        case Transition::NOT_SET:
            return new NotSetTransition($target, $sets[$arg1]);

        case Transition::WILDCARD:
            return new WildcardTransition($target);
    }

    throw new \InvalidArgumentException(\sprintf(
        'The specified transition type: %d is not valid.',
        $type
    ));
}
818
|
|
|
|
819
|
1 |
|
/**
 * Creates the state object for a serialized state type and assigns it the
 * given rule index.
 *
 * @param int $type      The serialized state type.
 * @param int $ruleIndex The index of the rule the state belongs to.
 *
 * @return ATNState|null The new state, or `null` for the invalid state type.
 *
 * @throws \InvalidArgumentException When the state type is unknown.
 */
private function stateFactory(int $type, int $ruleIndex) : ?ATNState
{
    if ($type === ATNState::INVALID_TYPE) {
        return null;
    }

    // Maps each serialized state type to the class that represents it.
    $classByType = [
        ATNState::BASIC => BasicState::class,
        ATNState::RULE_START => RuleStartState::class,
        ATNState::BLOCK_START => BasicBlockStartState::class,
        ATNState::PLUS_BLOCK_START => PlusBlockStartState::class,
        ATNState::STAR_BLOCK_START => StarBlockStartState::class,
        ATNState::TOKEN_START => TokensStartState::class,
        ATNState::RULE_STOP => RuleStopState::class,
        ATNState::BLOCK_END => BlockEndState::class,
        ATNState::STAR_LOOP_BACK => StarLoopbackState::class,
        ATNState::STAR_LOOP_ENTRY => StarLoopEntryState::class,
        ATNState::PLUS_LOOP_BACK => PlusLoopbackState::class,
        ATNState::LOOP_END => LoopEndState::class,
    ];

    if (!isset($classByType[$type])) {
        throw new \InvalidArgumentException(\sprintf(
            'The specified state type %d is not valid.',
            $type
        ));
    }

    $stateClass = $classByType[$type];

    $state = new $stateClass();
    $state->ruleIndex = $ruleIndex;

    return $state;
}
896
|
|
|
|
897
|
1 |
|
/**
 * Creates the lexer action for a serialized action type.
 *
 * More, pop-mode and skip actions are obtained from their `instance()`
 * accessor and ignore the data values; the other actions are constructed
 * from `$data1` (and `$data2` for custom actions).
 *
 * @param int $type  The serialized lexer action type.
 * @param int $data1 First action argument.
 * @param int $data2 Second action argument.
 *
 * @throws \InvalidArgumentException When the action type is unknown.
 */
private function lexerActionFactory(int $type, int $data1, int $data2) : LexerAction
{
    switch ($type) {
        case LexerActionType::CHANNEL:
            $action = new LexerChannelAction($data1);
            break;

        case LexerActionType::CUSTOM:
            $action = new LexerCustomAction($data1, $data2);
            break;

        case LexerActionType::MODE:
            $action = new LexerModeAction($data1);
            break;

        case LexerActionType::MORE:
            $action = LexerMoreAction::instance();
            break;

        case LexerActionType::POP_MODE:
            $action = LexerPopModeAction::instance();
            break;

        case LexerActionType::PUSH_MODE:
            $action = new LexerPushModeAction($data1);
            break;

        case LexerActionType::SKIP:
            $action = LexerSkipAction::instance();
            break;

        case LexerActionType::TYPE:
            $action = new LexerTypeAction($data1);
            break;

        default:
            throw new \InvalidArgumentException(\sprintf(
                'The specified lexer action type %d is not valid.',
                $type
            ));
    }

    return $action;
}
931
|
|
|
} |
932
|
|
|
|