1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* This file is part of Railt package. |
4
|
|
|
* |
5
|
|
|
* For the full copyright and license information, please view the LICENSE |
6
|
|
|
* file that was distributed with this source code. |
7
|
|
|
*/ |
8
|
|
|
declare(strict_types=1); |
9
|
|
|
|
10
|
|
|
namespace Railt\Compiler\Grammar; |
11
|
|
|
|
12
|
|
|
use Railt\Compiler\Exception\GrammarException; |
13
|
|
|
use Railt\Compiler\Grammar\Builder\AbstractBuilder; |
14
|
|
|
use Railt\Compiler\Grammar\Builder\Alternation; |
15
|
|
|
use Railt\Compiler\Grammar\Builder\Concatenation; |
16
|
|
|
use Railt\Compiler\Grammar\Builder\Repetition; |
17
|
|
|
use Railt\Compiler\Grammar\Builder\Terminal; |
18
|
|
|
use Railt\Compiler\Grammar\Delegate\RuleDelegate; |
19
|
|
|
use Railt\Lexer\LexerInterface; |
20
|
|
|
use Railt\Lexer\Result\Eoi; |
21
|
|
|
use Railt\Parser\Rule\Rule; |
22
|
|
|
|
23
|
|
|
/** |
24
|
|
|
* Analyze rules and transform them into atomic rules operations. |
25
|
|
|
*/ |
26
|
|
|
class Analyzer |
27
|
|
|
{ |
28
|
|
|
/** |
29
|
|
|
* Tokens representing rules. |
30
|
|
|
* @var array |
31
|
|
|
*/ |
32
|
|
|
protected $tokens; |
33
|
|
|
|
34
|
|
|
/** |
35
|
|
|
* @var array|RuleDelegate[] |
36
|
|
|
*/ |
37
|
|
|
protected $rules = []; |
38
|
|
|
|
39
|
|
|
/** |
40
|
|
|
* Parsed rules. |
41
|
|
|
* @var array|AbstractBuilder[] |
42
|
|
|
*/ |
43
|
|
|
protected $parsedRules; |
44
|
|
|
|
45
|
|
|
/** |
46
|
|
|
* Counter to auto-name transitional rules. |
47
|
|
|
* @var int |
48
|
|
|
*/ |
49
|
|
|
protected $transitionalRuleCounter = 0; |
50
|
|
|
|
51
|
|
|
/** |
52
|
|
|
* Rule name being analyzed. |
53
|
|
|
* @var string |
54
|
|
|
*/ |
55
|
|
|
private $ruleName; |
56
|
|
|
|
57
|
|
|
/** |
58
|
|
|
* Analyzer constructor. |
59
|
|
|
* @param LexerInterface $lexer |
60
|
|
|
*/ |
61
|
|
|
public function __construct(LexerInterface $lexer) |
62
|
|
|
{ |
63
|
|
|
$this->tokens = $lexer->getTokenDefinitions(); |
|
|
|
|
64
|
|
|
} |
65
|
|
|
|
66
|
|
|
/** |
67
|
|
|
* @param RuleDelegate $delegate |
68
|
|
|
*/ |
69
|
|
|
public function addRuleDelegate(RuleDelegate $delegate): void |
70
|
|
|
{ |
71
|
|
|
$this->rules[$delegate->getRuleName()] = $delegate; |
72
|
|
|
} |
73
|
|
|
|
74
|
|
|
/** |
75
|
|
|
* Build the analyzer of the rules (does not analyze the rules). |
76
|
|
|
* |
77
|
|
|
* @return Rule[]|\Traversable |
78
|
|
|
* @throws GrammarException |
79
|
|
|
*/ |
80
|
|
|
public function analyze(): iterable |
81
|
|
|
{ |
82
|
|
|
if (\count($this->rules) === 0) { |
83
|
|
|
throw new GrammarException('No rules specified'); |
84
|
|
|
} |
85
|
|
|
|
86
|
|
|
$this->parsedRules = []; |
87
|
|
|
|
88
|
|
|
foreach ($this->rules as $delegate) { |
89
|
|
|
$this->ruleName = $delegate->getRuleName(); |
90
|
|
|
$nodeId = $delegate->isKept() ? $delegate->getRuleName() : null; |
91
|
|
|
|
92
|
|
|
$pNodeId = $nodeId; |
93
|
|
|
$rule = $this->rule($delegate->getInnerTokens(), $pNodeId); |
94
|
|
|
|
95
|
|
|
if ($rule === null) { |
96
|
|
|
$error = \sprintf('Error while parsing rule %s.', $delegate->getRuleName()); |
97
|
|
|
throw new GrammarException($error, 1); |
98
|
|
|
} |
99
|
|
|
|
100
|
|
|
$zeRule = $this->parsedRules[$rule]; |
101
|
|
|
$zeRule->setName($delegate->getRuleName()); |
102
|
|
|
|
103
|
|
|
if ($nodeId !== null) { |
104
|
|
|
$zeRule->setDefaultId($nodeId); |
105
|
|
|
} |
106
|
|
|
|
107
|
|
|
unset($this->parsedRules[$rule]); |
108
|
|
|
$this->parsedRules[$delegate->getRuleName()] = $zeRule; |
109
|
|
|
} |
110
|
|
|
|
111
|
|
|
foreach ($this->parsedRules as $builder) { |
112
|
|
|
yield $builder->build(); |
113
|
|
|
} |
114
|
|
|
} |
115
|
|
|
|
116
|
|
|
/** |
117
|
|
|
* Implementation of “rule”. |
118
|
|
|
* |
119
|
|
|
* @param LookaheadIterator $tokens |
120
|
|
|
* @param string|null $pNodeId |
121
|
|
|
* @return string|int|null |
122
|
|
|
* @throws GrammarException |
123
|
|
|
*/ |
124
|
|
|
protected function rule(LookaheadIterator $tokens, &$pNodeId) |
125
|
|
|
{ |
126
|
|
|
return $this->choice($tokens, $pNodeId); |
127
|
|
|
} |
128
|
|
|
|
129
|
|
|
/** |
130
|
|
|
* Implementation of “choice”. |
131
|
|
|
* |
132
|
|
|
* @param LookaheadIterator $tokens |
133
|
|
|
* @param string|null $pNodeId |
134
|
|
|
* @return string|int|null |
135
|
|
|
* @throws GrammarException |
136
|
|
|
*/ |
137
|
|
|
protected function choice(LookaheadIterator $tokens, &$pNodeId) |
138
|
|
|
{ |
139
|
|
|
$children = []; |
140
|
|
|
|
141
|
|
|
// concatenation() … |
142
|
|
|
$nNodeId = $pNodeId; |
143
|
|
|
$rule = $this->concatenation($tokens, $nNodeId); |
144
|
|
|
|
145
|
|
|
if ($rule === null) { |
146
|
|
|
return null; |
147
|
|
|
} |
148
|
|
|
|
149
|
|
|
if ($nNodeId !== null) { |
150
|
|
|
$this->parsedRules[$rule]->setNodeId($nNodeId); |
151
|
|
|
} |
152
|
|
|
|
153
|
|
|
$children[] = $rule; |
154
|
|
|
$others = false; |
155
|
|
|
|
156
|
|
|
// … ( ::or:: concatenation() )* |
|
|
|
|
157
|
|
|
while ($tokens->current()->getName() === Parser::T_OR) { |
158
|
|
|
$tokens->next(); |
159
|
|
|
$others = true; |
160
|
|
|
$nNodeId = $pNodeId; |
161
|
|
|
$rule = $this->concatenation($tokens, $nNodeId); |
162
|
|
|
|
163
|
|
|
if ($rule === null) { |
164
|
|
|
return null; |
165
|
|
|
} |
166
|
|
|
|
167
|
|
|
if ($nNodeId !== null) { |
168
|
|
|
$this->parsedRules[$rule]->setNodeId($nNodeId); |
169
|
|
|
} |
170
|
|
|
|
171
|
|
|
$children[] = $rule; |
172
|
|
|
} |
173
|
|
|
|
174
|
|
|
$pNodeId = null; |
175
|
|
|
|
176
|
|
|
if ($others === false) { |
177
|
|
|
return $rule; |
178
|
|
|
} |
179
|
|
|
|
180
|
|
|
$name = $this->transitionalRuleCounter++; |
181
|
|
|
|
182
|
|
|
$this->parsedRules[$name] = new Alternation($name, $children); |
183
|
|
|
|
184
|
|
|
return $name; |
185
|
|
|
} |
186
|
|
|
|
187
|
|
|
/** |
188
|
|
|
* Implementation of “concatenation”. |
189
|
|
|
* |
190
|
|
|
* @param LookaheadIterator $tokens |
191
|
|
|
* @param string|null $pNodeId |
192
|
|
|
* @return string|int|null |
193
|
|
|
* @throws GrammarException |
194
|
|
|
*/ |
195
|
|
|
protected function concatenation(LookaheadIterator $tokens, &$pNodeId) |
196
|
|
|
{ |
197
|
|
|
$children = []; |
198
|
|
|
|
199
|
|
|
// repetition() … |
200
|
|
|
$rule = $this->repetition($tokens, $pNodeId); |
201
|
|
|
|
202
|
|
|
if ($rule === null) { |
203
|
|
|
return null; |
204
|
|
|
} |
205
|
|
|
|
206
|
|
|
$children[] = $rule; |
207
|
|
|
$others = false; |
208
|
|
|
|
209
|
|
|
// … repetition()* |
210
|
|
|
while (null !== $r1 = $this->repetition($tokens, $pNodeId)) { |
211
|
|
|
$children[] = $r1; |
212
|
|
|
$others = true; |
213
|
|
|
} |
214
|
|
|
|
215
|
|
|
if ($others === false && $pNodeId === null) { |
216
|
|
|
return $rule; |
217
|
|
|
} |
218
|
|
|
|
219
|
|
|
$name = $this->transitionalRuleCounter++; |
220
|
|
|
|
221
|
|
|
$this->parsedRules[$name] = new Concatenation($name, $children); |
222
|
|
|
|
223
|
|
|
return $name; |
224
|
|
|
} |
225
|
|
|
|
226
|
|
|
/** |
227
|
|
|
* Implementation of “repetition”. |
228
|
|
|
* |
229
|
|
|
* @param LookaheadIterator $tokens |
230
|
|
|
* @param string|null $pNodeId |
231
|
|
|
* @return string|int|null |
232
|
|
|
* @throws GrammarException |
233
|
|
|
*/ |
234
|
|
|
protected function repetition(LookaheadIterator $tokens, &$pNodeId) |
235
|
|
|
{ |
236
|
|
|
[$min, $max] = [null, null]; |
|
|
|
|
237
|
|
|
|
238
|
|
|
// simple() … |
239
|
|
|
$children = $this->simple($tokens, $pNodeId); |
240
|
|
|
|
241
|
|
|
if ($children === null) { |
242
|
|
|
return null; |
243
|
|
|
} |
244
|
|
|
|
245
|
|
|
// … quantifier()? |
|
|
|
|
246
|
|
|
switch ($tokens->current()->getName()) { |
247
|
|
|
case Parser::T_REPEAT_ZERO_OR_ONE: |
248
|
|
|
[$min, $max] = [0, 1]; |
|
|
|
|
249
|
|
|
$tokens->next(); |
250
|
|
|
break; |
251
|
|
|
|
252
|
|
|
case Parser::T_REPEAT_ONE_OR_MORE: |
253
|
|
|
[$min, $max] = [1, -1]; |
|
|
|
|
254
|
|
|
$tokens->next(); |
255
|
|
|
break; |
256
|
|
|
|
257
|
|
|
case Parser::T_REPEAT_ZERO_OR_MORE: |
258
|
|
|
[$min, $max] = [0, -1]; |
|
|
|
|
259
|
|
|
$tokens->next(); |
260
|
|
|
break; |
261
|
|
|
|
262
|
|
|
case Parser::T_REPEAT_N_TO_M: |
263
|
|
|
$min = (int)$tokens->current()->getValue(1); |
264
|
|
|
$max = (int)$tokens->current()->getValue(2); |
265
|
|
|
$tokens->next(); |
266
|
|
|
break; |
267
|
|
|
|
268
|
|
|
case Parser::T_REPEAT_ZERO_TO_M: |
269
|
|
|
[$min, $max] = [0, (int)$tokens->current()->getValue(1)]; |
|
|
|
|
270
|
|
|
$tokens->next(); |
271
|
|
|
break; |
272
|
|
|
|
273
|
|
|
case Parser::T_REPEAT_N_OR_MORE: |
274
|
|
|
[$min, $max] = [(int)$tokens->current()->getValue(1), -1]; |
|
|
|
|
275
|
|
|
$tokens->next(); |
276
|
|
|
break; |
277
|
|
|
|
278
|
|
|
case Parser::T_REPEAT_EXACTLY_N: |
279
|
|
|
$min = $max = (int)$tokens->current()->getValue(1); |
280
|
|
|
$tokens->next(); |
281
|
|
|
break; |
282
|
|
|
} |
283
|
|
|
|
284
|
|
|
// … <node>? |
285
|
|
|
if ($tokens->current()->getName() === Parser::T_KEPT_NAME) { |
286
|
|
|
$tokens->next(); |
287
|
|
|
$pNodeId = $tokens->current()->getValue(); |
288
|
|
|
$tokens->next(); |
289
|
|
|
} |
290
|
|
|
|
291
|
|
|
if ($min === null) { |
|
|
|
|
292
|
|
|
return $children; |
293
|
|
|
} |
294
|
|
|
|
295
|
|
|
if ($max !== -1 && $max < $min) { |
296
|
|
|
$error = 'Upper bound %d must be greater or equal to lower bound %d in rule %s.'; |
297
|
|
|
$error = \sprintf($error, $max, $min, $this->ruleName); |
|
|
|
|
298
|
|
|
throw new GrammarException($error, 2); |
299
|
|
|
} |
300
|
|
|
|
301
|
|
|
$name = $this->transitionalRuleCounter++; |
302
|
|
|
|
303
|
|
|
$this->parsedRules[$name] = new Repetition($name, $min, $max, $children); |
304
|
|
|
|
305
|
|
|
return $name; |
306
|
|
|
} |
307
|
|
|
|
308
|
|
|
/** |
309
|
|
|
* Implementation of “simple”. |
310
|
|
|
* |
311
|
|
|
* @param LookaheadIterator $tokens |
312
|
|
|
* @param int|string|null $pNodeId |
313
|
|
|
* @return string|int|null |
314
|
|
|
* @throws GrammarException |
315
|
|
|
*/ |
316
|
|
|
protected function simple(LookaheadIterator $tokens, &$pNodeId) |
317
|
|
|
{ |
318
|
|
|
switch ($tokens->current()->getName()) { |
319
|
|
|
case Parser::T_GROUP_OPEN: |
320
|
|
|
return $this->group($tokens, $pNodeId); |
321
|
|
|
|
322
|
|
|
case Parser::T_TOKEN_SKIPPED: |
323
|
|
|
return $this->token($tokens, false); |
324
|
|
|
|
325
|
|
|
case Parser::T_TOKEN_KEPT: |
326
|
|
|
return $this->token($tokens, true); |
327
|
|
|
|
328
|
|
|
case Parser::T_INVOKE: |
329
|
|
|
return $this->invoke($tokens); |
330
|
|
|
|
331
|
|
|
default: |
332
|
|
|
return null; |
333
|
|
|
} |
334
|
|
|
} |
335
|
|
|
|
336
|
|
|
/** |
337
|
|
|
* @param LookaheadIterator $tokens |
338
|
|
|
* @param int|string|null $pNodeId |
339
|
|
|
* @return int|null|string |
340
|
|
|
* @throws GrammarException |
341
|
|
|
*/ |
342
|
|
|
protected function group(LookaheadIterator $tokens, &$pNodeId) |
343
|
|
|
{ |
344
|
|
|
$tokens->next(); |
345
|
|
|
$rule = $this->choice($tokens, $pNodeId); |
346
|
|
|
|
347
|
|
|
if ($rule === null) { |
348
|
|
|
return null; |
349
|
|
|
} |
350
|
|
|
|
351
|
|
|
if ($tokens->current()->getName() !== Parser::T_GROUP_CLOSE) { |
352
|
|
|
return null; |
353
|
|
|
} |
354
|
|
|
|
355
|
|
|
$tokens->next(); |
356
|
|
|
|
357
|
|
|
return $rule; |
358
|
|
|
} |
359
|
|
|
|
360
|
|
|
/** |
361
|
|
|
* @param LookaheadIterator $tokens |
362
|
|
|
* @param bool $kept |
363
|
|
|
* @return int|string|null |
364
|
|
|
* @throws GrammarException |
365
|
|
|
*/ |
366
|
|
|
protected function token(LookaheadIterator $tokens, bool $kept = true) |
367
|
|
|
{ |
368
|
|
|
$tokenName = $tokens->current()->getValue(1); |
369
|
|
|
|
370
|
|
|
if (false) { // TODO |
|
|
|
|
371
|
|
|
$error = \sprintf('Token %s does not exist in rule %s.', $tokenName, $this->ruleName); |
372
|
|
|
throw new GrammarException($error, 4); |
373
|
|
|
} |
374
|
|
|
|
375
|
|
|
$name = $this->transitionalRuleCounter++; |
376
|
|
|
|
377
|
|
|
$this->parsedRules[$name] = new Terminal($name, $tokenName, $kept); |
378
|
|
|
$tokens->next(); |
379
|
|
|
|
380
|
|
|
return $name; |
381
|
|
|
} |
382
|
|
|
|
383
|
|
|
/** |
384
|
|
|
* @param LookaheadIterator $tokens |
385
|
|
|
* @return int|string |
386
|
|
|
* @throws GrammarException |
387
|
|
|
*/ |
388
|
|
|
protected function invoke(LookaheadIterator $tokens) |
389
|
|
|
{ |
390
|
|
|
$tokenName = $tokens->current()->getValue(1); |
391
|
|
|
|
392
|
|
|
$isEmptyRule = ! \array_key_exists($tokenName, $this->rules); |
393
|
|
|
|
394
|
|
|
if ($isEmptyRule) { |
395
|
|
|
$error = \vsprintf('Cannot call rule %s() in rule %s because it does not exist.', [ |
396
|
|
|
$tokenName, |
397
|
|
|
$this->ruleName, |
398
|
|
|
]); |
399
|
|
|
|
400
|
|
|
throw new GrammarException($error, 5); |
401
|
|
|
} |
402
|
|
|
|
403
|
|
|
if ($tokens->getNext()->getName() === Eoi::T_NAME) { |
404
|
|
|
$name = $this->transitionalRuleCounter++; |
405
|
|
|
$this->parsedRules[$name] = new Concatenation($name, [$tokenName]); |
406
|
|
|
} else { |
407
|
|
|
$name = $tokenName; |
408
|
|
|
} |
409
|
|
|
|
410
|
|
|
$tokens->next(); |
411
|
|
|
|
412
|
|
|
return $name; |
413
|
|
|
} |
414
|
|
|
} |
415
|
|
|
|
Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.
For example, imagine you have a variable
$accountId
that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to theid
property of an instance of theAccount
class. This class holds a proper account, so the id value must no longer be false.Either this assignment is in error or a type check should be added for that assignment.