1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* This file is part of Railt package. |
4
|
|
|
* |
5
|
|
|
* For the full copyright and license information, please view the LICENSE |
6
|
|
|
* file that was distributed with this source code. |
7
|
|
|
*/ |
8
|
|
|
declare(strict_types=1); |
9
|
|
|
|
10
|
|
|
namespace Railt\Compiler\Grammar; |
11
|
|
|
|
12
|
|
|
use Railt\Compiler\Exception\GrammarException; |
13
|
|
|
use Railt\Compiler\Grammar\Builder\AbstractBuilder; |
14
|
|
|
use Railt\Compiler\Grammar\Builder\Alternation; |
15
|
|
|
use Railt\Compiler\Grammar\Builder\Concatenation; |
16
|
|
|
use Railt\Compiler\Grammar\Builder\Repetition; |
17
|
|
|
use Railt\Compiler\Grammar\Builder\Terminal; |
18
|
|
|
use Railt\Compiler\Grammar\Delegate\RuleDelegate; |
19
|
|
|
use Railt\Lexer\Result\Eoi; |
20
|
|
|
use Railt\Parser\Rule\Rule; |
21
|
|
|
|
22
|
|
|
/** |
23
|
|
|
* Analyze rules and transform them into atomic rules operations. |
24
|
|
|
*/ |
25
|
|
|
class Analyzer |
26
|
|
|
{ |
27
|
|
|
/** |
28
|
|
|
* @var array|RuleDelegate[] |
29
|
|
|
*/ |
30
|
|
|
protected $rules = []; |
31
|
|
|
|
32
|
|
|
/** |
33
|
|
|
* Parsed rules. |
34
|
|
|
* @var array|AbstractBuilder[] |
35
|
|
|
*/ |
36
|
|
|
protected $parsedRules; |
37
|
|
|
|
38
|
|
|
/** |
39
|
|
|
* Counter to auto-name transitional rules. |
40
|
|
|
* @var int |
41
|
|
|
*/ |
42
|
|
|
protected $transitionalRuleCounter = 0; |
43
|
|
|
|
44
|
|
|
/** |
45
|
|
|
* Rule name being analyzed. |
46
|
|
|
* @var string |
47
|
|
|
*/ |
48
|
|
|
private $ruleName; |
49
|
|
|
|
50
|
|
|
/** |
51
|
|
|
* @param RuleDelegate $delegate |
52
|
|
|
*/ |
53
|
|
|
public function addRuleDelegate(RuleDelegate $delegate): void |
54
|
|
|
{ |
55
|
|
|
$this->rules[$delegate->getRuleName()] = $delegate; |
56
|
|
|
} |
57
|
|
|
|
58
|
|
|
/** |
59
|
|
|
* Build the analyzer of the rules (does not analyze the rules). |
60
|
|
|
* |
61
|
|
|
* @return Rule[]|\Traversable |
62
|
|
|
* @throws GrammarException |
63
|
|
|
*/ |
64
|
|
|
public function analyze(): iterable |
65
|
|
|
{ |
66
|
|
|
if (\count($this->rules) === 0) { |
67
|
|
|
throw new GrammarException('No rules specified'); |
68
|
|
|
} |
69
|
|
|
|
70
|
|
|
$this->parsedRules = []; |
71
|
|
|
|
72
|
|
|
foreach ($this->rules as $delegate) { |
73
|
|
|
$this->ruleName = $delegate->getRuleName(); |
74
|
|
|
$nodeId = $delegate->isKept() ? $delegate->getRuleName() : null; |
75
|
|
|
|
76
|
|
|
$pNodeId = $nodeId; |
77
|
|
|
$rule = $this->rule($delegate->getInnerTokens(), $pNodeId); |
78
|
|
|
|
79
|
|
|
if ($rule === null) { |
80
|
|
|
$error = \sprintf('Error while parsing rule %s.', $delegate->getRuleName()); |
81
|
|
|
throw new GrammarException($error, 1); |
82
|
|
|
} |
83
|
|
|
|
84
|
|
|
$zeRule = $this->parsedRules[$rule]; |
85
|
|
|
$zeRule->setName($delegate->getRuleName()); |
86
|
|
|
|
87
|
|
|
if ($nodeId !== null) { |
88
|
|
|
$zeRule->setDefaultId($nodeId); |
89
|
|
|
} |
90
|
|
|
|
91
|
|
|
unset($this->parsedRules[$rule]); |
92
|
|
|
$this->parsedRules[$delegate->getRuleName()] = $zeRule; |
93
|
|
|
} |
94
|
|
|
|
95
|
|
|
foreach ($this->parsedRules as $builder) { |
96
|
|
|
yield $builder->build(); |
97
|
|
|
} |
98
|
|
|
} |
99
|
|
|
|
100
|
|
|
/** |
101
|
|
|
* Implementation of “rule”. |
102
|
|
|
* |
103
|
|
|
* @param LookaheadIterator $tokens |
104
|
|
|
* @param string|null $pNodeId |
105
|
|
|
* @return string|int|null |
106
|
|
|
* @throws GrammarException |
107
|
|
|
*/ |
108
|
|
|
protected function rule(LookaheadIterator $tokens, &$pNodeId) |
109
|
|
|
{ |
110
|
|
|
return $this->choice($tokens, $pNodeId); |
111
|
|
|
} |
112
|
|
|
|
113
|
|
|
/** |
114
|
|
|
* Implementation of “choice”. |
115
|
|
|
* |
116
|
|
|
* @param LookaheadIterator $tokens |
117
|
|
|
* @param string|null $pNodeId |
118
|
|
|
* @return string|int|null |
119
|
|
|
* @throws GrammarException |
120
|
|
|
*/ |
121
|
|
|
protected function choice(LookaheadIterator $tokens, &$pNodeId) |
122
|
|
|
{ |
123
|
|
|
$children = []; |
124
|
|
|
|
125
|
|
|
// concatenation() … |
126
|
|
|
$nNodeId = $pNodeId; |
127
|
|
|
$rule = $this->concatenation($tokens, $nNodeId); |
128
|
|
|
|
129
|
|
|
if ($rule === null) { |
130
|
|
|
return null; |
131
|
|
|
} |
132
|
|
|
|
133
|
|
|
if ($nNodeId !== null) { |
134
|
|
|
$this->parsedRules[$rule]->setNodeId($nNodeId); |
135
|
|
|
} |
136
|
|
|
|
137
|
|
|
$children[] = $rule; |
138
|
|
|
$others = false; |
139
|
|
|
|
140
|
|
|
// … ( ::or:: concatenation() )* |
|
|
|
|
141
|
|
|
while ($tokens->current()->getName() === Parser::T_OR) { |
142
|
|
|
$tokens->next(); |
143
|
|
|
$others = true; |
144
|
|
|
$nNodeId = $pNodeId; |
145
|
|
|
$rule = $this->concatenation($tokens, $nNodeId); |
146
|
|
|
|
147
|
|
|
if ($rule === null) { |
148
|
|
|
return null; |
149
|
|
|
} |
150
|
|
|
|
151
|
|
|
if ($nNodeId !== null) { |
152
|
|
|
$this->parsedRules[$rule]->setNodeId($nNodeId); |
153
|
|
|
} |
154
|
|
|
|
155
|
|
|
$children[] = $rule; |
156
|
|
|
} |
157
|
|
|
|
158
|
|
|
$pNodeId = null; |
159
|
|
|
|
160
|
|
|
if ($others === false) { |
161
|
|
|
return $rule; |
162
|
|
|
} |
163
|
|
|
|
164
|
|
|
$name = $this->transitionalRuleCounter++; |
165
|
|
|
|
166
|
|
|
$this->parsedRules[$name] = new Alternation($name, $children); |
167
|
|
|
|
168
|
|
|
return $name; |
169
|
|
|
} |
170
|
|
|
|
171
|
|
|
/** |
172
|
|
|
* Implementation of “concatenation”. |
173
|
|
|
* |
174
|
|
|
* @param LookaheadIterator $tokens |
175
|
|
|
* @param string|null $pNodeId |
176
|
|
|
* @return string|int|null |
177
|
|
|
* @throws GrammarException |
178
|
|
|
*/ |
179
|
|
|
protected function concatenation(LookaheadIterator $tokens, &$pNodeId) |
180
|
|
|
{ |
181
|
|
|
$children = []; |
182
|
|
|
|
183
|
|
|
// repetition() … |
184
|
|
|
$rule = $this->repetition($tokens, $pNodeId); |
185
|
|
|
|
186
|
|
|
if ($rule === null) { |
187
|
|
|
return null; |
188
|
|
|
} |
189
|
|
|
|
190
|
|
|
$children[] = $rule; |
191
|
|
|
$others = false; |
192
|
|
|
|
193
|
|
|
// … repetition()* |
194
|
|
|
while (null !== $r1 = $this->repetition($tokens, $pNodeId)) { |
195
|
|
|
$children[] = $r1; |
196
|
|
|
$others = true; |
197
|
|
|
} |
198
|
|
|
|
199
|
|
|
if ($others === false && $pNodeId === null) { |
200
|
|
|
return $rule; |
201
|
|
|
} |
202
|
|
|
|
203
|
|
|
$name = $this->transitionalRuleCounter++; |
204
|
|
|
|
205
|
|
|
$this->parsedRules[$name] = new Concatenation($name, $children); |
206
|
|
|
|
207
|
|
|
return $name; |
208
|
|
|
} |
209
|
|
|
|
210
|
|
|
/** |
211
|
|
|
* Implementation of “repetition”. |
212
|
|
|
* |
213
|
|
|
* @param LookaheadIterator $tokens |
214
|
|
|
* @param string|null $pNodeId |
215
|
|
|
* @return string|int|null |
216
|
|
|
* @throws GrammarException |
217
|
|
|
*/ |
218
|
|
|
protected function repetition(LookaheadIterator $tokens, &$pNodeId) |
219
|
|
|
{ |
220
|
|
|
[$min, $max] = [null, null]; |
|
|
|
|
221
|
|
|
|
222
|
|
|
// simple() … |
223
|
|
|
$children = $this->simple($tokens, $pNodeId); |
224
|
|
|
|
225
|
|
|
if ($children === null) { |
226
|
|
|
return null; |
227
|
|
|
} |
228
|
|
|
|
229
|
|
|
// … quantifier()? |
|
|
|
|
230
|
|
|
switch ($tokens->current()->getName()) { |
231
|
|
|
case Parser::T_REPEAT_ZERO_OR_ONE: |
232
|
|
|
[$min, $max] = [0, 1]; |
|
|
|
|
233
|
|
|
$tokens->next(); |
234
|
|
|
break; |
235
|
|
|
|
236
|
|
|
case Parser::T_REPEAT_ONE_OR_MORE: |
237
|
|
|
[$min, $max] = [1, -1]; |
|
|
|
|
238
|
|
|
$tokens->next(); |
239
|
|
|
break; |
240
|
|
|
|
241
|
|
|
case Parser::T_REPEAT_ZERO_OR_MORE: |
242
|
|
|
[$min, $max] = [0, -1]; |
|
|
|
|
243
|
|
|
$tokens->next(); |
244
|
|
|
break; |
245
|
|
|
|
246
|
|
|
case Parser::T_REPEAT_N_TO_M: |
247
|
|
|
$min = (int)$tokens->current()->getValue(1); |
248
|
|
|
$max = (int)$tokens->current()->getValue(2); |
249
|
|
|
$tokens->next(); |
250
|
|
|
break; |
251
|
|
|
|
252
|
|
|
case Parser::T_REPEAT_ZERO_TO_M: |
253
|
|
|
[$min, $max] = [0, (int)$tokens->current()->getValue(1)]; |
|
|
|
|
254
|
|
|
$tokens->next(); |
255
|
|
|
break; |
256
|
|
|
|
257
|
|
|
case Parser::T_REPEAT_N_OR_MORE: |
258
|
|
|
[$min, $max] = [(int)$tokens->current()->getValue(1), -1]; |
|
|
|
|
259
|
|
|
$tokens->next(); |
260
|
|
|
break; |
261
|
|
|
|
262
|
|
|
case Parser::T_REPEAT_EXACTLY_N: |
263
|
|
|
$min = $max = (int)$tokens->current()->getValue(1); |
264
|
|
|
$tokens->next(); |
265
|
|
|
break; |
266
|
|
|
} |
267
|
|
|
|
268
|
|
|
// … <node>? |
269
|
|
|
if ($tokens->current()->getName() === Parser::T_KEPT_NAME) { |
270
|
|
|
$tokens->next(); |
271
|
|
|
$pNodeId = $tokens->current()->getValue(); |
272
|
|
|
$tokens->next(); |
273
|
|
|
} |
274
|
|
|
|
275
|
|
|
if ($min === null) { |
|
|
|
|
276
|
|
|
return $children; |
277
|
|
|
} |
278
|
|
|
|
279
|
|
|
if ($max !== -1 && $max < $min) { |
280
|
|
|
$error = 'Upper bound %d must be greater or equal to lower bound %d in rule %s.'; |
281
|
|
|
$error = \sprintf($error, $max, $min, $this->ruleName); |
|
|
|
|
282
|
|
|
throw new GrammarException($error, 2); |
283
|
|
|
} |
284
|
|
|
|
285
|
|
|
$name = $this->transitionalRuleCounter++; |
286
|
|
|
|
287
|
|
|
$this->parsedRules[$name] = new Repetition($name, $min, $max, $children); |
288
|
|
|
|
289
|
|
|
return $name; |
290
|
|
|
} |
291
|
|
|
|
292
|
|
|
/** |
293
|
|
|
* Implementation of “simple”. |
294
|
|
|
* |
295
|
|
|
* @param LookaheadIterator $tokens |
296
|
|
|
* @param int|string|null $pNodeId |
297
|
|
|
* @return string|int|null |
298
|
|
|
* @throws GrammarException |
299
|
|
|
*/ |
300
|
|
|
protected function simple(LookaheadIterator $tokens, &$pNodeId) |
301
|
|
|
{ |
302
|
|
|
switch ($tokens->current()->getName()) { |
303
|
|
|
case Parser::T_GROUP_OPEN: |
304
|
|
|
return $this->group($tokens, $pNodeId); |
305
|
|
|
|
306
|
|
|
case Parser::T_TOKEN_SKIPPED: |
307
|
|
|
return $this->token($tokens, false); |
308
|
|
|
|
309
|
|
|
case Parser::T_TOKEN_KEPT: |
310
|
|
|
return $this->token($tokens, true); |
311
|
|
|
|
312
|
|
|
case Parser::T_INVOKE: |
313
|
|
|
return $this->invoke($tokens); |
314
|
|
|
|
315
|
|
|
default: |
316
|
|
|
return null; |
317
|
|
|
} |
318
|
|
|
} |
319
|
|
|
|
320
|
|
|
/** |
321
|
|
|
* @param LookaheadIterator $tokens |
322
|
|
|
* @param int|string|null $pNodeId |
323
|
|
|
* @return int|null|string |
324
|
|
|
* @throws GrammarException |
325
|
|
|
*/ |
326
|
|
|
protected function group(LookaheadIterator $tokens, &$pNodeId) |
327
|
|
|
{ |
328
|
|
|
$tokens->next(); |
329
|
|
|
$rule = $this->choice($tokens, $pNodeId); |
330
|
|
|
|
331
|
|
|
if ($rule === null) { |
332
|
|
|
return null; |
333
|
|
|
} |
334
|
|
|
|
335
|
|
|
if ($tokens->current()->getName() !== Parser::T_GROUP_CLOSE) { |
336
|
|
|
return null; |
337
|
|
|
} |
338
|
|
|
|
339
|
|
|
$tokens->next(); |
340
|
|
|
|
341
|
|
|
return $rule; |
342
|
|
|
} |
343
|
|
|
|
344
|
|
|
/** |
345
|
|
|
* @param LookaheadIterator $tokens |
346
|
|
|
* @param bool $kept |
347
|
|
|
* @return int|string|null |
348
|
|
|
*/ |
349
|
|
|
protected function token(LookaheadIterator $tokens, bool $kept = true) |
350
|
|
|
{ |
351
|
|
|
$tokenName = $tokens->current()->getValue(1); |
352
|
|
|
|
353
|
|
|
$name = $this->transitionalRuleCounter++; |
354
|
|
|
|
355
|
|
|
$this->parsedRules[$name] = new Terminal($name, $tokenName, $kept); |
356
|
|
|
$tokens->next(); |
357
|
|
|
|
358
|
|
|
return $name; |
359
|
|
|
} |
360
|
|
|
|
361
|
|
|
/** |
362
|
|
|
* @param LookaheadIterator $tokens |
363
|
|
|
* @return int|string |
364
|
|
|
* @throws GrammarException |
365
|
|
|
*/ |
366
|
|
|
protected function invoke(LookaheadIterator $tokens) |
367
|
|
|
{ |
368
|
|
|
$tokenName = $tokens->current()->getValue(1); |
369
|
|
|
|
370
|
|
|
if (! \array_key_exists($tokenName, $this->rules)) { |
371
|
|
|
$error = \vsprintf('Cannot call rule %s() in rule %s because it does not exist.', [ |
372
|
|
|
$tokenName, |
373
|
|
|
$this->ruleName, |
374
|
|
|
]); |
375
|
|
|
|
376
|
|
|
throw new GrammarException($error, 5); |
377
|
|
|
} |
378
|
|
|
|
379
|
|
|
if ($tokens->getNext()->getName() === Eoi::T_NAME) { |
380
|
|
|
$name = $this->transitionalRuleCounter++; |
381
|
|
|
$this->parsedRules[$name] = new Concatenation($name, [$tokenName]); |
382
|
|
|
} else { |
383
|
|
|
$name = $tokenName; |
384
|
|
|
} |
385
|
|
|
|
386
|
|
|
$tokens->next(); |
387
|
|
|
|
388
|
|
|
return $name; |
389
|
|
|
} |
390
|
|
|
} |
391
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.