Completed
Push — master ( 598945...316866 )
by Kirill
09:09
created

Analyzer::analyze()   B

Complexity

Conditions 7
Paths 13

Size

Total Lines 35

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 0
Metric Value
dl 0
loc 35
ccs 0
cts 27
cp 0
rs 8.4266
c 0
b 0
f 0
cc 7
nc 13
nop 0
crap 56
1
<?php
2
/**
3
 * This file is part of Railt package.
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 */
8
declare(strict_types=1);
9
10
namespace Railt\Compiler\Grammar;
11
12
use Railt\Compiler\Exception\GrammarException;
13
use Railt\Compiler\Grammar\Builder\AbstractBuilder;
14
use Railt\Compiler\Grammar\Builder\Alternation;
15
use Railt\Compiler\Grammar\Builder\Concatenation;
16
use Railt\Compiler\Grammar\Builder\Repetition;
17
use Railt\Compiler\Grammar\Builder\Terminal;
18
use Railt\Compiler\Grammar\Delegate\RuleDelegate;
19
use Railt\Lexer\LexerInterface;
20
use Railt\Lexer\Result\Eoi;
21
use Railt\Parser\Rule\Rule;
22
23
/**
24
 * Analyze rules and transform them into atomic rules operations.
25
 */
26
class Analyzer
27
{
28
    /**
29
     * Tokens representing rules.
30
     * @var array
31
     */
32
    protected $tokens;
33
34
    /**
35
     * @var array|RuleDelegate[]
36
     */
37
    protected $rules = [];
38
39
    /**
40
     * Parsed rules.
41
     * @var array|AbstractBuilder[]
42
     */
43
    protected $parsedRules;
44
45
    /**
46
     * Counter to auto-name transitional rules.
47
     * @var int
48
     */
49
    protected $transitionalRuleCounter = 0;
50
51
    /**
52
     * Rule name being analyzed.
53
     * @var string
54
     */
55
    private $ruleName;
56
57
    /**
58
     * Analyzer constructor.
59
     * @param LexerInterface $lexer
60
     */
61
    public function __construct(LexerInterface $lexer)
62
    {
63
        $this->tokens = $lexer->getTokenDefinitions();
0 ignored issues
show
Documentation Bug introduced by
It seems like $lexer->getTokenDefinitions() can also be of type object<Railt\Lexer\iterable>. However, the property $tokens is declared as type array. Maybe add an additional type check?

Our type inference engine has found a suspicous assignment of a value to a property. This check raises an issue when a value that can be of a mixed type is assigned to a property that is type hinted more strictly.

For example, imagine you have a variable $accountId that can either hold an Id object or false (if there is no account id yet). Your code now assigns that value to the id property of an instance of the Account class. This class holds a proper account, so the id value must no longer be false.

Either this assignment is in error or a type check should be added for that assignment.

class Id
{
    public $id;

    public function __construct($id)
    {
        $this->id = $id;
    }

}

class Account
{
    /** @var  Id $id */
    public $id;
}

$account_id = false;

if (starsAreRight()) {
    $account_id = new Id(42);
}

$account = new Account();
if ($account instanceof Id)
{
    $account->id = $account_id;
}
Loading history...
64
    }
65
66
    /**
67
     * @param RuleDelegate $delegate
68
     */
69
    public function addRuleDelegate(RuleDelegate $delegate): void
70
    {
71
        $this->rules[$delegate->getRuleName()] = $delegate;
72
    }
73
74
    /**
75
     * Build the analyzer of the rules (does not analyze the rules).
76
     *
77
     * @return Rule[]|\Traversable
78
     * @throws GrammarException
79
     */
80
    public function analyze(): iterable
81
    {
82
        if (\count($this->rules) === 0) {
83
            throw new GrammarException('No rules specified');
84
        }
85
86
        $this->parsedRules = [];
87
88
        foreach ($this->rules as $delegate) {
89
            $this->ruleName = $delegate->getRuleName();
90
            $nodeId         = $delegate->isKept() ? $delegate->getRuleName() : null;
91
92
            $pNodeId = $nodeId;
93
            $rule    = $this->rule($delegate->getInnerTokens(), $pNodeId);
94
95
            if ($rule === null) {
96
                $error = \sprintf('Error while parsing rule %s.', $delegate->getRuleName());
97
                throw new GrammarException($error, 1);
98
            }
99
100
            $zeRule = $this->parsedRules[$rule];
101
            $zeRule->setName($delegate->getRuleName());
102
103
            if ($nodeId !== null) {
104
                $zeRule->setDefaultId($nodeId);
105
            }
106
107
            unset($this->parsedRules[$rule]);
108
            $this->parsedRules[$delegate->getRuleName()] = $zeRule;
109
        }
110
111
        foreach ($this->parsedRules as $builder) {
112
            yield $builder->build();
113
        }
114
    }
115
116
    /**
117
     * Implementation of “rule”.
118
     *
119
     * @param LookaheadIterator $tokens
120
     * @param string|null $pNodeId
121
     * @return string|int|null
122
     * @throws GrammarException
123
     */
124
    protected function rule(LookaheadIterator $tokens, &$pNodeId)
125
    {
126
        return $this->choice($tokens, $pNodeId);
127
    }
128
129
    /**
130
     * Implementation of “choice”.
131
     *
132
     * @param LookaheadIterator $tokens
133
     * @param string|null $pNodeId
134
     * @return string|int|null
135
     * @throws GrammarException
136
     */
137
    protected function choice(LookaheadIterator $tokens, &$pNodeId)
138
    {
139
        $children = [];
140
141
        // concatenation() …
142
        $nNodeId = $pNodeId;
143
        $rule    = $this->concatenation($tokens, $nNodeId);
144
145
        if ($rule === null) {
146
            return null;
147
        }
148
149
        if ($nNodeId !== null) {
150
            $this->parsedRules[$rule]->setNodeId($nNodeId);
151
        }
152
153
        $children[] = $rule;
154
        $others     = false;
155
156
        // … ( ::or:: concatenation() )*
0 ignored issues
show
Unused Code Comprehensibility introduced by
40% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
157
        while ($tokens->current()->getName() === Parser::T_OR) {
158
            $tokens->next();
159
            $others  = true;
160
            $nNodeId = $pNodeId;
161
            $rule    = $this->concatenation($tokens, $nNodeId);
162
163
            if ($rule === null) {
164
                return null;
165
            }
166
167
            if ($nNodeId !== null) {
168
                $this->parsedRules[$rule]->setNodeId($nNodeId);
169
            }
170
171
            $children[] = $rule;
172
        }
173
174
        $pNodeId = null;
175
176
        if ($others === false) {
177
            return $rule;
178
        }
179
180
        $name = $this->transitionalRuleCounter++;
181
182
        $this->parsedRules[$name] = new Alternation($name, $children);
183
184
        return $name;
185
    }
186
187
    /**
188
     * Implementation of “concatenation”.
189
     *
190
     * @param LookaheadIterator $tokens
191
     * @param string|null $pNodeId
192
     * @return string|int|null
193
     * @throws GrammarException
194
     */
195
    protected function concatenation(LookaheadIterator $tokens, &$pNodeId)
196
    {
197
        $children = [];
198
199
        // repetition() …
200
        $rule = $this->repetition($tokens, $pNodeId);
201
202
        if ($rule === null) {
203
            return null;
204
        }
205
206
        $children[] = $rule;
207
        $others     = false;
208
209
        // … repetition()*
210
        while (null !== $r1 = $this->repetition($tokens, $pNodeId)) {
211
            $children[] = $r1;
212
            $others     = true;
213
        }
214
215
        if ($others === false && $pNodeId === null) {
216
            return $rule;
217
        }
218
219
        $name = $this->transitionalRuleCounter++;
220
221
        $this->parsedRules[$name] = new Concatenation($name, $children);
222
223
        return $name;
224
    }
225
226
    /**
227
     * Implementation of “repetition”.
228
     *
229
     * @param LookaheadIterator $tokens
230
     * @param string|null $pNodeId
231
     * @return string|int|null
232
     * @throws GrammarException
233
     */
234
    protected function repetition(LookaheadIterator $tokens, &$pNodeId)
235
    {
236
        [$min, $max] = [null, null];
0 ignored issues
show
Bug introduced by
The variable $min seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
Bug introduced by
The variable $max seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
237
238
        // simple() …
239
        $children = $this->simple($tokens, $pNodeId);
240
241
        if ($children === null) {
242
            return null;
243
        }
244
245
        // … quantifier()?
0 ignored issues
show
Unused Code Comprehensibility introduced by
43% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
246
        switch ($tokens->current()->getName()) {
247
            case Parser::T_REPEAT_ZERO_OR_ONE:
248
                [$min, $max] = [0, 1];
0 ignored issues
show
Bug introduced by
The variable $min seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
Bug introduced by
The variable $max seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
249
                $tokens->next();
250
                break;
251
252
            case Parser::T_REPEAT_ONE_OR_MORE:
253
                [$min, $max] = [1, -1];
0 ignored issues
show
Bug introduced by
The variable $min seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
Bug introduced by
The variable $max seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
254
                $tokens->next();
255
                break;
256
257
            case Parser::T_REPEAT_ZERO_OR_MORE:
258
                [$min, $max] = [0, -1];
0 ignored issues
show
Bug introduced by
The variable $min seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
Bug introduced by
The variable $max seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
259
                $tokens->next();
260
                break;
261
262
            case Parser::T_REPEAT_N_TO_M:
263
                $min = (int)$tokens->current()->getValue(1);
264
                $max = (int)$tokens->current()->getValue(2);
265
                $tokens->next();
266
                break;
267
268
            case Parser::T_REPEAT_ZERO_TO_M:
269
                [$min, $max] = [0, (int)$tokens->current()->getValue(1)];
0 ignored issues
show
Bug introduced by
The variable $min seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
Bug introduced by
The variable $max seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
270
                $tokens->next();
271
                break;
272
273
            case Parser::T_REPEAT_N_OR_MORE:
274
                [$min, $max] = [(int)$tokens->current()->getValue(1), -1];
0 ignored issues
show
Bug introduced by
The variable $min seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
Bug introduced by
The variable $max seems only to be defined at a later point. Did you maybe move this code here without moving the variable definition?

This error can happen if you refactor code and forget to move the variable initialization.

Let’s take a look at a simple example:

function someFunction() {
    $x = 5;
    echo $x;
}

The above code is perfectly fine. Now imagine that we re-order the statements:

function someFunction() {
    echo $x;
    $x = 5;
}

In that case, $x would be read before it is initialized. This was a very basic example, however the principle is the same for the found issue.

Loading history...
275
                $tokens->next();
276
                break;
277
278
            case Parser::T_REPEAT_EXACTLY_N:
279
                $min = $max = (int)$tokens->current()->getValue(1);
280
                $tokens->next();
281
                break;
282
        }
283
284
        // … <node>?
285
        if ($tokens->current()->getName() === Parser::T_KEPT_NAME) {
286
            $tokens->next();
287
            $pNodeId = $tokens->current()->getValue();
288
            $tokens->next();
289
        }
290
291
        if ($min === null) {
0 ignored issues
show
Bug introduced by
The variable $min does not seem to be defined for all execution paths leading up to this point.

If you define a variable conditionally, it can happen that it is not defined for all execution paths.

Let’s take a look at an example:

function myFunction($a) {
    switch ($a) {
        case 'foo':
            $x = 1;
            break;

        case 'bar':
            $x = 2;
            break;
    }

    // $x is potentially undefined here.
    echo $x;
}

In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.

Available Fixes

  1. Check for existence of the variable explicitly:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        if (isset($x)) { // Make sure it's always set.
            echo $x;
        }
    }
    
  2. Define a default value for the variable:

    function myFunction($a) {
        $x = ''; // Set a default which gets overridden for certain paths.
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        echo $x;
    }
    
  3. Add a value for the missing path:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
    
            // We add support for the missing case.
            default:
                $x = '';
                break;
        }
    
        echo $x;
    }
    
Loading history...
292
            return $children;
293
        }
294
295
        if ($max !== -1 && $max < $min) {
296
            $error = 'Upper bound %d must be greater or equal to lower bound %d in rule %s.';
297
            $error = \sprintf($error, $max, $min, $this->ruleName);
0 ignored issues
show
Bug introduced by
The variable $max does not seem to be defined for all execution paths leading up to this point.

If you define a variable conditionally, it can happen that it is not defined for all execution paths.

Let’s take a look at an example:

function myFunction($a) {
    switch ($a) {
        case 'foo':
            $x = 1;
            break;

        case 'bar':
            $x = 2;
            break;
    }

    // $x is potentially undefined here.
    echo $x;
}

In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.

Available Fixes

  1. Check for existence of the variable explicitly:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        if (isset($x)) { // Make sure it's always set.
            echo $x;
        }
    }
    
  2. Define a default value for the variable:

    function myFunction($a) {
        $x = ''; // Set a default which gets overridden for certain paths.
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        echo $x;
    }
    
  3. Add a value for the missing path:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
    
            // We add support for the missing case.
            default:
                $x = '';
                break;
        }
    
        echo $x;
    }
    
Loading history...
298
            throw new GrammarException($error, 2);
299
        }
300
301
        $name = $this->transitionalRuleCounter++;
302
303
        $this->parsedRules[$name] = new Repetition($name, $min, $max, $children);
304
305
        return $name;
306
    }
307
308
    /**
309
     * Implementation of “simple”.
310
     *
311
     * @param LookaheadIterator $tokens
312
     * @param int|string|null $pNodeId
313
     * @return string|int|null
314
     * @throws GrammarException
315
     */
316
    protected function simple(LookaheadIterator $tokens, &$pNodeId)
317
    {
318
        switch ($tokens->current()->getName()) {
319
            case Parser::T_GROUP_OPEN:
320
                return $this->group($tokens, $pNodeId);
321
322
            case Parser::T_TOKEN_SKIPPED:
323
                return $this->token($tokens, false);
324
325
            case Parser::T_TOKEN_KEPT:
326
                return $this->token($tokens, true);
327
328
            case Parser::T_INVOKE:
329
                return $this->invoke($tokens);
330
331
            default:
332
                return null;
333
        }
334
    }
335
336
    /**
337
     * @param LookaheadIterator $tokens
338
     * @param int|string|null $pNodeId
339
     * @return int|null|string
340
     * @throws GrammarException
341
     */
342
    protected function group(LookaheadIterator $tokens, &$pNodeId)
343
    {
344
        $tokens->next();
345
        $rule = $this->choice($tokens, $pNodeId);
346
347
        if ($rule === null) {
348
            return null;
349
        }
350
351
        if ($tokens->current()->getName() !== Parser::T_GROUP_CLOSE) {
352
            return null;
353
        }
354
355
        $tokens->next();
356
357
        return $rule;
358
    }
359
360
    /**
361
     * @param LookaheadIterator $tokens
362
     * @param bool $kept
363
     * @return int|string|null
364
     * @throws GrammarException
365
     */
366
    protected function token(LookaheadIterator $tokens, bool $kept = true)
367
    {
368
        $tokenName = $tokens->current()->getValue(1);
369
370
        if (false) { // TODO
0 ignored issues
show
Bug introduced by
Avoid IF statements that are always true or false
Loading history...
371
            $error = \sprintf('Token %s does not exist in rule %s.', $tokenName, $this->ruleName);
372
            throw new GrammarException($error, 4);
373
        }
374
375
        $name = $this->transitionalRuleCounter++;
376
377
        $this->parsedRules[$name] = new Terminal($name, $tokenName, $kept);
378
        $tokens->next();
379
380
        return $name;
381
    }
382
383
    /**
384
     * @param LookaheadIterator $tokens
385
     * @return int|string
386
     * @throws GrammarException
387
     */
388
    protected function invoke(LookaheadIterator $tokens)
389
    {
390
        $tokenName = $tokens->current()->getValue(1);
391
392
        $isEmptyRule = ! \array_key_exists($tokenName, $this->rules);
393
394
        if ($isEmptyRule) {
395
            $error = \vsprintf('Cannot call rule %s() in rule %s because it does not exist.', [
396
                $tokenName,
397
                $this->ruleName,
398
            ]);
399
400
            throw new GrammarException($error, 5);
401
        }
402
403
        if ($tokens->getNext()->getName() === Eoi::T_NAME) {
404
            $name                     = $this->transitionalRuleCounter++;
405
            $this->parsedRules[$name] = new Concatenation($name, [$tokenName]);
406
        } else {
407
            $name = $tokenName;
408
        }
409
410
        $tokens->next();
411
412
        return $name;
413
    }
414
}
415