Parser::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 5
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 2
1
<?php
2
/**
3
 * This file is part of PHP-Yacc package.
4
 *
5
 * For the full copyright and license information, please view the LICENSE
6
 * file that was distributed with this source code.
7
 */
8
declare(strict_types=1);
9
10
namespace PhpYacc\Yacc;
11
12
use PhpYacc\Exception\ParseException;
13
use PhpYacc\Grammar\Context;
14
use PhpYacc\Grammar\Symbol;
15
16
/**
17
 * Class Parser.
18
 */
19
class Parser
20
{
21
    /**
     * Grammar context populated by the current parse() call.
     *
     * @var Context
     */
    protected $context;

    /**
     * Token source; parse() starts it on the grammar text.
     *
     * @var Lexer
     */
    protected $lexer;

    /**
     * Macro set applied to the tokens of each semantic action in copyAction().
     *
     * @var MacroSet
     */
    protected $macros;

    /**
     * Terminal interned under the name 'EOF' by doDeclaration(); its value is set to 0.
     *
     * @var Symbol
     */
    protected $eofToken;

    /**
     * Terminal interned under the name 'error' by doDeclaration().
     *
     * @var Symbol
     */
    protected $errorToken;

    /**
     * Non-terminal interned under the name '$start' by doDeclaration(); it heads
     * the very first production created by doGrammar().
     *
     * @var Symbol
     */
    protected $startPrime;

    /**
     * Precedence level handed to symbols by doToken(); it is increased after a
     * declaration whose tag is not Token::T_TOKEN has named at least one symbol.
     *
     * @var int
     */
    protected $currentPrecedence = 0;
55
56
    /**
     * Stores the collaborators used while parsing a grammar.
     *
     * @param Lexer    $lexer  token source for the grammar text
     * @param MacroSet $macros macro set applied to semantic actions
     */
    public function __construct(Lexer $lexer, MacroSet $macros)
    {
        $this->macros = $macros;
        $this->lexer = $lexer;
    }
67
68
    /**
     * Parses a grammar definition and returns the context describing it.
     *
     * The declaration section is read first, then the rule section. Afterwards
     * the EOF token, the error token and the '$start' symbol are copied onto the
     * context and Context::finish() is called.
     *
     * @param string       $code    grammar source text
     * @param Context|null $context context to fill; a new one is created when null
     *
     * @throws ParseException
     * @throws \PhpYacc\Exception\LexingException
     *
     * @return Context the context that was filled
     */
    public function parse(string $code, Context $context = null)
    {
        $this->context = $context ?? new Context();
        $grammar = $this->context;

        $this->lexer->startLexing($code, $grammar->filename);

        // Section order matters: declarations intern the special symbols the
        // rule section relies on.
        $this->doDeclaration();
        $this->doGrammar();

        $grammar->eofToken = $this->eofToken;
        $grammar->errorToken = $this->errorToken;
        $grammar->startPrime = $this->startPrime;
        $grammar->finish();

        return $grammar;
    }
94
95
    /**
     * Collects the raw tokens of a semantic action and returns the expanded text.
     *
     * Raw tokens are read until one whose value equals $delm is met while no '{'
     * is left open. That terminating token is consumed but not kept. The collected
     * tokens are handed to the macro set and the values of the resulting tokens
     * are concatenated.
     *
     * @param array $symbols   symbols of the rule being read (forwarded to the macro set)
     * @param int   $n         forwarded to the macro set together with $symbols
     * @param mixed $delm      token value that ends the action
     * @param array $attribute attribute names collected for the rule (forwarded to the macro set)
     *
     * @throws ParseException                     when a "\0" token is met before the delimiter
     * @throws \PhpYacc\Exception\LexingException
     *
     * @return string
     */
    protected function copyAction(array $symbols, int $n, $delm, array $attribute): string
    {
        $depth = 0;
        $collected = [];

        while (true) {
            $token = $this->lexer->getRawToken();

            // Stop only on the delimiter at brace depth zero (or below).
            if ($token->getValue() === $delm && $depth <= 0) {
                break;
            }

            switch ($token->getValue()) {
                case "\0":
                    throw ParseException::unexpected($token, Token::decode($delm));
                case '{':
                    $depth++;
                    break;
                case '}':
                    $depth--;
                    break;
            }

            $collected[] = $token;
        }

        $expanded = $this->macros->apply($this->context, $symbols, $collected, $n, $attribute);

        $valueOf = function (Token $token) {
            return $token->getValue();
        };

        return \implode('', \array_map($valueOf, $expanded));
    }
133
134
    /**
     * Handles a type declaration: an optional type tag followed by a list of symbols.
     *
     * Every listed symbol (a name token, or a token whose value starts with a
     * single quote) is interned as a non-terminal and, when a tag was given,
     * receives that tag as its type. Commas between symbols are skipped. The
     * first token that does not belong to the list is pushed back to the lexer.
     *
     * @throws ParseException
     * @throws \PhpYacc\Exception\LexingException
     */
    protected function doType()
    {
        $type = $this->getType();

        for (;;) {
            $token = $this->lexer->getToken();
            $value = $token->getValue();

            if ($value === ',') {
                continue;
            }

            $namesSymbol = $token->getType() === Token::T_NAME || $value[0] === "'";
            if (!$namesSymbol) {
                break;
            }

            $symbol = $this->context->internSymbol($value, false);
            if (null !== $type) {
                $symbol->type = $type;
            }
        }

        // The token that ended the list belongs to whatever comes next.
        $this->lexer->ungetToken();
    }
155
156
    /**
     * Reads the rule section of the grammar and registers every production.
     *
     * A first production headed by '$start' is registered before any rule is
     * read; the grammar's start symbol is appended to its body once all rules
     * have been seen. Rules are read until a T_MARK or T_EOF token. An action
     * that is followed by further symbols (a mid-rule action) is moved into a
     * production of its own, headed by a generated non-terminal that takes the
     * action's place in the rule body.
     *
     * Whether an action is present is decided by comparing with null, so an
     * action whose text is empty (or is the string '0') still counts as an action.
     *
     * @throws ParseException             when the stack types of a rule without action differ,
     *                                    or when a non-terminal is used but never defined
     * @throws \RuntimeException          on a malformed rule (unexpected token, missing ':',
     *                                    terminal used as rule head)
     * @throws \PhpYacc\Exception\LexingException
     */
    protected function doGrammar()
    {
        $attribute = [];
        $buffer = [null];
        $production = new Production('', 0);

        $production->body = [$this->startPrime];
        $this->context->addGram($production);

        $token = $this->lexer->getToken();

        while ($token->getType() !== Token::T_MARK && $token->getType() !== Token::T_EOF) {
            if ($token->getType() === Token::T_NAME) {
                // "attr@name": the token before '@' is an attribute name for the rule head.
                if ($this->lexer->peek()->getValue()[0] === '@') {
                    $attribute[0] = $token->getValue();
                    $this->lexer->getToken();
                    $token = $this->lexer->getToken();
                } else {
                    $attribute[0] = null;
                }
                $buffer[0] = $this->context->internSymbol($token->getValue(), false);
                $attribute[1] = null;
                if ($buffer[0]->isTerminal) {
                    throw new \RuntimeException("Non terminal symbol expected: $token");
                } elseif (($tmp = $this->lexer->getToken())->getType() !== Token::T_COLON) {
                    throw new \RuntimeException("':' expected, $tmp found");
                }
                // The head of the first rule is the start symbol unless one was set earlier.
                if ($this->context->startSymbol === null) {
                    $this->context->startSymbol = $buffer[0];
                }
            } elseif ($token->getValue()[0] === '|') {
                // An alternative re-uses the head of the previous rule.
                if (!$buffer[0]) {
                    throw new \RuntimeException("Syntax Error, unexpected $token");
                }
                $attribute[1] = null;
            } elseif ($token->getType() === Token::T_BEGIN_INC) {
                $this->doCopy();
                $token = $this->lexer->getToken();
                continue;
            } else {
                throw new \RuntimeException("Syntax Error, unexpected $token");
            }

            $lastTerm = $this->startPrime;
            $action = null;
            $pos = 0;
            $i = 1;
            while (true) {
                $token = $this->lexer->getToken();

                if ($token->getValue()[0] === '=') {
                    $pos = $token->getLine();
                    if (($token = $this->lexer->getToken())->getValue()[0] === '{') {
                        $pos = $token->getLine();
                        $action = $this->copyAction($buffer, $i - 1, '}', $attribute);
                    } else {
                        $this->lexer->ungetToken();
                        $action = $this->copyAction($buffer, $i - 1, ';', $attribute);
                    }
                } elseif ($token->getValue()[0] === '{') {
                    $pos = $token->getLine();
                    $action = $this->copyAction($buffer, $i - 1, '}', $attribute);
                } elseif ($token->getType() === Token::T_PRECTOK) {
                    // The symbol after the precedence token supplies the rule's precedence.
                    $lastTerm = $this->context->internSymbol($this->lexer->getToken()->getValue(), false);
                } elseif ($token->getType() === Token::T_NAME && $this->lexer->peek()->getType() === Token::T_COLON) {
                    // "name :" starts the next rule; the outer loop handles it.
                    break;
                } elseif ($token->getType() === Token::T_NAME && $this->lexer->peek()->getValue()[0] === '@') {
                    $attribute[$i] = $token->getValue();
                    $this->lexer->getToken();
                } elseif ($token->getType() === Token::T_NAME || $token->getType() === Token::T_STRING) {
                    // Strict null test: an empty mid-rule action must still get its
                    // own production, otherwise it would silently vanish from the body.
                    if ($action !== null) {
                        $g = $this->context->genNonTerminal();
                        $production = new Production($action, $pos);
                        $production->body = [$g];
                        $buffer[$i++] = $g;
                        $attribute[$i] = null;
                        $production->link = $production->body[0]->value;
                        $g->value = $this->context->addGram($production);
                    }
                    $buffer[$i++] = $w = $this->context->internSymbol($token->getValue(), false);
                    $attribute[$i] = null;
                    if ($w->isTerminal) {
                        $lastTerm = $w;
                    }
                    $action = null;
                } else {
                    break;
                }
            }

            // Only a rule without any action is subject to the stack type check;
            // an explicit action, even an empty one, is exempt.
            if ($action === null) {
                if ($i > 1 && $buffer[0]->type !== null && $buffer[0]->type !== $buffer[1]->type) {
                    throw new ParseException('Stack types are different');
                }
            }
            $production = new Production($action, $pos);

            $production->body = \array_slice($buffer, 0, $i);
            $production->precedence = $lastTerm->precedence;
            $production->associativity = $lastTerm->associativity & Symbol::MASK;
            // Chain the new production in front of the ones already stored on the head symbol.
            $production->link = $production->body[0]->value;
            $buffer[0]->value = $this->context->addGram($production);

            if ($token->getType() === Token::T_SEMICOLON) {
                $token = $this->lexer->getToken();
            }
        }

        $this->context->gram(0)->body[] = $this->context->startSymbol;
        $this->startPrime->value = null;
        foreach ($this->context->nonterminals as $symbol) {
            if ($symbol === $this->startPrime) {
                continue;
            }
            if (($j = $symbol->value) === null) {
                throw new ParseException("Non terminal {$symbol->name} used, but not defined");
            }
            // Reverse the chain built above (each new production was linked in front).
            $k = null;
            while ($j) {
                $w = $j->link;
                $j->link = $k;
                $k = $j;
                $j = $w;
            }
            $symbol->value = $k;
        }
    }
286
287
    /**
     * Reads the declaration section of the grammar, up to the first T_MARK token.
     *
     * The special symbols 'EOF' (terminal, value 0), 'error' (terminal) and
     * '$start' (non-terminal) are interned first. Each declaration is then
     * dispatched on its token type. Finally every terminal other than EOF whose
     * value is negative is numbered consecutively, starting at 256.
     *
     * @throws ParseException                     on end of input before the mark, on an
     *                                            unknown declaration, or when the expected
     *                                            count is not a number
     * @throws \PhpYacc\Exception\LexingException
     */
    protected function doDeclaration()
    {
        $this->eofToken = $this->context->internSymbol('EOF', true);
        $this->eofToken->value = 0;
        $this->errorToken = $this->context->internSymbol('error', true);
        $this->startPrime = $this->context->internSymbol('$start', false);

        while (($token = $this->lexer->getToken())->getType() !== Token::T_MARK) {
            switch ($token->getType()) {
                case Token::T_TOKEN:
                case Token::T_RIGHT:
                case Token::T_LEFT:
                case Token::T_NON_ASSOC:
                    $this->doToken($token);
                    break;

                case Token::T_BEGIN_INC:
                    $this->doCopy();
                    break;

                case Token::T_UNION:
                    $this->doUnion();
                    $this->context->unioned = true;
                    break;

                case Token::T_TYPE:
                    $this->doType();
                    break;

                case Token::T_EXPECT:
                    $token = $this->lexer->getToken();
                    if ($token->getType() !== Token::T_NUMBER) {
                        throw ParseException::unexpected($token, Token::T_NUMBER);
                    }
                    $this->context->expected = (int) $token->getValue();
                    break;

                case Token::T_START:
                    $token = $this->lexer->getToken();
                    $this->context->startSymbol = $this->context->internSymbol($token->getValue(), false);
                    break;

                case Token::T_PURE_PARSER:
                    $this->context->pureFlag = true;
                    break;

                case Token::T_EOF:
                    throw new ParseException('No grammar given');
                default:
                    throw new ParseException("Syntax error, unexpected {$token->getValue()}");
            }
        }

        $base = 256;
        foreach ($this->context->terminals as $terminal) {
            // Compare with the symbol interned above: parse() copies it onto the
            // context only after this method has returned.
            if ($terminal === $this->eofToken) {
                continue;
            }
            // NOTE(review): doToken() tests `value === null` for "no value yet" while
            // this loop tests `value < 0` - confirm which default Symbol actually uses.
            if ($terminal->value < 0) {
                $terminal->value = $base++;
            }
        }
    }
355
356
    /**
     * Handles a token declaration introduced by $tag.
     *
     * After an optional type tag, every following name or string token is
     * interned as a terminal. Depending on the type of $tag the symbol's
     * associativity gets the LEFT, RIGHT or NON flag, and for any tag other than
     * T_TOKEN the symbol receives the current precedence level. A symbol may be
     * followed by a number, which becomes its value, and by a comma. The token
     * that ends the list is pushed back to the lexer.
     *
     * @param Token $tag the declaration keyword token that started the list
     *
     * @throws ParseException                     when a number follows a symbol that already has a value
     * @throws \PhpYacc\Exception\LexingException
     */
    protected function doToken(Token $tag)
    {
        $precedenceStep = 0;
        $type = $this->getType();
        $current = $this->lexer->getToken();

        while (true) {
            $kind = $current->getType();
            if ($kind !== Token::T_NAME && $kind !== Token::T_STRING) {
                break;
            }

            $symbol = $this->context->internSymbol($current->getValue(), true);

            if ($type) {
                $symbol->type = $type;
            }

            switch ($tag->getType()) {
                case Token::T_LEFT:
                    $symbol->associativity |= Symbol::LEFT;
                    break;
                case Token::T_RIGHT:
                    $symbol->associativity |= Symbol::RIGHT;
                    break;
                case Token::T_NON_ASSOC:
                    $symbol->associativity |= Symbol::NON;
                    break;
            }

            if ($tag->getType() !== Token::T_TOKEN) {
                $symbol->precedence = $this->currentPrecedence;
                // The level is advanced once, after the whole list has been read.
                $precedenceStep = 1;
            }

            $current = $this->lexer->getToken();

            // Optional explicit value for the symbol.
            if ($current->getType() === Token::T_NUMBER) {
                if ($symbol->value !== null) {
                    throw new ParseException(
                        sprintf('Unexpected Token::NUMBER as %s already has a value', $symbol->name)
                    );
                }
                $symbol->value = (int) $current->getValue();
                $current = $this->lexer->getToken();
            }

            // Optional separator.
            if ($current->getType() === Token::T_COMMA) {
                $current = $this->lexer->getToken();
            }
        }

        $this->lexer->ungetToken();
        $this->currentPrecedence += $precedenceStep;
    }
412
413
    /**
     * Reads an optional "<...>" type tag from the lexer.
     *
     * When the next token does not start with '<' it is pushed back and null is
     * returned. Otherwise token values are concatenated until the matching '>'
     * (nested '<' ... '>' pairs are kept as part of the text), the context is
     * flagged as unioned and the text is interned through Context::intern().
     *
     * @throws ParseException                     when a token starting with a newline or "\0"
     *                                            is met before the closing '>'
     * @throws \PhpYacc\Exception\LexingException
     *
     * @return null|Symbol
     */
    protected function getType()
    {
        $opening = $this->lexer->getToken();

        if ($opening->getValue()[0] !== '<') {
            $this->lexer->ungetToken();

            return null;
        }

        $depth = 1;
        $text = '';

        // The first token inside the tag is read cooked, all later ones raw.
        for ($token = $this->lexer->getToken(); ; $token = $this->lexer->getRawToken()) {
            $lead = $token->getValue()[0];

            if ($lead === "\n" || $lead === "\0") {
                throw ParseException::unexpected($token, '>');
            }

            if ($lead === '<') {
                $depth++;
            } elseif ($lead === '>') {
                $depth--;
            }

            if ($depth === 0) {
                break;
            }

            $text .= $token->getValue();
        }

        $this->context->unioned = true;

        return $this->context->intern($text);
    }
457
458
    /**
     * Placeholder invoked when a Token::T_BEGIN_INC token is met in the
     * declaration or rule section. Not implemented: nothing is read from the lexer.
     *
     * @return void
     */
    protected function doCopy()
    {
        // TODO
    }
465
466
    /**
     * Placeholder invoked when a Token::T_UNION token is met in the declaration
     * section. Not implemented: nothing is read from the lexer.
     *
     * @return void
     */
    protected function doUnion()
    {
        // TODO
    }
473
}
474