Passed
Pull Request — master (#1184)
by Alexey
10:51
created

Parser::mergeTree()   A

Complexity

Conditions 6
Paths 6

Size

Total Lines 31
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 6
eloc 14
c 1
b 0
f 0
nc 6
nop 4
dl 0
loc 31
rs 9.2222
1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * Hoa
7
 *
8
 *
9
 *
10
 *
11
 * BSD 3-Clause License
12
 *
13
 * Copyright © 2007-2017, Hoa community. All rights reserved.
14
 *
15
 * Redistribution and use in source and binary forms, with or without
16
 * modification, are permitted provided that the following conditions are met:
17
 *
18
 * 1. Redistributions of source code must retain the above copyright notice, this
19
 *    list of conditions and the following disclaimer.
20
 *
21
 * 2. Redistributions in binary form must reproduce the above copyright notice,
22
 *    this list of conditions and the following disclaimer in the documentation
23
 *    and/or other materials provided with the distribution.
24
 *
25
 * 3. Neither the name of the copyright holder nor the names of its
26
 *    contributors may be used to endorse or promote products derived from
27
 *    this software without specific prior written permission.
28
 *
29
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
32
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
33
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
35
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
36
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
37
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
 */
40
41
namespace JMS\Serializer\Type\Compiler\Llk;
42
43
use Hoa\Iterator;
44
use Hoa\Iterator\Buffer;
45
use JMS\Serializer\Type\Compiler;
46
use JMS\Serializer\Type\Compiler\Exception\UnexpectedToken;
47
48
/**
49
 * Class \JMS\Serializer\Type\Compiler\Llk\Parser.
50
 *
51
 * LL(k) parser.
52
 */
53
class Parser
54
{
55
    /**
56
     * List of pragmas.
57
     *
58
     * @var array
59
     */
60
    protected $_pragmas       = null;
61
62
    /**
63
     * List of skipped tokens.
64
     *
65
     * @var array
66
     */
67
    protected $_skip          = null;
68
69
    /**
70
     * Associative array (token name => token regex), to be defined in
71
     * precedence order.
72
     *
73
     * @var array
74
     */
75
    protected $_tokens        = null;
76
77
    /**
78
     * Rules, to be defined as associative array, name => Rule object.
79
     *
80
     * @var Rule[]
81
     */
82
    protected $_rules         = null;
83
84
    /**
85
     * Lexer iterator.
86
     *
87
     * @var Buffer
88
     */
89
    protected $_tokenSequence = null;
90
91
    /**
92
     * Possible token causing an error.
93
     *
94
     * @var array
95
     */
96
    protected $_errorToken    = null;
97
98
    /**
99
     * Trace of activated rules.
100
     *
101
     * @var array
102
     */
103
    protected $_trace         = [];
104
105
    /**
106
     * Stack of todo list.
107
     *
108
     * @var array
109
     */
110
    protected $_todo          = null;
111
112
    /**
113
     * AST.
114
     *
115
     * @var TreeNode
116
     */
117
    protected $_tree          = null;
118
119
    /**
120
     * Current depth while building the trace.
121
     *
122
     * @var int
123
     */
124
    protected $_depth         = -1;
125
126
127
128
    /**
129
     * Construct the parser.
130
     *
131
     * @param   array  $tokens  Tokens.
132
     * @param   array  $rules   Rules.
133
     * @param   array  $pragmas Pragmas.
134
     */
135
    public function __construct(
136
        array $tokens = [],
137
        array $rules = [],
138
        array $pragmas = []
139
    ) {
140
        $this->_tokens  = $tokens;
141
        $this->_rules   = $rules;
142
        $this->_pragmas = $pragmas;
143
144
        return;
145
    }
146
147
    /**
148
     * Parse :-).
149
     *
150
     * @param   string  $text Text to parse.
151
     * @param   string  $rule The axiom, i.e. root rule.
152
     * @param   bool    $tree Whether build tree or not.
153
     *
154
     * @return  mixed
155
     *
156
     * @throws  UnexpectedToken
157
     */
158
    final public function parse(string $text, ?string $rule = null, bool $tree = true)
159
    {
160
        $k = 1024;
161
162
        if (isset($this->_pragmas['parser.lookahead'])) {
163
            $k = max(0, intval($this->_pragmas['parser.lookahead']));
164
        }
165
166
        $lexer                = new Lexer($this->_pragmas);
167
        $this->_tokenSequence = new Iterator\Buffer(
168
            $lexer->lexMe($text, $this->_tokens),
169
            $k
170
        );
171
        $this->_tokenSequence->rewind();
172
173
        $this->_errorToken = null;
174
        $this->_trace      = [];
175
        $this->_todo       = [];
176
177
        if (false === array_key_exists($rule, $this->_rules)) {
178
            $rule = $this->getRootRule();
179
        }
180
181
        $closeRule   = new Rule\Ekzit($rule, 0);
182
        $openRule    = new Rule\Entry($rule, 0, [$closeRule]);
183
        $this->_todo = [$closeRule, $openRule];
184
185
        do {
186
            $out = $this->unfold();
187
188
            if (null !== $out &&
189
                'EOF' === $this->_tokenSequence->current()['token']) {
190
                break;
191
            }
192
193
            if (false === $this->backtrack()) {
194
                $token  = $this->_errorToken;
195
196
                if (null === $this->_errorToken) {
197
                    $token = $this->_tokenSequence->current();
198
                }
199
200
                $offset = $token['offset'];
201
                $line   = 1;
202
                $column = 1;
203
204
                if (!empty($text)) {
205
                    if (0 === $offset) {
206
                        $leftnl = 0;
207
                    } else {
208
                        $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0;
209
                    }
210
211
                    $rightnl = strpos($text, "\n", $offset);
212
                    $line    = substr_count($text, "\n", 0, $leftnl + 1) + 1;
213
                    $column  = $offset - $leftnl + (0 === $leftnl);
214
215
                    if (false !== $rightnl) {
216
                        $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n");
217
                    }
218
                }
219
220
                throw new Compiler\Exception\UnexpectedToken(
221
                    'Unexpected token "%s" (%s) at line %d and column %d:' .
222
                    "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑',
223
                    0,
224
                    [
225
                        $token['value'],
226
                        $token['token'],
227
                        $line,
228
                        $column,
229
                        $text,
230
                    ],
231
                    $line,
232
                    $column
233
                );
234
            }
235
        } while (true);
236
237
        if (false === $tree) {
238
            return true;
239
        }
240
241
        $tree = $this->_buildTree();
242
243
        if (!($tree instanceof TreeNode)) {
244
            throw new Compiler\Exception(
245
                'Parsing error: cannot build AST, the trace is corrupted.',
246
                1
247
            );
248
        }
249
250
        return $this->_tree = $tree;
251
    }
252
253
    /**
254
     * Unfold trace.
255
     *
256
     * @return  mixed
257
     */
258
    final protected function unfold()
259
    {
260
        while (0 < count($this->_todo)) {
261
            $rule = array_pop($this->_todo);
262
263
            if ($rule instanceof Rule\Ekzit) {
264
                $rule->setDepth($this->_depth);
265
                $this->_trace[] = $rule;
266
267
                if (false === $rule->isTransitional()) {
268
                    --$this->_depth;
269
                }
270
            } else {
271
                $ruleName = $rule->getRule();
272
                $next     = $rule->getData();
273
                $zeRule   = $this->_rules[$ruleName];
274
                $out      = $this->_parse($zeRule, $next);
275
276
                if (false === $out && false === $this->backtrack()) {
277
                    return null;
278
                }
279
            }
280
        }
281
282
        return true;
283
    }
284
285
    /**
286
     * Parse current rule.
287
     *
288
     * @param   Rule  $zeRule Current rule.
289
     * @param   int                     $next   Next rule index.
290
     */
291
    final protected function _parse(Rule $zeRule, int $next): bool
292
    {
293
        $regex = null;
294
        if ($zeRule instanceof Rule\Token) {
295
            $name = $this->_tokenSequence->current()['token'];
296
297
            if ($zeRule->getTokenName() !== $name) {
298
                return false;
299
            }
300
301
            $value = $this->_tokenSequence->current()['value'];
302
303
            if (0 <= $unification = $zeRule->getUnificationIndex()) {
304
                for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) {
305
                    $trace = $this->_trace[$i];
306
307
                    if ($trace instanceof Rule\Entry) {
308
                        if (false === $trace->isTransitional()) {
309
                            if ($trace->getDepth() <= $this->_depth) {
310
                                break;
311
                            }
312
313
                            --$skip;
314
                        }
315
                    } elseif ($trace instanceof Rule\Ekzit &&
316
                              false === $trace->isTransitional()) {
317
                        $skip += $trace->getDepth() > $this->_depth;
318
                    }
319
320
                    if (0 < $skip) {
321
                        continue;
322
                    }
323
324
                    if ($trace instanceof Rule\Token &&
325
                        $unification === $trace->getUnificationIndex() &&
326
                        $value !== $trace->getValue()) {
327
                        return false;
328
                    }
329
                }
330
            }
331
332
            $current = $this->_tokenSequence->current();
333
334
            $namespace = $current['namespace'];
335
            $offset    = $current['offset'];
336
337
            $zzeRule   = clone $zeRule;
338
            $zzeRule->setValue($value);
339
            $zzeRule->setOffset($offset);
340
            $zzeRule->setNamespace($namespace);
341
342
            if (isset($this->_tokens[$namespace][$name])) {
343
                $zzeRule->setRepresentation($this->_tokens[$namespace][$name]);
344
            } else {
345
                foreach ($this->_tokens[$namespace] as $_name => $regex) {
346
                    if (false === $pos = strpos($_name, ':')) {
347
                        continue;
348
                    }
349
350
                    $_name = substr($_name, 0, $pos);
351
352
                    if ($_name === $name) {
353
                        break;
354
                    }
355
                }
356
357
                $zzeRule->setRepresentation($regex);
358
            }
359
360
            array_pop($this->_todo);
361
            $this->_trace[] = $zzeRule;
362
            $this->_tokenSequence->next();
363
            $this->_errorToken = $this->_tokenSequence->current();
364
365
            return true;
366
        } elseif ($zeRule instanceof Rule\Concatenation) {
367
            if (false === $zeRule->isTransitional()) {
368
                ++$this->_depth;
369
            }
370
371
            $this->_trace[] = new Rule\Entry(
372
                $zeRule->getName(),
373
                0,
374
                null,
375
                $this->_depth
376
            );
377
            $children = $zeRule->getChildren();
378
379
            for ($i = count($children) - 1; $i >= 0; --$i) {
380
                $nextRule      = $children[$i];
381
                $this->_todo[] = new Rule\Ekzit($nextRule, 0);
382
                $this->_todo[] = new Rule\Entry($nextRule, 0);
383
            }
384
385
            return true;
386
        } elseif ($zeRule instanceof Rule\Choice) {
387
            $children = $zeRule->getChildren();
388
389
            if ($next >= count($children)) {
390
                return false;
391
            }
392
393
            if (false === $zeRule->isTransitional()) {
394
                ++$this->_depth;
395
            }
396
397
            $this->_trace[] = new Rule\Entry(
398
                $zeRule->getName(),
399
                $next,
400
                $this->_todo,
401
                $this->_depth
402
            );
403
            $nextRule      = $children[$next];
404
            $this->_todo[] = new Rule\Ekzit($nextRule, 0);
405
            $this->_todo[] = new Rule\Entry($nextRule, 0);
406
407
            return true;
408
        } elseif ($zeRule instanceof Rule\Repetition) {
409
            $nextRule = $zeRule->getChildren();
410
411
            if (0 === $next) {
412
                $name = $zeRule->getName();
413
                $min  = $zeRule->getMin();
414
415
                if (false === $zeRule->isTransitional()) {
416
                    ++$this->_depth;
417
                }
418
419
                $this->_trace[] = new Rule\Entry(
420
                    $name,
421
                    $min,
422
                    null,
423
                    $this->_depth
424
                );
425
                array_pop($this->_todo);
426
                $this->_todo[]  = new Rule\Ekzit(
427
                    $name,
428
                    $min,
429
                    $this->_todo
430
                );
431
432
                for ($i = 0; $i < $min; ++$i) {
433
                    $this->_todo[] = new Rule\Ekzit($nextRule, 0);
434
                    $this->_todo[] = new Rule\Entry($nextRule, 0);
435
                }
436
437
                return true;
438
            } else {
439
                $max = $zeRule->getMax();
440
441
                if (-1 !== $max && $next > $max) {
442
                    return false;
443
                }
444
445
                $this->_todo[] = new Rule\Ekzit(
446
                    $zeRule->getName(),
447
                    $next,
448
                    $this->_todo
449
                );
450
                $this->_todo[] = new Rule\Ekzit($nextRule, 0);
451
                $this->_todo[] = new Rule\Entry($nextRule, 0);
452
453
                return true;
454
            }
455
        }
456
457
        return false;
458
    }
459
460
    /**
461
     * Backtrack the trace.
462
     */
463
    final protected function backtrack(): bool
464
    {
465
        $found = false;
466
467
        do {
468
            $last = array_pop($this->_trace);
469
470
            if ($last instanceof Rule\Entry) {
471
                $zeRule = $this->_rules[$last->getRule()];
472
                $found  = $zeRule instanceof Rule\Choice;
473
            } elseif ($last instanceof Rule\Ekzit) {
474
                $zeRule = $this->_rules[$last->getRule()];
475
                $found  = $zeRule instanceof Rule\Repetition;
476
            } elseif ($last instanceof Rule\Token) {
477
                $this->_tokenSequence->previous();
478
479
                if (false === $this->_tokenSequence->valid()) {
480
                    return false;
481
                }
482
            }
483
        } while (0 < count($this->_trace) && false === $found);
484
485
        if (false === $found) {
486
            return false;
487
        }
488
489
        $rule          = $last->getRule();
490
        $next          = $last->getData() + 1;
491
        $this->_depth  = $last->getDepth();
492
        $this->_todo   = $last->getTodo();
493
        $this->_todo[] = new Rule\Entry($rule, $next);
494
495
        return true;
496
    }
497
498
    /**
499
     * Build AST from trace.
500
     * Walk through the trace iteratively and recursively.
501
     *
502
     * @param   int      $i         Current trace index.
503
     * @param   array    &$children Collected children.
504
     *
505
     * @return TreeNode|int
506
     */
507
    final protected function _buildTree(int $i = 0, array &$children = [])
508
    {
509
        $max = count($this->_trace);
510
511
        while ($i < $max) {
512
            $trace = $this->_trace[$i];
513
514
            if ($trace instanceof Rule\Entry) {
515
                $ruleName  = $trace->getRule();
516
                $rule      = $this->_rules[$ruleName];
517
                $isRule    = false === $trace->isTransitional();
518
                $nextTrace = $this->_trace[$i + 1];
519
                $id        = $rule->getNodeId();
520
521
                // Optimization: Skip empty trace sequence.
522
                if ($nextTrace instanceof Rule\Ekzit &&
523
                    $ruleName === $nextTrace->getRule()) {
524
                    $i += 2;
525
526
                    continue;
527
                }
528
529
                if (true === $isRule) {
530
                    $children[] = $ruleName;
531
                }
532
533
                if (null !== $id) {
534
                    $children[] = [
535
                        'id'      => $id,
536
                        'options' => $rule->getNodeOptions(),
537
                    ];
538
                }
539
540
                $i = $this->_buildTree($i + 1, $children);
541
542
                if (false === $isRule) {
543
                    continue;
544
                }
545
546
                $handle   = [];
547
                $cId      = null;
548
                $cOptions = [];
549
550
                do {
551
                    $pop = array_pop($children);
552
553
                    if (true === is_object($pop)) {
554
                        $handle[] = $pop;
555
                    } elseif (true === is_array($pop) && null === $cId) {
556
                        $cId      = $pop['id'];
557
                        $cOptions = $pop['options'];
558
                    } elseif ($ruleName === $pop) {
559
                        break;
560
                    }
561
                } while (null !== $pop);
562
563
                if (null === $cId) {
564
                    $cId      = $rule->getDefaultId();
565
                    $cOptions = $rule->getDefaultOptions();
566
                }
567
568
                if (null === $cId) {
569
                    for ($j = count($handle) - 1; $j >= 0; --$j) {
570
                        $children[] = $handle[$j];
571
                    }
572
573
                    continue;
574
                }
575
576
                if (true === in_array('M', $cOptions) &&
577
                    true === $this->mergeTree($children, $handle, $cId)) {
578
                    continue;
579
                }
580
581
                if (true === in_array('m', $cOptions) &&
582
                    true === $this->mergeTree($children, $handle, $cId, true)) {
583
                    continue;
584
                }
585
586
                $cTree = new TreeNode($id ?: $cId);
587
588
                foreach ($handle as $child) {
589
                    $child->setParent($cTree);
590
                    $cTree->prependChild($child);
591
                }
592
593
                $children[] = $cTree;
594
            } elseif ($trace instanceof Rule\Ekzit) {
595
                return $i + 1;
596
            } else {
597
                if (false === $trace->isKept()) {
598
                    ++$i;
599
600
                    continue;
601
                }
602
603
                $child = new TreeNode('token', [
604
                    'token'     => $trace->getTokenName(),
605
                    'value'     => $trace->getValue(),
606
                    'namespace' => $trace->getNamespace(),
607
                    'offset'    => $trace->getOffset(),
608
                ]);
609
610
                $children[] = $child;
611
                ++$i;
612
            }
613
        }
614
615
        return $children[0];
616
    }
617
618
    /**
619
     * Try to merge directly children into an existing node.
620
     *
621
     * @param   array   &$children Current children being gathering.
622
     * @param   array   &$handle   Children of the new node.
623
     * @param   string  $cId       Node ID.
624
     * @param   bool    $recursive Whether we should merge recursively or
625
     *                             not.
626
     */
627
    final protected function mergeTree(
628
        array &$children,
629
        array &$handle,
630
        string $cId,
631
        bool $recursive = false
632
    ): bool {
633
        end($children);
634
        $last = current($children);
635
636
        if (!is_object($last)) {
637
            return false;
638
        }
639
640
        if ($cId !== $last->getId()) {
641
            return false;
642
        }
643
644
        if (true === $recursive) {
645
            foreach ($handle as $child) {
646
                $this->mergeTreeRecursive($last, $child);
647
            }
648
649
            return true;
650
        }
651
652
        foreach ($handle as $child) {
653
            $last->appendChild($child);
654
            $child->setParent($last);
655
        }
656
657
        return true;
658
    }
659
660
    /**
661
     * Merge recursively.
662
     * Please, see self::mergeTree() to know the context.
663
     *
664
     * @param   TreeNode  $node    Node that receives.
665
     * @param   TreeNode  $newNode Node to merge.
666
     */
667
    final protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode): void
668
    {
669
        $nNId = $newNode->getId();
670
671
        if ('token' === $nNId) {
672
            $node->appendChild($newNode);
673
            $newNode->setParent($node);
674
675
            return;
676
        }
677
678
        $children = $node->getChildren();
679
        end($children);
680
        $last     = current($children);
681
682
        if ($last->getId() !== $nNId) {
683
            $node->appendChild($newNode);
684
            $newNode->setParent($node);
685
686
            return;
687
        }
688
689
        foreach ($newNode->getChildren() as $child) {
690
            $this->mergeTreeRecursive($last, $child);
691
        }
692
693
        return;
694
    }
695
696
    /**
697
     * Get AST.
698
     */
699
    final public function getTree(): TreeNode
700
    {
701
        return $this->_tree;
702
    }
703
704
    /**
705
     * Get trace.
706
     *
707
     * @return  array
708
     */
709
    final public function getTrace(): array
710
    {
711
        return $this->_trace;
712
    }
713
714
    /**
715
     * Get pragmas.
716
     *
717
     * @return  array
718
     */
719
    final public function getPragmas(): array
720
    {
721
        return $this->_pragmas;
722
    }
723
724
    /**
725
     * Get tokens.
726
     *
727
     * @return  array
728
     */
729
    final public function getTokens(): array
730
    {
731
        return $this->_tokens;
732
    }
733
734
    /**
735
     * Get the lexer iterator.
736
     */
737
    final public function getTokenSequence(): Buffer
738
    {
739
        return $this->_tokenSequence;
740
    }
741
742
    /**
743
     * Get rule by name.
744
     *
745
     * @param   string  $name Rule name.
746
     */
747
    final public function getRule(string $name): Rule
748
    {
749
        if (!isset($this->_rules[$name])) {
750
            return null;
0 ignored issues
show
Bug Best Practice introduced by
The expression return null returns the type null which is incompatible with the type-hinted return JMS\Serializer\Type\Compiler\Llk\Rule.
Loading history...
751
        }
752
753
        return $this->_rules[$name];
754
    }
755
756
    /**
757
     * Get rules.
758
     *
759
     * @return Rule[]
760
     */
761
    final public function getRules(): array
762
    {
763
        return $this->_rules;
764
    }
765
766
    /**
767
     * Get root rule.
768
     */
769
    final public function getRootRule(): string
770
    {
771
        \assert([] !== $this->_rules);
772
773
        foreach ($this->_rules as $rule => $_) {
774
            if (!is_int($rule)) {
775
                break;
776
            }
777
        }
778
779
        return $rule;
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable $rule seems to be defined by a foreach iteration on line 773. Are you sure the iterator is never empty, otherwise this variable is not defined?
Loading history...
780
    }
781
}
782