Completed
Pull Request — master (#1184)
by Alexey
12:56
created

Parser::getRule()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 3
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 7
rs 10
1
<?php
2
/**
3
 * Hoa
4
 *
5
 *
6
 * @license
7
 *
8
 * BSD 3-Clause License
9
 *
10
 * Copyright © 2007-2017, Hoa community. All rights reserved.
11
 *
12
 * Redistribution and use in source and binary forms, with or without
13
 * modification, are permitted provided that the following conditions are met:
14
 *
15
 * 1. Redistributions of source code must retain the above copyright notice, this
16
 *    list of conditions and the following disclaimer.
17
 *
18
 * 2. Redistributions in binary form must reproduce the above copyright notice,
19
 *    this list of conditions and the following disclaimer in the documentation
20
 *    and/or other materials provided with the distribution.
21
 *
22
 * 3. Neither the name of the copyright holder nor the names of its
23
 *    contributors may be used to endorse or promote products derived from
24
 *    this software without specific prior written permission.
25
 *
26
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
29
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
30
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
32
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
 *
37
 */
38
39
namespace JMS\Serializer\Type\Compiler\Llk;
40
41
use JMS\Serializer\Type\Compiler;
42
use Hoa\Iterator;
43
44
/**
45
 * Class \JMS\Serializer\Type\Compiler\Llk\Parser.
46
 *
47
 * LL(k) parser.
48
 *
49
 * @copyright  Copyright © 2007-2017 Hoa community
50
 * @license    New BSD License
51
 */
52
class Parser
53
{
54
    /**
55
     * List of pragmas.
56
     *
57
     * @var array
58
     */
59
    protected $_pragmas       = null;
60
61
    /**
62
     * List of skipped tokens.
63
     *
64
     * @var array
65
     */
66
    protected $_skip          = null;
67
68
    /**
69
     * Associative array (token name => token regex), to be defined in
70
     * precedence order.
71
     *
72
     * @var array
73
     */
74
    protected $_tokens        = null;
75
76
    /**
77
     * Rules, to be defined as associative array, name => Rule object.
78
     *
79
     * @var Rule[]
80
     */
81
    protected $_rules         = null;
82
83
    /**
84
     * Lexer iterator.
85
     *
86
     * @var \Hoa\Iterator\Buffer
87
     */
88
    protected $_tokenSequence = null;
89
90
    /**
91
     * Possible token causing an error.
92
     *
93
     * @var array
94
     */
95
    protected $_errorToken    = null;
96
97
    /**
98
     * Trace of activated rules.
99
     *
100
     * @var array
101
     */
102
    protected $_trace         = [];
103
104
    /**
105
     * Stack of todo list.
106
     *
107
     * @var array
108
     */
109
    protected $_todo          = null;
110
111
    /**
112
     * AST.
113
     *
114
     * @var \JMS\Serializer\Type\Compiler\Llk\TreeNode
115
     */
116
    protected $_tree          = null;
117
118
    /**
119
     * Current depth while building the trace.
120
     *
121
     * @var int
122
     */
123
    protected $_depth         = -1;
124
125
126
127
    /**
128
     * Construct the parser.
129
     *
130
     * @param   array  $tokens     Tokens.
131
     * @param   array  $rules      Rules.
132
     * @param   array  $pragmas    Pragmas.
133
     */
134
    public function __construct(
135
        array $tokens  = [],
136
        array $rules   = [],
137
        array $pragmas = []
138
    ) {
139
        $this->_tokens  = $tokens;
140
        $this->_rules   = $rules;
141
        $this->_pragmas = $pragmas;
142
143
        return;
144
    }
145
146
    /**
147
     * Parse :-).
148
     *
149
     * @param   string  $text    Text to parse.
150
     * @param   string  $rule    The axiom, i.e. root rule.
151
     * @param   bool    $tree    Whether build tree or not.
152
     * @return  mixed
153
     * @throws  \JMS\Serializer\Type\Compiler\Exception\UnexpectedToken
154
     */
155
    final public function parse(string $text, string $rule = null, bool $tree = true)
156
    {
157
        $k = 1024;
158
159
        if (isset($this->_pragmas['parser.lookahead'])) {
160
            $k = max(0, intval($this->_pragmas['parser.lookahead']));
161
        }
162
163
        $lexer                = new Lexer($this->_pragmas);
164
        $this->_tokenSequence = new Iterator\Buffer(
165
            $lexer->lexMe($text, $this->_tokens),
166
            $k
167
        );
168
        $this->_tokenSequence->rewind();
169
170
        $this->_errorToken = null;
171
        $this->_trace      = [];
172
        $this->_todo       = [];
173
174
        if (false === array_key_exists($rule, $this->_rules)) {
175
            $rule = $this->getRootRule();
176
        }
177
178
        $closeRule   = new Rule\Ekzit($rule, 0);
179
        $openRule    = new Rule\Entry($rule, 0, [$closeRule]);
180
        $this->_todo = [$closeRule, $openRule];
181
182
        do {
183
            $out = $this->unfold();
184
185
            if (null !== $out &&
186
                'EOF' === $this->_tokenSequence->current()['token']) {
187
                break;
188
            }
189
190
            if (false === $this->backtrack()) {
191
                $token  = $this->_errorToken;
192
193
                if (null === $this->_errorToken) {
194
                    $token = $this->_tokenSequence->current();
195
                }
196
197
                $offset = $token['offset'];
198
                $line   = 1;
199
                $column = 1;
200
201
                if (!empty($text)) {
202
                    if (0 === $offset) {
203
                        $leftnl = 0;
204
                    } else {
205
                        $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0;
206
                    }
207
208
                    $rightnl = strpos($text, "\n", $offset);
209
                    $line    = substr_count($text, "\n", 0, $leftnl + 1) + 1;
210
                    $column  = $offset - $leftnl + (0 === $leftnl);
211
212
                    if (false !== $rightnl) {
213
                        $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n");
214
                    }
215
                }
216
217
                throw new Compiler\Exception\UnexpectedToken(
218
                    'Unexpected token "%s" (%s) at line %d and column %d:' .
219
                    "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑',
220
                    0,
221
                    [
222
                        $token['value'],
223
                        $token['token'],
224
                        $line,
225
                        $column,
226
                        $text
227
                    ],
228
                    $line,
229
                    $column
230
                );
231
            }
232
        } while (true);
233
234
        if (false === $tree) {
235
            return true;
236
        }
237
238
        $tree = $this->_buildTree();
239
240
        if (!($tree instanceof TreeNode)) {
0 ignored issues
show
introduced by
$tree is always a sub-type of JMS\Serializer\Type\Compiler\Llk\TreeNode.
Loading history...
241
            throw new Compiler\Exception(
242
                'Parsing error: cannot build AST, the trace is corrupted.',
243
                1
244
            );
245
        }
246
247
        return $this->_tree = $tree;
248
    }
249
250
    /**
251
     * Unfold trace.
252
     *
253
     * @return  mixed
254
     */
255
    final protected function unfold()
256
    {
257
        while (0 < count($this->_todo)) {
258
            $rule = array_pop($this->_todo);
259
260
            if ($rule instanceof Rule\Ekzit) {
261
                $rule->setDepth($this->_depth);
262
                $this->_trace[] = $rule;
263
264
                if (false === $rule->isTransitional()) {
265
                    --$this->_depth;
266
                }
267
            } else {
268
                $ruleName = $rule->getRule();
269
                $next     = $rule->getData();
270
                $zeRule   = $this->_rules[$ruleName];
271
                $out      = $this->_parse($zeRule, $next);
272
273
                if (false === $out && false === $this->backtrack()) {
274
                    return null;
275
                }
276
            }
277
        }
278
279
        return true;
280
    }
281
282
    /**
283
     * Parse current rule.
284
     *
285
     * @param   \JMS\Serializer\Type\Compiler\Llk\Rule  $zeRule    Current rule.
286
     * @param   int                     $next      Next rule index.
287
     * @return  bool
288
     */
289
    final protected function _parse(Rule $zeRule, $next)
290
    {
291
        $regex = null;
292
        if ($zeRule instanceof Rule\Token) {
293
            $name = $this->_tokenSequence->current()['token'];
294
295
            if ($zeRule->getTokenName() !== $name) {
296
                return false;
297
            }
298
299
            $value = $this->_tokenSequence->current()['value'];
300
301
            if (0 <= $unification = $zeRule->getUnificationIndex()) {
302
                for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) {
303
                    $trace = $this->_trace[$i];
304
305
                    if ($trace instanceof Rule\Entry) {
306
                        if (false === $trace->isTransitional()) {
307
                            if ($trace->getDepth() <= $this->_depth) {
308
                                break;
309
                            }
310
311
                            --$skip;
312
                        }
313
                    } elseif ($trace instanceof Rule\Ekzit &&
314
                              false === $trace->isTransitional()) {
315
                        $skip += $trace->getDepth() > $this->_depth;
316
                    }
317
318
                    if (0 < $skip) {
319
                        continue;
320
                    }
321
322
                    if ($trace instanceof Rule\Token &&
323
                        $unification === $trace->getUnificationIndex() &&
324
                        $value !== $trace->getValue()) {
325
                        return false;
326
                    }
327
                }
328
            }
329
330
            $current = $this->_tokenSequence->current();
331
332
            $namespace = $current['namespace'];
333
            $offset    = $current['offset'];
334
335
            $zzeRule   = clone $zeRule;
336
            $zzeRule->setValue($value);
337
            $zzeRule->setOffset($offset);
338
            $zzeRule->setNamespace($namespace);
339
340
            if (isset($this->_tokens[$namespace][$name])) {
341
                $zzeRule->setRepresentation($this->_tokens[$namespace][$name]);
342
            } else {
343
                foreach ($this->_tokens[$namespace] as $_name => $regex) {
344
                    if (false === $pos = strpos($_name, ':')) {
345
                        continue;
346
                    }
347
348
                    $_name = substr($_name, 0, $pos);
349
350
                    if ($_name === $name) {
351
                        break;
352
                    }
353
                }
354
355
                $zzeRule->setRepresentation($regex);
356
            }
357
358
            array_pop($this->_todo);
359
            $this->_trace[] = $zzeRule;
360
            $this->_tokenSequence->next();
361
            $this->_errorToken = $this->_tokenSequence->current();
362
363
            return true;
364
        } elseif ($zeRule instanceof Rule\Concatenation) {
365
            if (false === $zeRule->isTransitional()) {
366
                ++$this->_depth;
367
            }
368
369
            $this->_trace[] = new Rule\Entry(
370
                $zeRule->getName(),
371
                0,
372
                null,
373
                $this->_depth
374
            );
375
            $children = $zeRule->getChildren();
376
377
            for ($i = count($children) - 1; $i >= 0; --$i) {
378
                $nextRule      = $children[$i];
379
                $this->_todo[] = new Rule\Ekzit($nextRule, 0);
380
                $this->_todo[] = new Rule\Entry($nextRule, 0);
381
            }
382
383
            return true;
384
        } elseif ($zeRule instanceof Rule\Choice) {
385
            $children = $zeRule->getChildren();
386
387
            if ($next >= count($children)) {
388
                return false;
389
            }
390
391
            if (false === $zeRule->isTransitional()) {
392
                ++$this->_depth;
393
            }
394
395
            $this->_trace[] = new Rule\Entry(
396
                $zeRule->getName(),
397
                $next,
398
                $this->_todo,
399
                $this->_depth
400
            );
401
            $nextRule      = $children[$next];
402
            $this->_todo[] = new Rule\Ekzit($nextRule, 0);
403
            $this->_todo[] = new Rule\Entry($nextRule, 0);
404
405
            return true;
406
        } elseif ($zeRule instanceof Rule\Repetition) {
407
            $nextRule = $zeRule->getChildren();
408
409
            if (0 === $next) {
410
                $name = $zeRule->getName();
411
                $min  = $zeRule->getMin();
412
413
                if (false === $zeRule->isTransitional()) {
414
                    ++$this->_depth;
415
                }
416
417
                $this->_trace[] = new Rule\Entry(
418
                    $name,
419
                    $min,
420
                    null,
421
                    $this->_depth
422
                );
423
                array_pop($this->_todo);
424
                $this->_todo[]  = new Rule\Ekzit(
425
                    $name,
426
                    $min,
427
                    $this->_todo
428
                );
429
430
                for ($i = 0; $i < $min; ++$i) {
431
                    $this->_todo[] = new Rule\Ekzit($nextRule, 0);
432
                    $this->_todo[] = new Rule\Entry($nextRule, 0);
433
                }
434
435
                return true;
436
            } else {
437
                $max = $zeRule->getMax();
438
439
                if (-1 != $max && $next > $max) {
440
                    return false;
441
                }
442
443
                $this->_todo[] = new Rule\Ekzit(
444
                    $zeRule->getName(),
445
                    $next,
446
                    $this->_todo
447
                );
448
                $this->_todo[] = new Rule\Ekzit($nextRule, 0);
449
                $this->_todo[] = new Rule\Entry($nextRule, 0);
450
451
                return true;
452
            }
453
        }
454
455
        return false;
456
    }
457
458
    /**
459
     * Backtrack the trace.
460
     *
461
     * @return  bool
462
     */
463
    final protected function backtrack()
464
    {
465
        $found = false;
466
467
        do {
468
            $last = array_pop($this->_trace);
469
470
            if ($last instanceof Rule\Entry) {
471
                $zeRule = $this->_rules[$last->getRule()];
472
                $found  = $zeRule instanceof Rule\Choice;
473
            } elseif ($last instanceof Rule\Ekzit) {
474
                $zeRule = $this->_rules[$last->getRule()];
475
                $found  = $zeRule instanceof Rule\Repetition;
476
            } elseif ($last instanceof Rule\Token) {
477
                $this->_tokenSequence->previous();
478
479
                if (false === $this->_tokenSequence->valid()) {
480
                    return false;
481
                }
482
            }
483
        } while (0 < count($this->_trace) && false === $found);
484
485
        if (false === $found) {
486
            return false;
487
        }
488
489
        $rule          = $last->getRule();
490
        $next          = $last->getData() + 1;
491
        $this->_depth  = $last->getDepth();
492
        $this->_todo   = $last->getTodo();
493
        $this->_todo[] = new Rule\Entry($rule, $next);
494
495
        return true;
496
    }
497
498
    /**
499
     * Build AST from trace.
500
     * Walk through the trace iteratively and recursively.
501
     *
502
     * @param   int      $i            Current trace index.
503
     * @param   array    &$children    Collected children.
504
     * @return  \JMS\Serializer\Type\Compiler\Llk\TreeNode
505
     */
506
    final protected function _buildTree($i = 0, &$children = [])
507
    {
508
        $max = count($this->_trace);
509
510
        while ($i < $max) {
511
            $trace = $this->_trace[$i];
512
513
            if ($trace instanceof Rule\Entry) {
514
                $ruleName  = $trace->getRule();
515
                $rule      = $this->_rules[$ruleName];
516
                $isRule    = false === $trace->isTransitional();
517
                $nextTrace = $this->_trace[$i + 1];
518
                $id        = $rule->getNodeId();
519
520
                // Optimization: Skip empty trace sequence.
521
                if ($nextTrace instanceof Rule\Ekzit &&
522
                    $ruleName == $nextTrace->getRule()) {
523
                    $i += 2;
524
525
                    continue;
526
                }
527
528
                if (true === $isRule) {
529
                    $children[] = $ruleName;
530
                }
531
532
                if (null !== $id) {
533
                    $children[] = [
534
                        'id'      => $id,
535
                        'options' => $rule->getNodeOptions()
536
                    ];
537
                }
538
539
                $i = $this->_buildTree($i + 1, $children);
540
541
                if (false === $isRule) {
542
                    continue;
543
                }
544
545
                $handle   = [];
546
                $cId      = null;
547
                $cOptions = [];
548
549
                do {
550
                    $pop = array_pop($children);
551
552
                    if (true === is_object($pop)) {
553
                        $handle[] = $pop;
554
                    } elseif (true === is_array($pop) && null === $cId) {
555
                        $cId      = $pop['id'];
556
                        $cOptions = $pop['options'];
557
                    } elseif ($ruleName == $pop) {
558
                        break;
559
                    }
560
                } while (null !== $pop);
561
562
                if (null === $cId) {
563
                    $cId      = $rule->getDefaultId();
564
                    $cOptions = $rule->getDefaultOptions();
565
                }
566
567
                if (null === $cId) {
568
                    for ($j = count($handle) - 1; $j >= 0; --$j) {
569
                        $children[] = $handle[$j];
570
                    }
571
572
                    continue;
573
                }
574
575
                if (true === in_array('M', $cOptions) &&
576
                    true === $this->mergeTree($children, $handle, $cId)) {
577
                    continue;
578
                }
579
580
                if (true === in_array('m', $cOptions) &&
581
                    true === $this->mergeTree($children, $handle, $cId, true)) {
582
                    continue;
583
                }
584
585
                $cTree = new TreeNode($id ?: $cId);
586
587
                foreach ($handle as $child) {
588
                    $child->setParent($cTree);
589
                    $cTree->prependChild($child);
590
                }
591
592
                $children[] = $cTree;
593
            } elseif ($trace instanceof Rule\Ekzit) {
594
                return $i + 1;
0 ignored issues
show
Bug Best Practice introduced by
The expression return $i + 1 returns the type integer which is incompatible with the documented return type JMS\Serializer\Type\Compiler\Llk\TreeNode.
Loading history...
595
            } else {
596
                if (false === $trace->isKept()) {
597
                    ++$i;
598
599
                    continue;
600
                }
601
602
                $child = new TreeNode('token', [
603
                    'token'     => $trace->getTokenName(),
604
                    'value'     => $trace->getValue(),
605
                    'namespace' => $trace->getNamespace(),
606
                    'offset'    => $trace->getOffset()
607
                ]);
608
609
                $children[] = $child;
610
                ++$i;
611
            }
612
        }
613
614
        return $children[0];
615
    }
616
617
    /**
618
     * Try to merge directly children into an existing node.
619
     *
620
     * @param   array   &$children    Current children being gathering.
621
     * @param   array   &$handle      Children of the new node.
622
     * @param   string  $cId          Node ID.
623
     * @param   bool    $recursive    Whether we should merge recursively or
624
     *                                not.
625
     * @return  bool
626
     */
627
    final protected function mergeTree(
628
        &$children,
629
        &$handle,
630
        $cId,
631
        $recursive = false
632
    ) {
633
        end($children);
634
        $last = current($children);
635
636
        if (!is_object($last)) {
637
            return false;
638
        }
639
640
        if ($cId !== $last->getId()) {
641
            return false;
642
        }
643
644
        if (true === $recursive) {
645
            foreach ($handle as $child) {
646
                $this->mergeTreeRecursive($last, $child);
647
            }
648
649
            return true;
650
        }
651
652
        foreach ($handle as $child) {
653
            $last->appendChild($child);
654
            $child->setParent($last);
655
        }
656
657
        return true;
658
    }
659
660
    /**
661
     * Merge recursively.
662
     * Please, see self::mergeTree() to know the context.
663
     *
664
     * @param   \JMS\Serializer\Type\Compiler\Llk\TreeNode  $node       Node that receives.
665
     * @param   \JMS\Serializer\Type\Compiler\Llk\TreeNode  $newNode    Node to merge.
666
     * @return  void
667
     */
668
    final protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode)
669
    {
670
        $nNId = $newNode->getId();
671
672
        if ('token' === $nNId) {
673
            $node->appendChild($newNode);
674
            $newNode->setParent($node);
675
676
            return;
677
        }
678
679
        $children = $node->getChildren();
680
        end($children);
681
        $last     = current($children);
682
683
        if ($last->getId() !== $nNId) {
684
            $node->appendChild($newNode);
685
            $newNode->setParent($node);
686
687
            return;
688
        }
689
690
        foreach ($newNode->getChildren() as $child) {
691
            $this->mergeTreeRecursive($last, $child);
692
        }
693
694
        return;
695
    }
696
697
    /**
698
     * Get AST.
699
     *
700
     * @return  \JMS\Serializer\Type\Compiler\Llk\TreeNode
701
     */
702
    final public function getTree()
703
    {
704
        return $this->_tree;
705
    }
706
707
    /**
708
     * Get trace.
709
     *
710
     * @return  array
711
     */
712
    final public function getTrace()
713
    {
714
        return $this->_trace;
715
    }
716
717
    /**
718
     * Get pragmas.
719
     *
720
     * @return  array
721
     */
722
    final public function getPragmas()
723
    {
724
        return $this->_pragmas;
725
    }
726
727
    /**
728
     * Get tokens.
729
     *
730
     * @return  array
731
     */
732
    final public function getTokens()
733
    {
734
        return $this->_tokens;
735
    }
736
737
    /**
738
     * Get the lexer iterator.
739
     *
740
     * @return  \Hoa\Iterator\Buffer
741
     */
742
    final public function getTokenSequence()
743
    {
744
        return $this->_tokenSequence;
745
    }
746
747
    /**
748
     * Get rule by name.
749
     *
750
     * @param   string  $name    Rule name.
751
     * @return  \JMS\Serializer\Type\Compiler\Llk\Rule
752
     */
753
    final public function getRule($name)
754
    {
755
        if (!isset($this->_rules[$name])) {
756
            return null;
757
        }
758
759
        return $this->_rules[$name];
760
    }
761
762
    /**
763
     * Get rules.
764
     *
765
     * @return Rule[]
766
     */
767
    final public function getRules()
768
    {
769
        return $this->_rules;
770
    }
771
772
    /**
773
     * Get root rule.
774
     *
775
     * @return string
776
     */
777
    final public function getRootRule()
778
    {
779
        \assert([] !== $this->_rules);
780
781
        foreach ($this->_rules as $rule => $_) {
782
            if (!is_int($rule)) {
783
                break;
784
            }
785
        }
786
787
        return $rule;
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable $rule seems to be defined by a foreach iteration on line 781. Are you sure the iterator is never empty, otherwise this variable is not defined?
Loading history...
788
    }
789
}
790