Completed
Pull Request — master (#1184)
by Alexey
14:33
created

Parser::parse()   C

Complexity

Conditions 14
Paths 64

Size

Total Lines 93
Code Lines 56

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 14
eloc 56
c 1
b 0
f 0
nc 64
nop 3
dl 0
loc 93
rs 6.2666

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
declare(strict_types=1);
4
5
/**
6
 * Hoa
7
 *
8
 *
9
 *
10
 *
11
 * BSD 3-Clause License
12
 *
13
 * Copyright © 2007-2017, Hoa community. All rights reserved.
14
 *
15
 * Redistribution and use in source and binary forms, with or without
16
 * modification, are permitted provided that the following conditions are met:
17
 *
18
 * 1. Redistributions of source code must retain the above copyright notice, this
19
 *    list of conditions and the following disclaimer.
20
 *
21
 * 2. Redistributions in binary form must reproduce the above copyright notice,
22
 *    this list of conditions and the following disclaimer in the documentation
23
 *    and/or other materials provided with the distribution.
24
 *
25
 * 3. Neither the name of the copyright holder nor the names of its
26
 *    contributors may be used to endorse or promote products derived from
27
 *    this software without specific prior written permission.
28
 *
29
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
32
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
33
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
35
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
36
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
37
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
 */
40
41
namespace JMS\Serializer\Type\Compiler\Llk;
42
43
use Hoa\Iterator;
44
use Hoa\Iterator\Buffer;
45
use JMS\Serializer\Type\Compiler;
46
use JMS\Serializer\Type\Compiler\Exception\UnexpectedToken;
47
48
/**
49
 * Class \JMS\Serializer\Type\Compiler\Llk\Parser.
50
 *
51
 * LL(k) parser.
52
 */
53
class Parser
54
{
55
    /**
56
     * List of pragmas.
57
     *
58
     * @var array
59
     */
60
    protected $_pragmas       = null;
61
62
    /**
63
     * List of skipped tokens.
64
     *
65
     * @var array
66
     */
67
    protected $_skip          = null;
68
69
    /**
70
     * Associative array (token name => token regex), to be defined in
71
     * precedence order.
72
     *
73
     * @var array
74
     */
75
    protected $_tokens        = null;
76
77
    /**
78
     * Rules, to be defined as associative array, name => Rule object.
79
     *
80
     * @var Rule[]
81
     */
82
    protected $_rules         = null;
83
84
    /**
85
     * Lexer iterator.
86
     *
87
     * @var Buffer
88
     */
89
    protected $_tokenSequence = null;
90
91
    /**
92
     * Possible token causing an error.
93
     *
94
     * @var array
95
     */
96
    protected $_errorToken    = null;
97
98
    /**
99
     * Trace of activated rules.
100
     *
101
     * @var array
102
     */
103
    protected $_trace         = [];
104
105
    /**
106
     * Stack of todo list.
107
     *
108
     * @var array
109
     */
110
    protected $_todo          = null;
111
112
    /**
113
     * AST.
114
     *
115
     * @var TreeNode
116
     */
117
    protected $_tree          = null;
118
119
    /**
120
     * Current depth while building the trace.
121
     *
122
     * @var int
123
     */
124
    protected $_depth         = -1;
125
126
127
128
    /**
129
     * Construct the parser.
130
     *
131
     * @param   array  $tokens  Tokens.
132
     * @param   array  $rules   Rules.
133
     * @param   array  $pragmas Pragmas.
134
     */
135
    public function __construct(
136
        array $tokens = [],
137
        array $rules = [],
138
        array $pragmas = []
139
    ) {
140
        $this->_tokens  = $tokens;
141
        $this->_rules   = $rules;
142
        $this->_pragmas = $pragmas;
143
144
        return;
145
    }
146
147
    /**
148
     * Parse :-).
149
     *
150
     * @param   string  $text Text to parse.
151
     * @param   string  $rule The axiom, i.e. root rule.
152
     * @param   bool    $tree Whether build tree or not.
153
     *
154
     * @return  mixed
155
     *
156
     * @throws  UnexpectedToken
157
     */
158
    final public function parse(string $text, ?string $rule = null, bool $tree = true)
159
    {
160
        $k = 1024;
161
162
        if (isset($this->_pragmas['parser.lookahead'])) {
163
            $k = max(0, intval($this->_pragmas['parser.lookahead']));
164
        }
165
166
        $lexer                = new Lexer($this->_pragmas);
167
        $this->_tokenSequence = new Iterator\Buffer(
168
            $lexer->lexMe($text, $this->_tokens),
169
            $k
170
        );
171
        $this->_tokenSequence->rewind();
172
173
        $this->_errorToken = null;
174
        $this->_trace      = [];
175
        $this->_todo       = [];
176
177
        if (false === array_key_exists($rule, $this->_rules)) {
178
            $rule = $this->getRootRule();
179
        }
180
181
        $closeRule   = new Rule\Ekzit($rule, 0);
182
        $openRule    = new Rule\Entry($rule, 0, [$closeRule]);
183
        $this->_todo = [$closeRule, $openRule];
184
185
        do {
186
            $out = $this->unfold();
187
188
            if (null !== $out &&
189
                'EOF' === $this->_tokenSequence->current()['token']) {
190
                break;
191
            }
192
193
            if (false === $this->backtrack()) {
194
                $token  = $this->_errorToken;
195
196
                if (null === $this->_errorToken) {
197
                    $token = $this->_tokenSequence->current();
198
                }
199
200
                $offset = $token['offset'];
201
                $line   = 1;
202
                $column = 1;
203
204
                if (!empty($text)) {
205
                    if (0 === $offset) {
206
                        $leftnl = 0;
207
                    } else {
208
                        $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0;
209
                    }
210
211
                    $rightnl = strpos($text, "\n", $offset);
212
                    $line    = substr_count($text, "\n", 0, $leftnl + 1) + 1;
213
                    $column  = $offset - $leftnl + (0 === $leftnl);
214
215
                    if (false !== $rightnl) {
216
                        $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n");
217
                    }
218
                }
219
220
                throw new Compiler\Exception\UnexpectedToken(
221
                    'Unexpected token "%s" (%s) at line %d and column %d:' .
222
                    "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . '↑',
223
                    0,
224
                    [
225
                        $token['value'],
226
                        $token['token'],
227
                        $line,
228
                        $column,
229
                        $text,
230
                    ],
231
                    $line,
232
                    $column
233
                );
234
            }
235
        } while (true);
236
237
        if (false === $tree) {
238
            return true;
239
        }
240
241
        $tree = $this->_buildTree();
242
243
        if (!($tree instanceof TreeNode)) {
244
            throw new Compiler\Exception(
245
                'Parsing error: cannot build AST, the trace is corrupted.',
246
                1
247
            );
248
        }
249
250
        return $this->_tree = $tree;
251
    }
252
253
    /**
254
     * Unfold trace.
255
     *
256
     * @return  mixed
257
     */
258
    final protected function unfold()
259
    {
260
        while (0 < count($this->_todo)) {
261
            $rule = array_pop($this->_todo);
262
263
            if ($rule instanceof Rule\Ekzit) {
264
                $rule->setDepth($this->_depth);
265
                $this->_trace[] = $rule;
266
267
                if (false === $rule->isTransitional()) {
268
                    --$this->_depth;
269
                }
270
            } else {
271
                $ruleName = $rule->getRule();
272
                $next     = $rule->getData();
273
                $zeRule   = $this->_rules[$ruleName];
274
                $out      = $this->_parse($zeRule, $next);
275
276
                if (false === $out && false === $this->backtrack()) {
277
                    return null;
278
                }
279
            }
280
        }
281
282
        return true;
283
    }
284
285
    /**
286
     * Parse current rule.
287
     *
288
     * @param   Rule  $zeRule Current rule.
289
     * @param   int                     $next   Next rule index.
290
     */
291
    final protected function _parse(Rule $zeRule, int $next): bool
292
    {
293
        $regex = null;
294
        if ($zeRule instanceof Rule\Token) {
295
            $name = $this->_tokenSequence->current()['token'];
296
297
            if ($zeRule->getTokenName() !== $name) {
298
                return false;
299
            }
300
301
            $value = $this->_tokenSequence->current()['value'];
302
303
            if (0 <= $unification = $zeRule->getUnificationIndex()) {
304
                for ($skip = 0, $i = count($this->_trace) - 1; $i >= 0; --$i) {
305
                    $trace = $this->_trace[$i];
306
307
                    if ($trace instanceof Rule\Entry) {
308
                        if (false === $trace->isTransitional()) {
309
                            if ($trace->getDepth() <= $this->_depth) {
310
                                break;
311
                            }
312
313
                            --$skip;
314
                        }
315
                    } elseif ($trace instanceof Rule\Ekzit &&
316
                              false === $trace->isTransitional()) {
317
                        $skip += $trace->getDepth() > $this->_depth;
318
                    }
319
320
                    if (0 < $skip) {
321
                        continue;
322
                    }
323
324
                    if ($trace instanceof Rule\Token &&
325
                        $unification === $trace->getUnificationIndex() &&
326
                        $value !== $trace->getValue()) {
327
                        return false;
328
                    }
329
                }
330
            }
331
332
            $current = $this->_tokenSequence->current();
333
334
            $namespace = $current['namespace'];
335
            $offset    = $current['offset'];
336
337
            $zzeRule   = clone $zeRule;
338
            $zzeRule->setValue($value);
339
            $zzeRule->setOffset($offset);
340
            $zzeRule->setNamespace($namespace);
341
342
            if (isset($this->_tokens[$namespace][$name])) {
343
                $zzeRule->setRepresentation($this->_tokens[$namespace][$name]);
344
            } else {
345
                foreach ($this->_tokens[$namespace] as $_name => $regex) {
346
                    if (false === $pos = strpos($_name, ':')) {
347
                        continue;
348
                    }
349
350
                    $_name = substr($_name, 0, $pos);
351
352
                    if ($_name === $name) {
353
                        break;
354
                    }
355
                }
356
357
                $zzeRule->setRepresentation($regex);
358
            }
359
360
            array_pop($this->_todo);
361
            $this->_trace[] = $zzeRule;
362
            $this->_tokenSequence->next();
363
            $this->_errorToken = $this->_tokenSequence->current();
364
365
            return true;
366
        } elseif ($zeRule instanceof Rule\Concatenation) {
367
            if (false === $zeRule->isTransitional()) {
368
                ++$this->_depth;
369
            }
370
371
            $this->_trace[] = new Rule\Entry(
372
                $zeRule->getName(),
373
                0,
374
                null,
375
                $this->_depth
376
            );
377
            $children = $zeRule->getChildren();
378
379
            for ($i = count($children) - 1; $i >= 0; --$i) {
380
                $nextRule      = $children[$i];
381
                $this->_todo[] = new Rule\Ekzit($nextRule, 0);
382
                $this->_todo[] = new Rule\Entry($nextRule, 0);
383
            }
384
385
            return true;
386
        } elseif ($zeRule instanceof Rule\Choice) {
387
            $children = $zeRule->getChildren();
388
389
            if ($next >= count($children)) {
390
                return false;
391
            }
392
393
            if (false === $zeRule->isTransitional()) {
394
                ++$this->_depth;
395
            }
396
397
            $this->_trace[] = new Rule\Entry(
398
                $zeRule->getName(),
399
                $next,
400
                $this->_todo,
401
                $this->_depth
402
            );
403
            $nextRule      = $children[$next];
404
            $this->_todo[] = new Rule\Ekzit($nextRule, 0);
405
            $this->_todo[] = new Rule\Entry($nextRule, 0);
406
407
            return true;
408
        } elseif ($zeRule instanceof Rule\Repetition) {
409
            $nextRule = $zeRule->getChildren();
410
411
            if (0 === $next) {
412
                $name = $zeRule->getName();
413
                $min  = $zeRule->getMin();
414
415
                if (false === $zeRule->isTransitional()) {
416
                    ++$this->_depth;
417
                }
418
419
                $this->_trace[] = new Rule\Entry(
420
                    $name,
421
                    $min,
422
                    null,
423
                    $this->_depth
424
                );
425
                array_pop($this->_todo);
426
                $this->_todo[]  = new Rule\Ekzit(
427
                    $name,
428
                    $min,
429
                    $this->_todo
430
                );
431
432
                for ($i = 0; $i < $min; ++$i) {
433
                    $this->_todo[] = new Rule\Ekzit($nextRule, 0);
434
                    $this->_todo[] = new Rule\Entry($nextRule, 0);
435
                }
436
437
                return true;
438
            } else {
439
                $max = $zeRule->getMax();
440
441
                if (-1 !== $max && $next > $max) {
442
                    return false;
443
                }
444
445
                $this->_todo[] = new Rule\Ekzit(
446
                    $zeRule->getName(),
447
                    $next,
448
                    $this->_todo
449
                );
450
                $this->_todo[] = new Rule\Ekzit($nextRule, 0);
451
                $this->_todo[] = new Rule\Entry($nextRule, 0);
452
453
                return true;
454
            }
455
        }
456
457
        return false;
458
    }
459
460
    /**
461
     * Backtrack the trace.
462
     */
463
    final protected function backtrack(): bool
464
    {
465
        $found = false;
466
467
        do {
468
            $last = array_pop($this->_trace);
469
470
            if ($last instanceof Rule\Entry) {
471
                $zeRule = $this->_rules[$last->getRule()];
472
                $found  = $zeRule instanceof Rule\Choice;
473
            } elseif ($last instanceof Rule\Ekzit) {
474
                $zeRule = $this->_rules[$last->getRule()];
475
                $found  = $zeRule instanceof Rule\Repetition;
476
            } elseif ($last instanceof Rule\Token) {
477
                $this->_tokenSequence->previous();
478
479
                if (false === $this->_tokenSequence->valid()) {
480
                    return false;
481
                }
482
            }
483
        } while (0 < count($this->_trace) && false === $found);
484
485
        if (false === $found) {
486
            return false;
487
        }
488
489
        $rule          = $last->getRule();
490
        $next          = $last->getData() + 1;
491
        $this->_depth  = $last->getDepth();
492
        $this->_todo   = $last->getTodo();
493
        $this->_todo[] = new Rule\Entry($rule, $next);
494
495
        return true;
496
    }
497
498
    /**
499
     * Build AST from trace.
500
     * Walk through the trace iteratively and recursively.
501
     *
502
     * @param   int      $i         Current trace index.
503
     * @param   array    &$children Collected children.
504
     */
505
    final protected function _buildTree(int $i = 0, array &$children = [])
506
    {
507
        $max = count($this->_trace);
508
509
        while ($i < $max) {
510
            $trace = $this->_trace[$i];
511
512
            if ($trace instanceof Rule\Entry) {
513
                $ruleName  = $trace->getRule();
514
                $rule      = $this->_rules[$ruleName];
515
                $isRule    = false === $trace->isTransitional();
516
                $nextTrace = $this->_trace[$i + 1];
517
                $id        = $rule->getNodeId();
518
519
                // Optimization: Skip empty trace sequence.
520
                if ($nextTrace instanceof Rule\Ekzit &&
521
                    $ruleName === $nextTrace->getRule()) {
522
                    $i += 2;
523
524
                    continue;
525
                }
526
527
                if (true === $isRule) {
528
                    $children[] = $ruleName;
529
                }
530
531
                if (null !== $id) {
532
                    $children[] = [
533
                        'id'      => $id,
534
                        'options' => $rule->getNodeOptions(),
535
                    ];
536
                }
537
538
                $i = $this->_buildTree($i + 1, $children);
539
540
                if (false === $isRule) {
541
                    continue;
542
                }
543
544
                $handle   = [];
545
                $cId      = null;
546
                $cOptions = [];
547
548
                do {
549
                    $pop = array_pop($children);
550
551
                    if (true === is_object($pop)) {
552
                        $handle[] = $pop;
553
                    } elseif (true === is_array($pop) && null === $cId) {
554
                        $cId      = $pop['id'];
555
                        $cOptions = $pop['options'];
556
                    } elseif ($ruleName === $pop) {
557
                        break;
558
                    }
559
                } while (null !== $pop);
560
561
                if (null === $cId) {
562
                    $cId      = $rule->getDefaultId();
563
                    $cOptions = $rule->getDefaultOptions();
564
                }
565
566
                if (null === $cId) {
567
                    for ($j = count($handle) - 1; $j >= 0; --$j) {
568
                        $children[] = $handle[$j];
569
                    }
570
571
                    continue;
572
                }
573
574
                if (true === in_array('M', $cOptions) &&
575
                    true === $this->mergeTree($children, $handle, $cId)) {
576
                    continue;
577
                }
578
579
                if (true === in_array('m', $cOptions) &&
580
                    true === $this->mergeTree($children, $handle, $cId, true)) {
581
                    continue;
582
                }
583
584
                $cTree = new TreeNode($id ?: $cId);
585
586
                foreach ($handle as $child) {
587
                    $child->setParent($cTree);
588
                    $cTree->prependChild($child);
589
                }
590
591
                $children[] = $cTree;
592
            } elseif ($trace instanceof Rule\Ekzit) {
593
                return $i + 1;
594
            } else {
595
                if (false === $trace->isKept()) {
596
                    ++$i;
597
598
                    continue;
599
                }
600
601
                $child = new TreeNode('token', [
602
                    'token'     => $trace->getTokenName(),
603
                    'value'     => $trace->getValue(),
604
                    'namespace' => $trace->getNamespace(),
605
                    'offset'    => $trace->getOffset(),
606
                ]);
607
608
                $children[] = $child;
609
                ++$i;
610
            }
611
        }
612
613
        return $children[0];
614
    }
615
616
    /**
617
     * Try to merge directly children into an existing node.
618
     *
619
     * @param   array   &$children Current children being gathering.
620
     * @param   array   &$handle   Children of the new node.
621
     * @param   string  $cId       Node ID.
622
     * @param   bool    $recursive Whether we should merge recursively or
623
     *                             not.
624
     */
625
    final protected function mergeTree(
626
        array &$children,
627
        array &$handle,
628
        string $cId,
629
        bool $recursive = false
630
    ): bool {
631
        end($children);
632
        $last = current($children);
633
634
        if (!is_object($last)) {
635
            return false;
636
        }
637
638
        if ($cId !== $last->getId()) {
639
            return false;
640
        }
641
642
        if (true === $recursive) {
643
            foreach ($handle as $child) {
644
                $this->mergeTreeRecursive($last, $child);
645
            }
646
647
            return true;
648
        }
649
650
        foreach ($handle as $child) {
651
            $last->appendChild($child);
652
            $child->setParent($last);
653
        }
654
655
        return true;
656
    }
657
658
    /**
659
     * Merge recursively.
660
     * Please, see self::mergeTree() to know the context.
661
     *
662
     * @param   TreeNode  $node    Node that receives.
663
     * @param   TreeNode  $newNode Node to merge.
664
     */
665
    final protected function mergeTreeRecursive(TreeNode $node, TreeNode $newNode): void
666
    {
667
        $nNId = $newNode->getId();
668
669
        if ('token' === $nNId) {
670
            $node->appendChild($newNode);
671
            $newNode->setParent($node);
672
673
            return;
674
        }
675
676
        $children = $node->getChildren();
677
        end($children);
678
        $last     = current($children);
679
680
        if ($last->getId() !== $nNId) {
681
            $node->appendChild($newNode);
682
            $newNode->setParent($node);
683
684
            return;
685
        }
686
687
        foreach ($newNode->getChildren() as $child) {
688
            $this->mergeTreeRecursive($last, $child);
689
        }
690
691
        return;
692
    }
693
694
    /**
695
     * Get AST.
696
     */
697
    final public function getTree(): TreeNode
698
    {
699
        return $this->_tree;
700
    }
701
702
    /**
703
     * Get trace.
704
     *
705
     * @return  array
706
     */
707
    final public function getTrace(): array
708
    {
709
        return $this->_trace;
710
    }
711
712
    /**
713
     * Get pragmas.
714
     *
715
     * @return  array
716
     */
717
    final public function getPragmas(): array
718
    {
719
        return $this->_pragmas;
720
    }
721
722
    /**
723
     * Get tokens.
724
     *
725
     * @return  array
726
     */
727
    final public function getTokens(): array
728
    {
729
        return $this->_tokens;
730
    }
731
732
    /**
733
     * Get the lexer iterator.
734
     */
735
    final public function getTokenSequence(): Buffer
736
    {
737
        return $this->_tokenSequence;
738
    }
739
740
    /**
741
     * Get rule by name.
742
     *
743
     * @param   string  $name Rule name.
744
     */
745
    final public function getRule(string $name): Rule
746
    {
747
        if (!isset($this->_rules[$name])) {
748
            return null;
0 ignored issues
show
Bug Best Practice introduced by
The expression return null returns the type null which is incompatible with the type-hinted return JMS\Serializer\Type\Compiler\Llk\Rule.
Loading history...
749
        }
750
751
        return $this->_rules[$name];
752
    }
753
754
    /**
755
     * Get rules.
756
     *
757
     * @return Rule[]
758
     */
759
    final public function getRules()
760
    {
761
        return $this->_rules;
762
    }
763
764
    /**
765
     * Get root rule.
766
     */
767
    final public function getRootRule(): string
768
    {
769
        \assert([] !== $this->_rules);
770
771
        foreach ($this->_rules as $rule => $_) {
772
            if (!is_int($rule)) {
773
                break;
774
            }
775
        }
776
777
        return $rule;
0 ignored issues
show
Comprehensibility Best Practice introduced by
The variable $rule seems to be defined by a foreach iteration on line 771. Are you sure the iterator is never empty, otherwise this variable is not defined?
Loading history...
778
    }
779
}
780