Completed
Push — latest ( 76d169...995567 )
by Colin
22s queued 10s
created

InlineParserEngine::parse()   B

Complexity

Conditions 8
Paths 28

Size

Total Lines 53
Code Lines 26

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 26
CRAP Score 8

Importance

Changes 0
Metric Value
eloc 26
c 0
b 0
f 0
dl 0
loc 53
ccs 26
cts 26
cp 1
rs 8.4444
cc 8
nc 28
nop 2
crap 8

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

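Commonly applied refactorings include Extract Method: move a cohesive group of statements out of the long method into its own small, well-named method.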

1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11
 *  - (c) John MacFarlane
12
 *
13
 * For the full copyright and license information, please view the LICENSE
14
 * file that was distributed with this source code.
15
 */
16
17
namespace League\CommonMark\Parser;
18
19
use League\CommonMark\Environment\EnvironmentInterface;
20
use League\CommonMark\Node\Block\AbstractBlock;
21
use League\CommonMark\Node\Inline\AdjacentTextMerger;
22
use League\CommonMark\Node\Inline\Text;
23
use League\CommonMark\Node\Node;
24
use League\CommonMark\Parser\Inline\InlineParserInterface;
25
use League\CommonMark\Reference\ReferenceMapInterface;
26
27
/**
 * Runs every registered inline parser over a block's text content and appends the resulting
 * inline nodes (plus any unparsed plain text) to that block.
 *
 * @internal
 */
final class InlineParserEngine implements InlineParserEngineInterface
{
    /**
     * @var EnvironmentInterface
     *
     * @psalm-readonly
     */
    private $environment;

    /**
     * @var ReferenceMapInterface
     *
     * @psalm-readonly
     */
    private $referenceMap;

    /**
     * One entry per registered inline parser: the parser, its match regex, and whether that regex
     * itself contains multibyte characters.
     *
     * Defaults to an empty list so that an environment without inline parsers is handled cleanly.
     *
     * @var array<int, InlineParserInterface|string|bool>
     * @psalm-var list<array{0: InlineParserInterface, 1: string, 2: bool}>
     * @phpstan-var array<int, array{0: InlineParserInterface, 1: string, 2: bool}>
     */
    private $parsers = [];

    /**
     * Caches each inline parser from the environment together with its regex and a multibyte flag.
     */
    public function __construct(EnvironmentInterface $environment, ReferenceMapInterface $referenceMap)
    {
        $this->environment  = $environment;
        $this->referenceMap = $referenceMap;

        foreach ($environment->getInlineParsers() as $parser) {
            \assert($parser instanceof InlineParserInterface);
            $regex = $parser->getMatchDefinition()->getRegex();

            // A byte length that differs from the UTF-8 character length means the regex holds multibyte characters
            $this->parsers[] = [$parser, $regex, \strlen($regex) !== \mb_strlen($regex, 'UTF-8')];
        }
    }

    /**
     * Parses the (trimmed) contents into inline nodes appended to $block.
     *
     * Text that no parser handles is added as plain text; afterwards the delimiter stack is
     * processed and adjacent text nodes are merged.
     */
    public function parse(string $contents, AbstractBlock $block): void
    {
        $contents = \trim($contents);
        $cursor   = new Cursor($contents);

        $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap);

        // Have all parsers look at the line to determine what they might want to parse and what positions they exist at
        foreach ($this->matchParsers($contents) as $matchPosition => $parsers) {
            $currentPosition = $cursor->getPosition();

            // We've already gone past this point
            if ($currentPosition > $matchPosition) {
                continue;
            }

            // We've skipped over some uninteresting text that should be added as a plain text node
            if ($currentPosition < $matchPosition) {
                $cursor->advanceBy($matchPosition - $currentPosition);
                $this->addPlainText($cursor->getPreviousText(), $block);
            }

            // We're now at a potential start - see which of the current parsers can handle it
            if ($this->parseAtCurrentPosition($parsers, $inlineParserContext)) {
                continue;
            }

            // Despite potentially being interested, nothing actually parsed text here, so add the current character and continue onwards
            $this->addPlainText((string) $cursor->getCharacter(), $block);
            $cursor->advance();
        }

        // Add any remaining text that wasn't parsed
        if (! $cursor->isAtEnd()) {
            $this->addPlainText($cursor->getRemainder(), $block);
        }

        $this->processDelimiters($inlineParserContext);

        // Combine adjacent text nodes into one
        AdjacentTextMerger::mergeChildNodes($block);
    }

    /**
     * Offers the matched text to each candidate parser in order and stops at the first one that accepts it.
     *
     * @param array<int, array<int, InlineParserInterface|string>> $parsers
     *
     * @psalm-param list<array{0: InlineParserInterface, 1: string}> $parsers
     *
     * @phpstan-param array<int, array{0: InlineParserInterface, 1: string}> $parsers
     *
     * @return bool Whether one of the parsers reported success
     */
    private function parseAtCurrentPosition(array $parsers, InlineParserContext $inlineParserContext): bool
    {
        foreach ($parsers as [$parser, $match]) {
            \assert($parser instanceof InlineParserInterface);
            if ($parser->parse($match, $inlineParserContext)) {
                // A parser has successfully handled the text at the given position; don't consider any others at this position
                return true;
            }
        }

        return false;
    }

    /**
     * Processes any delimiters that were found during parsing and then empties the delimiter stack.
     */
    private function processDelimiters(InlineParserContext $inlineParserContext): void
    {
        $delimiterStack = $inlineParserContext->getDelimiterStack();
        $delimiterStack->processDelimiters(null, $this->environment->getDelimiterProcessors());
        $delimiterStack->removeAll();
    }

    /**
     * Adds plain text to the container, extending its last child when that child is a Text node
     * without a 'delim' data entry, and appending a new Text node otherwise.
     */
    private function addPlainText(string $text, Node $container): void
    {
        $lastInline = $container->lastChild();
        if ($lastInline instanceof Text && ! isset($lastInline->data['delim'])) {
            $lastInline->append($text);
        } else {
            $container->appendChild(new Text($text));
        }
    }

    /**
     * Given the current line, ask all the parsers which parts of the text they would be interested in parsing.
     *
     * The resulting array provides a list of character positions, which parsers are interested in trying to parse
     * the text at those points, and (for convenience/optimization) what the matching text happened to be.
     *
     * The positions are relative to $contents exactly as given, so callers must pass the same (already trimmed)
     * string that the cursor was created from.
     *
     * @return array<array<int, InlineParserInterface|string>>
     *
     * @psalm-return array<int, list<array{0: InlineParserInterface, 1: string}>>
     *
     * @phpstan-return array<int, array<int, array{0: InlineParserInterface, 1: string}>>
     */
    private function matchParsers(string $contents): array
    {
        // Use an explicit encoding so detection agrees with the offset conversion below
        // regardless of the configured mb_internal_encoding
        $isMultibyte = \mb_strlen($contents, 'UTF-8') !== \strlen($contents);

        $ret = [];

        foreach ($this->parsers as [$parser, $regex, $isRegexMultibyte]) {
            // Switch the regex to UTF-8 mode whenever either side contains multibyte characters
            if ($isMultibyte || $isRegexMultibyte) {
                $regex .= 'u';
            }

            // Skip this parser when nothing matched (0) or the match itself failed (false)
            if (! \preg_match_all($regex, $contents, $matches, \PREG_OFFSET_CAPTURE | \PREG_SET_ORDER)) {
                continue;
            }

            foreach ($matches as $match) {
                if ($isMultibyte) {
                    // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
                    $offset = \mb_strlen(\substr($contents, 0, $match[0][1]), 'UTF-8');
                } else {
                    $offset = (int) $match[0][1];
                }

                $ret[$offset][] = [$parser, (string) $match[0][0]];
            }
        }

        // Matches were collected parser by parser; order them by position in the text
        \ksort($ret);

        return $ret;
    }
}
176