Completed
Push — latest ( 609def...dcbb7e )
by Colin
14s queued 11s
created

InlineParserEngine   A

Complexity

Total Complexity 21

Size/Duplication

Total Lines 157
Duplicated Lines 0 %

Test Coverage

Coverage 98.36%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 21
eloc 62
c 1
b 0
f 0
dl 0
loc 157
ccs 60
cts 61
cp 0.9836
rs 10

4 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 10 2
A addPlainText() 0 7 3
B parse() 0 53 8
B matchParsers() 0 44 8
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <colinodell@gmail.com>
9
 *
10
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11
 *  - (c) John MacFarlane
12
 *
13
 * For the full copyright and license information, please view the LICENSE
14
 * file that was distributed with this source code.
15
 */
16
17
namespace League\CommonMark\Parser;
18
19
use League\CommonMark\Environment\EnvironmentInterface;
20
use League\CommonMark\Node\Block\AbstractBlock;
21
use League\CommonMark\Node\Inline\AdjacentTextMerger;
22
use League\CommonMark\Node\Inline\Text;
23
use League\CommonMark\Node\Node;
24
use League\CommonMark\Parser\Inline\InlineParserInterface;
25
use League\CommonMark\Reference\ReferenceMapInterface;
26
27
/**
 * Drives the environment's inline parsers over the text content of a block.
 *
 * For a given string the engine first asks every registered parser (via its match regex) which positions it
 * might be interested in, then walks those positions in order, giving each interested parser a chance to handle
 * the text there. Any text that no parser claims is appended to the block as plain Text nodes.
 *
 * @internal
 */
final class InlineParserEngine implements InlineParserEngineInterface
{
    /**
     * @var EnvironmentInterface
     *
     * @psalm-readonly
     */
    private $environment;

    /**
     * @var ReferenceMapInterface
     *
     * @psalm-readonly
     */
    private $referenceMap;

    /**
     * Registered inline parsers, each stored as [parser, match regex, whether the regex contains multibyte characters].
     *
     * Defaults to an empty list so that iterating it is safe even when the environment provides no inline parsers.
     *
     * @var array<int, array<int, InlineParserInterface|string|bool>>
     * @psalm-var list<array{0: InlineParserInterface, 1: string, 2: bool}>
     * @phpstan-var array<int, array{0: InlineParserInterface, 1: string, 2: bool}>
     */
    private $parsers = [];

    /**
     * Stores the collaborators and pre-computes, for every inline parser in the environment, its match regex
     * and whether that regex needs to be run in Unicode mode.
     */
    public function __construct(EnvironmentInterface $environment, ReferenceMapInterface $referenceMap)
    {
        $this->environment  = $environment;
        $this->referenceMap = $referenceMap;

        foreach ($environment->getInlineParsers() as $parser) {
            \assert($parser instanceof InlineParserInterface);
            $regex = $parser->getMatchDefinition()->getRegex();

            // If the byte length differs from the UTF-8 character length, the pattern itself contains multibyte characters.
            // The encoding is passed explicitly so the result does not depend on mb_internal_encoding().
            $this->parsers[] = [$parser, $regex, \strlen($regex) !== \mb_strlen($regex, 'UTF-8')];
        }
    }

    /**
     * Parses the (trimmed) contents as inline content and appends the results to the given block.
     *
     * After all match positions have been visited, leftover text is added as plain text, the delimiter stack
     * is processed and cleared, and adjacent Text children of the block are merged.
     */
    public function parse(string $contents, AbstractBlock $block): void
    {
        $contents = \trim($contents);
        $cursor   = new Cursor($contents);

        $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap);

        // Have all parsers look at the line to determine what they might want to parse and what positions they exist at
        foreach ($this->matchParsers($contents) as $matchPosition => $parsers) {
            $currentPosition = $cursor->getPosition();

            // We've already gone past this point (an earlier parser consumed the text covering this match)
            if ($currentPosition > $matchPosition) {
                continue;
            }

            // We've skipped over some uninteresting text that should be added as a plain text node
            if ($currentPosition < $matchPosition) {
                $cursor->advanceBy($matchPosition - $currentPosition);
                $this->addPlainText($cursor->getPreviousText(), $block);
            }

            // We're now at a potential start - see which of the current parsers can handle it
            $parsed = false;
            foreach ($parsers as [$parser, $matches]) {
                \assert($parser instanceof InlineParserInterface);
                if ($parser->parse($inlineParserContext->withMatches($matches))) {
                    // A parser has successfully handled the text at the given position; don't consider any others at this position
                    $parsed = true;
                    break;
                }
            }

            if ($parsed) {
                continue;
            }

            // Despite potentially being interested, nothing actually parsed text here, so add the current character and continue onwards
            $this->addPlainText((string) $cursor->getCharacter(), $block);
            $cursor->advance();
        }

        // Add any remaining text that wasn't parsed
        if (! $cursor->isAtEnd()) {
            $this->addPlainText($cursor->getRemainder(), $block);
        }

        // Process any delimiters that were found
        $delimiterStack = $inlineParserContext->getDelimiterStack();
        $delimiterStack->processDelimiters(null, $this->environment->getDelimiterProcessors());
        $delimiterStack->removeAll();

        // Combine adjacent text nodes into one
        AdjacentTextMerger::mergeChildNodes($block);
    }

    /**
     * Adds plain text to the container, extending its last child when that child is a Text node
     * without a 'delim' data entry, and appending a new Text node otherwise.
     */
    private function addPlainText(string $text, Node $container): void
    {
        $lastInline = $container->lastChild();
        if ($lastInline instanceof Text && ! isset($lastInline->data['delim'])) {
            $lastInline->append($text);
        } else {
            $container->appendChild(new Text($text));
        }
    }

    /**
     * Given the current line, ask all the parsers which parts of the text they would be interested in parsing.
     *
     * The resulting array provides a list of character positions, which parsers are interested in trying to parse
     * the text at those points, and (for convenience/optimization) what the matching text happened to be.
     *
     * The contents must be exactly the string the Cursor was created from (parse() passes the already-trimmed
     * text), because the returned positions are compared against the Cursor's position.
     *
     * @return array<int, array<int, array<int, InlineParserInterface|string[]>>>
     *
     * @psalm-return array<int, list<array{0: InlineParserInterface, 1: non-empty-array<string>}>>
     *
     * @phpstan-return array<int, array<int, array{0: InlineParserInterface, 1: string[]}>>
     */
    private function matchParsers(string $contents): array
    {
        // Explicit encoding keeps this check consistent with the UTF-8 offset conversion below,
        // regardless of the configured mb_internal_encoding()
        $isMultibyte = \mb_strlen($contents, 'UTF-8') !== \strlen($contents);

        $ret = [];

        foreach ($this->parsers as [$parser, $regex, $isRegexMultibyte]) {
            // Run the pattern in Unicode mode whenever either the subject or the pattern contains multibyte characters
            if ($isMultibyte || $isRegexMultibyte) {
                $regex .= 'u';
            }

            // See if the parser's InlineParserMatch regex matched against any part of the string.
            // Note that both "no match" (0) and a PCRE failure (false) skip this parser.
            if (! \preg_match_all($regex, $contents, $matches, \PREG_OFFSET_CAPTURE | \PREG_SET_ORDER)) {
                continue;
            }

            // For each part that matched...
            foreach ($matches as $match) {
                if ($isMultibyte) {
                    // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
                    $offset = \mb_strlen(\substr($contents, 0, $match[0][1]), 'UTF-8');
                } else {
                    $offset = (int) $match[0][1];
                }

                // Remove the offsets, keeping only the matched text
                $m = \array_map(static function (array $s): string {
                    return (string) $s[0];
                }, $match);

                if ($m === []) {
                    continue;
                }

                // Add this match to the list of character positions to stop at
                $ret[$offset][] = [$parser, $m];
            }
        }

        // Sort matches by position so we visit them in order
        \ksort($ret);

        return $ret;
    }
}
189