MarkdownParser   B
last analyzed

Complexity

Total Complexity 49

Size/Duplication

Total Lines 316
Duplicated Lines 0 %

Test Coverage

Coverage 98.39%

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 122
c 1
b 0
f 0
dl 0
loc 316
ccs 122
cts 124
cp 0.9839
rs 8.48
wmc 49

15 Methods

Rating   Name   Duplication   Size   Complexity  
A parseBlockContinuation() 0 26 5
A updateReferenceMap() 0 5 3
A __construct() 0 3 1
A initialize() 0 8 1
A activateBlockParser() 0 3 1
D parseLine() 0 75 18
A prepareActiveBlockParserForReplacement() 0 12 2
A getActiveBlockParser() 0 8 2
A deactivateBlockParser() 0 8 2
A parse() 0 23 2
A addChild() 0 12 2
A findBlockStart() 0 12 3
A finalize() 0 8 2
A processInlines() 0 6 2
A closeBlockParsers() 0 10 3

How to fix   Complexity   

Complex Class

Complex classes like MarkdownParser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields or methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use MarkdownParser, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <colinodell@gmail.com>
9
 *
10
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11
 *  - (c) John MacFarlane
12
 *
13
 * Additional code based on commonmark-java (https://github.com/commonmark/commonmark-java)
14
 *  - (c) Atlassian Pty Ltd
15
 *
16
 * For the full copyright and license information, please view the LICENSE
17
 * file that was distributed with this source code.
18
 */
19
20
namespace League\CommonMark\Parser;
21
22
use League\CommonMark\Environment\EnvironmentInterface;
23
use League\CommonMark\Event\DocumentParsedEvent;
24
use League\CommonMark\Event\DocumentPreParsedEvent;
25
use League\CommonMark\Exception\CommonMarkException;
26
use League\CommonMark\Input\MarkdownInput;
27
use League\CommonMark\Node\Block\Document;
28
use League\CommonMark\Node\Block\Paragraph;
29
use League\CommonMark\Parser\Block\BlockContinueParserInterface;
30
use League\CommonMark\Parser\Block\BlockContinueParserWithInlinesInterface;
31
use League\CommonMark\Parser\Block\BlockStart;
32
use League\CommonMark\Parser\Block\BlockStartParserInterface;
33
use League\CommonMark\Parser\Block\DocumentBlockParser;
34
use League\CommonMark\Parser\Block\ParagraphParser;
35
use League\CommonMark\Reference\MemoryLimitedReferenceMap;
0 ignored issues
show
Bug introduced by
The type League\CommonMark\Reference\MemoryLimitedReferenceMap was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
36
use League\CommonMark\Reference\ReferenceInterface;
37
use League\CommonMark\Reference\ReferenceMap;
38
39
/**
 * Parses Markdown input line by line into a Document tree.
 *
 * A stack of active block parsers is maintained while lines are consumed; block parsers that support inline
 * content are remembered as they are closed, and their inlines are parsed once the whole input has been read.
 */
final class MarkdownParser implements MarkdownParserInterface
{
    /** @psalm-readonly */
    private EnvironmentInterface $environment;

    /**
     * Value of the 'max_nesting_level' configuration option, re-read at the start of every parse() call.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $maxNestingLevel;

    /** @psalm-readonly-allow-private-mutation */
    private ReferenceMap $referenceMap;

    /**
     * Number of the line currently being parsed, as keyed by MarkdownInput::getLines().
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $lineNumber = 0;

    /**
     * Cursor over the line currently being parsed; replaced at the start of every parseLine() call.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private Cursor $cursor;

    /**
     * Stack of currently open block parsers; the most recently activated parser is the last element.
     *
     * @var array<int, BlockContinueParserInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $activeBlockParsers = [];

    /**
     * Closed block parsers whose inline content still has to be parsed, in the order they were closed.
     *
     * @var array<int, BlockContinueParserWithInlinesInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $closedBlockParsers = [];

    public function __construct(EnvironmentInterface $environment)
    {
        $this->environment = $environment;
    }

    /**
     * Resets all per-document state so the same instance can be used for another parse() call.
     */
    private function initialize(): void
    {
        $this->referenceMap       = new ReferenceMap();
        $this->lineNumber         = 0;
        $this->activeBlockParsers = [];
        $this->closedBlockParsers = [];

        $configuration         = $this->environment->getConfiguration();
        $this->maxNestingLevel = $configuration->get('max_nesting_level');
    }

    /**
     * Parses the given Markdown into a Document.
     *
     * A DocumentPreParsedEvent is dispatched before the first line is parsed and a DocumentParsedEvent after
     * all blocks have been closed and their inlines processed.
     *
     * @throws CommonMarkException
     */
    public function parse(string $input): Document
    {
        $this->initialize();

        // The document's own parser is always the first entry of the active stack.
        $rootParser = new DocumentBlockParser($this->referenceMap);
        $this->activateBlockParser($rootParser);

        // The Markdown to parse is read back from the event after dispatch, not taken from $input directly.
        $preParsed = new DocumentPreParsedEvent($rootParser->getBlock(), new MarkdownInput($input));
        $this->environment->dispatch($preParsed);

        foreach ($preParsed->getMarkdown()->getLines() as $number => $text) {
            $this->lineNumber = $number;
            $this->parseLine($text);
        }

        // Close whatever is still open (including the document), then parse the inlines.
        $this->closeBlockParsers(\count($this->activeBlockParsers), $this->lineNumber);
        $this->processInlines(\strlen($input));

        $this->environment->dispatch(new DocumentParsedEvent($rootParser->getBlock()));

        return $rootParser->getBlock();
    }

    /**
     * Analyze a line of text and update the document appropriately. We parse markdown text by calling this on each
     * line of input, then finalizing the document.
     *
     * The work happens in three phases: match the line against the open blocks, try to start new blocks, then
     * hand whatever text remains to the appropriate block.
     *
     * @throws ParserLogicException
     */
    private function parseLine(string $line): void
    {
        // replace NUL characters for security
        $this->cursor = new Cursor(\str_replace("\0", "\u{FFFD}", $line));

        $matches = $this->parseBlockContinuation();
        if ($matches === null) {
            // A block finalized itself on this line; nothing more to do.
            return;
        }

        $unmatchedBlocks   = \count($this->activeBlockParsers) - $matches;
        $lastMatchedParser = $this->activeBlockParsers[$matches - 1];

        [$blockParser, $startedNewBlock, $unmatchedBlocks] = $this->parseBlockStarts($lastMatchedParser, $unmatchedBlocks);

        $this->addTextLine($blockParser, $startedNewBlock, $unmatchedBlocks);
    }

    /**
     * Repeatedly tries to start new blocks at the cursor, nesting them inside the last matched block.
     *
     * New block starts are only attempted while the current block is a Paragraph or reports itself as a
     * container, the rest of the line is not blank, and the maximum nesting level has not been reached.
     *
     * @param BlockContinueParserInterface $blockParser     The deepest block parser matched by this line
     * @param int                          $unmatchedBlocks Number of open block parsers this line did not match
     *
     * @return array{0: BlockContinueParserInterface, 1: bool, 2: int} The block parser to treat as current,
     *                                                                whether any new block was started, and
     *                                                                the number of still-unmatched blocks
     *
     * @throws ParserLogicException
     */
    private function parseBlockStarts(BlockContinueParserInterface $blockParser, int $unmatchedBlocks): array
    {
        $startedNewBlock = false;
        $tryBlockStarts  = $blockParser->getBlock() instanceof Paragraph || $blockParser->isContainer();

        while ($tryBlockStarts) {
            // this is a little performance optimization
            if ($this->cursor->isBlank()) {
                $this->cursor->advanceToEnd();
                break;
            }

            if ($blockParser->getBlock()->getDepth() >= $this->maxNestingLevel) {
                break;
            }

            $blockStart = $this->findBlockStart($blockParser);
            if ($blockStart === null || $blockStart->isAborting()) {
                $this->cursor->advanceToNextNonSpaceOrTab();
                break;
            }

            $cursorState = $blockStart->getCursorState();
            if ($cursorState !== null) {
                $this->cursor->restoreState($cursorState);
            }

            $startedNewBlock = true;

            // We're starting a new block. If we have any previous blocks that need to be closed, we need to do it now.
            if ($unmatchedBlocks > 0) {
                $this->closeBlockParsers($unmatchedBlocks, $this->lineNumber - 1);
                $unmatchedBlocks = 0;
            }

            // A replacing block start takes over the start line of the block it replaces.
            $oldBlockLineStart = $blockStart->isReplaceActiveBlockParser()
                ? $this->prepareActiveBlockParserForReplacement()
                : null;

            foreach ($blockStart->getBlockParsers() as $newBlockParser) {
                $blockParser    = $this->addChild($newBlockParser, $oldBlockLineStart);
                $tryBlockStarts = $newBlockParser->isContainer();
            }
        }

        return [$blockParser, $startedNewBlock, $unmatchedBlocks];
    }

    /**
     * Adds what remains of the current line, after block markers were consumed, to the appropriate block.
     *
     * @param BlockContinueParserInterface $blockParser     The block parser returned by parseBlockStarts()
     * @param bool                         $startedNewBlock Whether this line started at least one new block
     * @param int                          $unmatchedBlocks Number of open block parsers this line did not match
     *
     * @throws ParserLogicException
     */
    private function addTextLine(BlockContinueParserInterface $blockParser, bool $startedNewBlock, int $unmatchedBlocks): void
    {
        // First check for a lazy paragraph continuation:
        if (! $startedNewBlock && ! $this->cursor->isBlank()) {
            $activeParser = $this->getActiveBlockParser();
            if ($activeParser->canHaveLazyContinuationLines()) {
                $activeParser->addLine($this->cursor->getRemainder());

                return;
            }
        }

        // finalize any blocks not matched
        if ($unmatchedBlocks > 0) {
            $this->closeBlockParsers($unmatchedBlocks, $this->lineNumber - 1);
        }

        if (! $blockParser->isContainer()) {
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());

            return;
        }

        if ($this->cursor->isBlank()) {
            return;
        }

        // Non-blank text directly inside a container: open a paragraph to hold it.
        $this->addChild(new ParagraphParser());
        $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
    }

    /**
     * Matches the current line against the open block parsers, outermost first.
     *
     * @return int|null Number of open block parsers (counting the document) that continue on this line, or null
     *                  when a block asked to be finalized, in which case the line needs no further processing
     *
     * @throws ParserLogicException
     */
    private function parseBlockContinuation(): ?int
    {
        // The stack is only modified on the finalize path, which returns immediately, so its size is
        // constant for the duration of the loop.
        $activeCount = \count($this->activeBlockParsers);

        // For each containing block, try to parse the associated line start.
        // The document will always match, so we can skip the first block parser and start at 1 matches
        $matches = 1;
        for ($i = 1; $i < $activeCount; $i++) {
            $continuation = $this->activeBlockParsers[$i]->tryContinue(clone $this->cursor, $this->getActiveBlockParser());
            if ($continuation === null) {
                break;
            }

            if ($continuation->isFinalize()) {
                // Close this block and everything nested inside it.
                $this->closeBlockParsers($activeCount - $i, $this->lineNumber);

                return null;
            }

            $cursorState = $continuation->getCursorState();
            if ($cursorState !== null) {
                $this->cursor->restoreState($cursorState);
            }

            $matches++;
        }

        return $matches;
    }

    /**
     * Asks each registered block start parser, in the environment's order, whether a new block starts at the
     * cursor. Every parser works on its own clone of the cursor.
     *
     * @return BlockStart|null The first non-null result, or null when no parser reports a block start
     *
     * @throws ParserLogicException
     */
    private function findBlockStart(BlockContinueParserInterface $lastMatchedBlockParser): ?BlockStart
    {
        $parserState = new MarkdownParserState($this->getActiveBlockParser(), $lastMatchedBlockParser);

        foreach ($this->environment->getBlockStartParsers() as $blockStartParser) {
            \assert($blockStartParser instanceof BlockStartParserInterface);

            $blockStart = $blockStartParser->tryStart(clone $this->cursor, $parserState);
            if ($blockStart !== null) {
                return $blockStart;
            }
        }

        return null;
    }

    /**
     * Pops and finalizes the $count most recently activated block parsers.
     *
     * @param int $count         How many block parsers to close
     * @param int $endLineNumber End line recorded on each closed block
     *
     * @throws ParserLogicException
     */
    private function closeBlockParsers(int $count, int $endLineNumber): void
    {
        for ($closed = 0; $closed < $count; $closed++) {
            $blockParser = $this->deactivateBlockParser();
            $this->finalize($blockParser, $endLineNumber);

            // phpcs:disable SlevomatCodingStandard.ControlStructures.EarlyExit.EarlyExitNotUsed
            if ($blockParser instanceof BlockContinueParserWithInlinesInterface) {
                // Remember for inline parsing
                $this->closedBlockParsers[] = $blockParser;
            }
        }
    }

    /**
     * Finalize a block: merge any reference definitions collected by a paragraph parser into the reference map,
     * record the block's end line, and let the block parser close its block.
     */
    private function finalize(BlockContinueParserInterface $blockParser, int $endLineNumber): void
    {
        if ($blockParser instanceof ParagraphParser) {
            $this->updateReferenceMap($blockParser->getReferences());
        }

        $blockParser->getBlock()->setEndLine($endLineNumber);
        $blockParser->closeBlock();
    }

    /**
     * Parses the inline content of every remembered block parser, in the order the blocks were closed.
     *
     * @param int $inputSize Byte length of the input string, passed on to the MemoryLimitedReferenceMap
     */
    private function processInlines(int $inputSize): void
    {
        $limitedReferences = new MemoryLimitedReferenceMap($this->referenceMap, $inputSize);
        $inlineParser      = new InlineParserEngine($this->environment, $limitedReferences);

        foreach ($this->closedBlockParsers as $blockParser) {
            $blockParser->parseInlines($inlineParser);
        }
    }

    /**
     * Adds the given parser's block as a child of the active block and makes the parser the active one. If the
     * active block can't contain the new block, it is closed and its parent is tried, and so on until a block
     * is found that can accept it.
     *
     * @param int|null $startLineNumber Start line to record on the new block; defaults to the current line
     *
     * @return BlockContinueParserInterface The block parser that was passed in
     *
     * @throws ParserLogicException
     */
    private function addChild(BlockContinueParserInterface $blockParser, ?int $startLineNumber = null): BlockContinueParserInterface
    {
        $startLine = $startLineNumber ?? $this->lineNumber;

        $blockParser->getBlock()->setStartLine($startLine);

        while (! $this->getActiveBlockParser()->canContain($blockParser->getBlock())) {
            // Blocks closed to make room end on the line before the new block starts.
            $this->closeBlockParsers(1, $startLine - 1);
        }

        $this->getActiveBlockParser()->getBlock()->appendChild($blockParser->getBlock());
        $this->activateBlockParser($blockParser);

        return $blockParser;
    }

    /**
     * Pushes the given block parser onto the stack of active block parsers.
     */
    private function activateBlockParser(BlockContinueParserInterface $blockParser): void
    {
        $this->activeBlockParsers[] = $blockParser;
    }

    /**
     * Pops the most recently activated block parser off the stack.
     *
     * @throws ParserLogicException if the stack is already empty
     */
    private function deactivateBlockParser(): BlockContinueParserInterface
    {
        $popped = \array_pop($this->activeBlockParsers);
        if ($popped === null) {
            throw new ParserLogicException('The last block parser should not be deactivated');
        }

        return $popped;
    }

    /**
     * Removes the active block parser and detaches its block from the tree so another block can take its place.
     *
     * @return int|null The line number where the old block started
     *
     * @throws ParserLogicException
     */
    private function prepareActiveBlockParserForReplacement(): ?int
    {
        // Note that we don't want to parse inlines or finalize this block, as it's getting replaced.
        $replaced = $this->deactivateBlockParser();

        // Reference definitions already collected by a paragraph are kept even though the block is dropped.
        if ($replaced instanceof ParagraphParser) {
            $this->updateReferenceMap($replaced->getReferences());
        }

        $replaced->getBlock()->detach();

        return $replaced->getBlock()->getStartLine();
    }

    /**
     * Adds each reference to the reference map unless its label is already present; the first definition wins.
     *
     * @param iterable<ReferenceInterface> $references
     */
    private function updateReferenceMap(iterable $references): void
    {
        foreach ($references as $reference) {
            if (! $this->referenceMap->contains($reference->getLabel())) {
                $this->referenceMap->add($reference);
            }
        }
    }

    /**
     * Returns the most recently activated block parser.
     *
     * @throws ParserLogicException if no block parser is active
     */
    public function getActiveBlockParser(): BlockContinueParserInterface
    {
        $active = \end($this->activeBlockParsers);
        if ($active === false) {
            throw new ParserLogicException('No active block parsers are available');
        }

        return $active;
    }
}
357