1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | /* |
||
6 | * This file is part of the league/commonmark package. |
||
7 | * |
||
8 | * (c) Colin O'Dell <[email protected]> |
||
9 | * |
||
10 | * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js) |
||
11 | * - (c) John MacFarlane |
||
12 | * |
||
13 | * Additional code based on commonmark-java (https://github.com/commonmark/commonmark-java) |
||
14 | * - (c) Atlassian Pty Ltd |
||
15 | * |
||
16 | * For the full copyright and license information, please view the LICENSE |
||
17 | * file that was distributed with this source code. |
||
18 | */ |
||
19 | |||
20 | namespace League\CommonMark\Parser; |
||
21 | |||
22 | use League\CommonMark\Environment\EnvironmentInterface; |
||
23 | use League\CommonMark\Event\DocumentParsedEvent; |
||
24 | use League\CommonMark\Event\DocumentPreParsedEvent; |
||
25 | use League\CommonMark\Exception\CommonMarkException; |
||
26 | use League\CommonMark\Input\MarkdownInput; |
||
27 | use League\CommonMark\Node\Block\Document; |
||
28 | use League\CommonMark\Node\Block\Paragraph; |
||
29 | use League\CommonMark\Parser\Block\BlockContinueParserInterface; |
||
30 | use League\CommonMark\Parser\Block\BlockContinueParserWithInlinesInterface; |
||
31 | use League\CommonMark\Parser\Block\BlockStart; |
||
32 | use League\CommonMark\Parser\Block\BlockStartParserInterface; |
||
33 | use League\CommonMark\Parser\Block\DocumentBlockParser; |
||
34 | use League\CommonMark\Parser\Block\ParagraphParser; |
||
35 | use League\CommonMark\Reference\MemoryLimitedReferenceMap; |
||
0 ignored issues
–
show
|
|||
36 | use League\CommonMark\Reference\ReferenceInterface; |
||
37 | use League\CommonMark\Reference\ReferenceMap; |
||
38 | |||
/**
 * Block-level Markdown parser.
 *
 * Feeds the input to the block parsers one line at a time to build a Document,
 * then runs inline parsing over every closed block parser that supports inlines.
 */
final class MarkdownParser implements MarkdownParserInterface
{
    /**
     * Supplies configuration, the block start parsers and event dispatching.
     *
     * @psalm-readonly
     */
    private EnvironmentInterface $environment;

    /**
     * Depth at which no further block starts are attempted. Re-read from the
     * 'max_nesting_level' configuration option at the start of every parse.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $maxNestingLevel;

    /**
     * References collected from paragraph parsers during the current parse.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ReferenceMap $referenceMap;

    /**
     * Key of the line currently being parsed, as yielded by the Markdown input.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $lineNumber = 0;

    /**
     * Cursor over the line currently being parsed.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private Cursor $cursor;

    /**
     * Stack of open block parsers; the last element is the active one.
     *
     * @var array<int, BlockContinueParserInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $activeBlockParsers = [];

    /**
     * Closed block parsers that still need their inlines parsed.
     *
     * @var array<int, BlockContinueParserWithInlinesInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $closedBlockParsers = [];

    public function __construct(EnvironmentInterface $environment)
    {
        $this->environment = $environment;
    }

    /**
     * Resets all per-document state so the same instance can parse again.
     */
    private function initialize(): void
    {
        $this->referenceMap       = new ReferenceMap();
        $this->lineNumber         = 0;
        $this->activeBlockParsers = [];
        $this->closedBlockParsers = [];

        $configuration         = $this->environment->getConfiguration();
        $this->maxNestingLevel = $configuration->get('max_nesting_level');
    }

    /**
     * Parses the given Markdown text into a Document.
     *
     * A DocumentPreParsedEvent is dispatched before any line is parsed, and the
     * Markdown that gets parsed is whatever that event holds after dispatch.
     * A DocumentParsedEvent is dispatched once blocks and inlines are done.
     *
     * @throws CommonMarkException
     */
    public function parse(string $input): Document
    {
        $this->initialize();

        $rootParser = new DocumentBlockParser($this->referenceMap);
        $this->activateBlockParser($rootParser);

        $preParsed = new DocumentPreParsedEvent($rootParser->getBlock(), new MarkdownInput($input));
        $this->environment->dispatch($preParsed);

        foreach ($preParsed->getMarkdown()->getLines() as $number => $text) {
            $this->lineNumber = $number;
            $this->parseLine($text);
        }

        // Close everything that is still open, then resolve inline content.
        $this->closeBlockParsers(\count($this->activeBlockParsers), $this->lineNumber);
        $this->processInlines(\strlen($input));

        $this->environment->dispatch(new DocumentParsedEvent($rootParser->getBlock()));

        return $rootParser->getBlock();
    }

    /**
     * Consumes a single line of input and updates the open block structure.
     *
     * The line is first matched against the open block parsers, then offered to
     * the block start parsers, and whatever text remains is added to a block.
     */
    private function parseLine(string $line): void
    {
        // NUL characters are replaced with U+FFFD for security
        $this->cursor = new Cursor(\str_replace("\0", "\u{FFFD}", $line));

        $matchedCount = $this->parseBlockContinuation();
        if ($matchedCount === null) {
            // A block parser finalized itself on this line; nothing more to do.
            return;
        }

        $pendingCloses = \count($this->activeBlockParsers) - $matchedCount;
        $tip           = $this->activeBlockParsers[$matchedCount - 1];
        $openedBlock   = false;

        // Only paragraphs and containers may be followed by new block starts;
        // new children are attached to the last matched container.
        $keepTrying = $tip->getBlock() instanceof Paragraph || $tip->isContainer();
        while ($keepTrying) {
            // Small optimization: a blank remainder cannot start a block.
            if ($this->cursor->isBlank()) {
                $this->cursor->advanceToEnd();
                break;
            }

            if ($tip->getBlock()->getDepth() >= $this->maxNestingLevel) {
                break;
            }

            $start = $this->findBlockStart($tip);
            if ($start === null || $start->isAborting()) {
                $this->cursor->advanceToNextNonSpaceOrTab();
                break;
            }

            $cursorState = $start->getCursorState();
            if ($cursorState !== null) {
                $this->cursor->restoreState($cursorState);
            }

            $openedBlock = true;

            // A new block begins here, so blocks that failed to continue end on the previous line.
            if ($pendingCloses > 0) {
                $this->closeBlockParsers($pendingCloses, $this->lineNumber - 1);
                $pendingCloses = 0;
            }

            $replacedStartLine = $start->isReplaceActiveBlockParser()
                ? $this->prepareActiveBlockParserForReplacement()
                : null;

            foreach ($start->getBlockParsers() as $childParser) {
                $tip        = $this->addChild($childParser, $replacedStartLine);
                $keepTrying = $childParser->isContainer();
            }
        }

        // Whatever is left at the cursor is a text line; pick the block that receives it.

        // Lazy paragraph continuation takes priority when no new block was opened.
        if (! $openedBlock && ! $this->cursor->isBlank() && $this->getActiveBlockParser()->canHaveLazyContinuationLines()) {
            $this->addRemainderToActiveBlockParser();

            return;
        }

        // Otherwise, close the blocks that did not match this line.
        if ($pendingCloses > 0) {
            $this->closeBlockParsers($pendingCloses, $this->lineNumber - 1);
        }

        if (! $tip->isContainer()) {
            $this->addRemainderToActiveBlockParser();

            return;
        }

        if ($this->cursor->isBlank()) {
            return;
        }

        // Non-blank text inside a container opens a new paragraph.
        $this->addChild(new ParagraphParser());
        $this->addRemainderToActiveBlockParser();
    }

    /**
     * Hands the rest of the current line to the active block parser.
     */
    private function addRemainderToActiveBlockParser(): void
    {
        $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
    }

    /**
     * Tries to continue each open block parser on the current line.
     *
     * @return int|null Number of leading block parsers that matched (the document
     *                  always counts), or null when a parser asked to be finalized
     *                  and the open parsers from that one onward were closed
     */
    private function parseBlockContinuation(): ?int
    {
        // Index 0 is the document, which always matches, so matching starts at 1.
        // The index of the parser being tested equals the number of matches so far.
        $matched = 1;
        while ($matched < \count($this->activeBlockParsers)) {
            $candidate    = $this->activeBlockParsers[$matched];
            $continuation = $candidate->tryContinue(clone $this->cursor, $this->getActiveBlockParser());
            if ($continuation === null) {
                break;
            }

            if ($continuation->isFinalize()) {
                $this->closeBlockParsers(\count($this->activeBlockParsers) - $matched, $this->lineNumber);

                return null;
            }

            $cursorState = $continuation->getCursorState();
            if ($cursorState !== null) {
                $this->cursor->restoreState($cursorState);
            }

            $matched++;
        }

        return $matched;
    }

    /**
     * Asks each block start parser of the environment, in order, whether a block
     * starts at the cursor and returns the first non-null answer.
     */
    private function findBlockStart(BlockContinueParserInterface $lastMatchedBlockParser): ?BlockStart
    {
        $parserState = new MarkdownParserState($this->getActiveBlockParser(), $lastMatchedBlockParser);

        foreach ($this->environment->getBlockStartParsers() as $startParser) {
            \assert($startParser instanceof BlockStartParserInterface);

            // Each start parser gets its own copy of the cursor.
            $start = $startParser->tryStart(clone $this->cursor, $parserState);
            if ($start !== null) {
                return $start;
            }
        }

        return null;
    }

    /**
     * Pops and finalizes the given number of block parsers from the top of the stack.
     */
    private function closeBlockParsers(int $count, int $endLineNumber): void
    {
        for ($remaining = $count; $remaining > 0; $remaining--) {
            $closed = $this->deactivateBlockParser();
            $this->finalize($closed, $endLineNumber);

            // phpcs:disable SlevomatCodingStandard.ControlStructures.EarlyExit.EarlyExitNotUsed
            if ($closed instanceof BlockContinueParserWithInlinesInterface) {
                // Kept so that processInlines() can visit it later
                $this->closedBlockParsers[] = $closed;
            }
        }
    }

    /**
     * Finalizes a block: collects references from paragraph parsers, records the
     * block's end line and tells the parser to close its block.
     */
    private function finalize(BlockContinueParserInterface $blockParser, int $endLineNumber): void
    {
        if ($blockParser instanceof ParagraphParser) {
            $references = $blockParser->getReferences();
            $this->updateReferenceMap($references);
        }

        $block = $blockParser->getBlock();
        $block->setEndLine($endLineNumber);

        $blockParser->closeBlock();
    }

    /**
     * Runs inline parsing for every closed block parser that was remembered.
     *
     * @param int $inputSize Value passed to the memory-limited reference map
     *                       (parse() supplies the byte length of the input)
     */
    private function processInlines(int $inputSize): void
    {
        $limitedReferences = new MemoryLimitedReferenceMap($this->referenceMap, $inputSize);
        $inlineEngine      = new InlineParserEngine($this->environment, $limitedReferences);

        foreach ($this->closedBlockParsers as $closedParser) {
            $closedParser->parseInlines($inlineEngine);
        }
    }

    /**
     * Attaches the parser's block as a child of the active block and activates the parser.
     *
     * While the active block cannot contain the new block, the active parser is
     * closed and its parent is tried instead.
     *
     * @param int|null $startLineNumber Start line to record; defaults to the current line
     *
     * @return BlockContinueParserInterface The parser that was passed in
     */
    private function addChild(BlockContinueParserInterface $blockParser, ?int $startLineNumber = null): BlockContinueParserInterface
    {
        $startLine = $startLineNumber ?? $this->lineNumber;
        $blockParser->getBlock()->setStartLine($startLine);

        while (! $this->getActiveBlockParser()->canContain($blockParser->getBlock())) {
            $this->closeBlockParsers(1, $startLine - 1);
        }

        $parentBlock = $this->getActiveBlockParser()->getBlock();
        $parentBlock->appendChild($blockParser->getBlock());

        $this->activateBlockParser($blockParser);

        return $blockParser;
    }

    /**
     * Pushes a block parser onto the stack, making it the active one.
     */
    private function activateBlockParser(BlockContinueParserInterface $blockParser): void
    {
        $this->activeBlockParsers[] = $blockParser;
    }

    /**
     * Pops the active block parser off the stack and returns it.
     *
     * @throws ParserLogicException if the stack is already empty
     */
    private function deactivateBlockParser(): BlockContinueParserInterface
    {
        $top = \array_pop($this->activeBlockParsers);
        if ($top !== null) {
            return $top;
        }

        throw new ParserLogicException('The last block parser should not be deactivated');
    }

    /**
     * Removes the active block parser because a new block is replacing it.
     *
     * @return int|null The line number where the old block started
     */
    private function prepareActiveBlockParserForReplacement(): ?int
    {
        // The replaced block is neither finalized nor remembered for inline parsing.
        $replaced = $this->deactivateBlockParser();

        // Its references are still recorded when it is a paragraph.
        if ($replaced instanceof ParagraphParser) {
            $this->updateReferenceMap($replaced->getReferences());
        }

        $replaced->getBlock()->detach();

        return $replaced->getBlock()->getStartLine();
    }

    /**
     * Adds each reference whose label is not already present in the reference map.
     *
     * @param ReferenceInterface[] $references
     */
    private function updateReferenceMap(iterable $references): void
    {
        foreach ($references as $candidate) {
            if ($this->referenceMap->contains($candidate->getLabel())) {
                // The first definition of a label wins
                continue;
            }

            $this->referenceMap->add($candidate);
        }
    }

    /**
     * Returns the block parser on top of the stack.
     *
     * @throws ParserLogicException if no block parser is active
     */
    public function getActiveBlockParser(): BlockContinueParserInterface
    {
        $top = \end($this->activeBlockParsers);
        if ($top !== false) {
            return $top;
        }

        throw new ParserLogicException('No active block parsers are available');
    }
}
||
357 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"]
, you can move it to the dependency path list as follows: For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths