thephpleague /
commonmark
| 1 | <?php |
||
| 2 | |||
| 3 | declare(strict_types=1); |
||
| 4 | |||
| 5 | /* |
||
| 6 | * This file is part of the league/commonmark package. |
||
| 7 | * |
||
| 8 | * (c) Colin O'Dell <[email protected]> |
||
| 9 | * |
||
| 10 | * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js) |
||
| 11 | * - (c) John MacFarlane |
||
| 12 | * |
||
| 13 | * Additional code based on commonmark-java (https://github.com/commonmark/commonmark-java) |
||
| 14 | * - (c) Atlassian Pty Ltd |
||
| 15 | * |
||
| 16 | * For the full copyright and license information, please view the LICENSE |
||
| 17 | * file that was distributed with this source code. |
||
| 18 | */ |
||
| 19 | |||
| 20 | namespace League\CommonMark\Parser; |
||
| 21 | |||
| 22 | use League\CommonMark\Environment\EnvironmentInterface; |
||
| 23 | use League\CommonMark\Event\DocumentParsedEvent; |
||
| 24 | use League\CommonMark\Event\DocumentPreParsedEvent; |
||
| 25 | use League\CommonMark\Exception\CommonMarkException; |
||
| 26 | use League\CommonMark\Input\MarkdownInput; |
||
| 27 | use League\CommonMark\Node\Block\Document; |
||
| 28 | use League\CommonMark\Node\Block\Paragraph; |
||
| 29 | use League\CommonMark\Parser\Block\BlockContinueParserInterface; |
||
| 30 | use League\CommonMark\Parser\Block\BlockContinueParserWithInlinesInterface; |
||
| 31 | use League\CommonMark\Parser\Block\BlockStart; |
||
| 32 | use League\CommonMark\Parser\Block\BlockStartParserInterface; |
||
| 33 | use League\CommonMark\Parser\Block\DocumentBlockParser; |
||
| 34 | use League\CommonMark\Parser\Block\ParagraphParser; |
||
| 35 | use League\CommonMark\Reference\MemoryLimitedReferenceMap; |
||
|
0 ignored issues
–
show
|
|||
| 36 | use League\CommonMark\Reference\ReferenceInterface; |
||
| 37 | use League\CommonMark\Reference\ReferenceMap; |
||
| 38 | |||
final class MarkdownParser implements MarkdownParserInterface
{
    /** @psalm-readonly */
    private EnvironmentInterface $environment;

    /** @psalm-readonly-allow-private-mutation */
    private int $maxNestingLevel;

    /** @psalm-readonly-allow-private-mutation */
    private ReferenceMap $referenceMap;

    /** @psalm-readonly-allow-private-mutation */
    private int $lineNumber = 0;

    /** @psalm-readonly-allow-private-mutation */
    private Cursor $cursor;

    /**
     * Stack of block parsers that are currently open; the last element is the active one.
     *
     * @var array<int, BlockContinueParserInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $activeBlockParsers = [];

    /**
     * Closed block parsers that still need their inline content parsed.
     *
     * @var array<int, BlockContinueParserWithInlinesInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $closedBlockParsers = [];

    public function __construct(EnvironmentInterface $environment)
    {
        $this->environment = $environment;
    }

    /**
     * Reset all per-document state so the same instance can parse another input.
     */
    private function initialize(): void
    {
        $this->activeBlockParsers = [];
        $this->closedBlockParsers = [];
        $this->lineNumber         = 0;
        $this->referenceMap       = new ReferenceMap();

        $this->maxNestingLevel = $this->environment->getConfiguration()->get('max_nesting_level');
    }

    /**
     * Parse the given Markdown into a Document.
     *
     * Dispatches a DocumentPreParsedEvent before any line is read (the lines that get parsed are
     * taken from that event) and a DocumentParsedEvent once blocks are closed and inlines processed.
     *
     * @throws CommonMarkException
     */
    public function parse(string $input): Document
    {
        $this->initialize();

        $rootParser = new DocumentBlockParser($this->referenceMap);
        $this->activateBlockParser($rootParser);

        $preParsed = new DocumentPreParsedEvent($rootParser->getBlock(), new MarkdownInput($input));
        $this->environment->dispatch($preParsed);

        foreach ($preParsed->getMarkdown()->getLines() as $number => $text) {
            $this->lineNumber = $number;
            $this->parseLine($text);
        }

        // Close whatever is still open, then turn the collected string content into inlines
        $this->closeBlockParsers(\count($this->activeBlockParsers), $this->lineNumber);
        $this->processInlines(\strlen($input));

        $this->environment->dispatch(new DocumentParsedEvent($rootParser->getBlock()));

        return $rootParser->getBlock();
    }

    /**
     * Feed a single line of text into the open block structure.
     *
     * Already-open blocks are matched first, then new block starts are attempted, and finally
     * whatever text is left on the line is attached to the appropriate block.
     */
    private function parseLine(string $line): void
    {
        // replace NUL characters for security
        $this->cursor = new Cursor(\str_replace("\0", "\u{FFFD}", $line));

        $matchedCount = $this->parseBlockContinuation();
        if ($matchedCount === null) {
            // A block asked to be finalized on this line; nothing more to do
            return;
        }

        $pendingClosures = \count($this->activeBlockParsers) - $matchedCount;
        $tip             = $this->activeBlockParsers[$matchedCount - 1];
        $openedBlock     = false;

        // Only paragraphs and containers may be followed by a new block start
        $keepLooking = $tip->getBlock() instanceof Paragraph || $tip->isContainer();
        while ($keepLooking) {
            // Small optimization: a blank remainder cannot start anything
            if ($this->cursor->isBlank()) {
                $this->cursor->advanceToEnd();
                break;
            }

            // Stop opening blocks once the configured nesting limit is reached
            if ($tip->getBlock()->getDepth() >= $this->maxNestingLevel) {
                break;
            }

            $start = $this->findBlockStart($tip);
            if ($start === null || $start->isAborting()) {
                $this->cursor->advanceToNextNonSpaceOrTab();
                break;
            }

            $cursorState = $start->getCursorState();
            if ($cursorState !== null) {
                $this->cursor->restoreState($cursorState);
            }

            $openedBlock = true;

            // A new block is starting, so blocks that failed to continue must be closed first
            if ($pendingClosures > 0) {
                $this->closeBlockParsers($pendingClosures, $this->lineNumber - 1);
                $pendingClosures = 0;
            }

            $replacedStartLine = $start->isReplaceActiveBlockParser()
                ? $this->prepareActiveBlockParserForReplacement()
                : null;

            foreach ($start->getBlockParsers() as $child) {
                $tip         = $this->addChild($child, $replacedStartLine);
                $keepLooking = $child->isContainer();
            }
        }

        // What remains at the cursor is a text line; decide which block receives it.

        // Lazy continuation: no new block, non-blank text, and the active parser accepts it
        $isLazyContinuation = ! $openedBlock
            && ! $this->cursor->isBlank()
            && $this->getActiveBlockParser()->canHaveLazyContinuationLines();

        if ($isLazyContinuation) {
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());

            return;
        }

        // Finalize any blocks that were not matched
        if ($pendingClosures > 0) {
            $this->closeBlockParsers($pendingClosures, $this->lineNumber - 1);
        }

        if (! $tip->isContainer()) {
            $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());

            return;
        }

        if ($this->cursor->isBlank()) {
            return;
        }

        // Non-blank text directly inside a container opens a new paragraph
        $this->addChild(new ParagraphParser());
        $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
    }

    /**
     * Ask each open block (other than the document) whether it continues on the current line.
     *
     * @return int|null Number of open blocks that matched, counting the document itself, or null when
     *                  a block requested finalization (that block and everything nested in it is closed)
     */
    private function parseBlockContinuation(): ?int
    {
        // Index 0 is the document, which always matches, so counting starts at 1.
        // $matched doubles as the index of the next parser to test.
        $matched = 1;

        while ($matched < \count($this->activeBlockParsers)) {
            $candidate    = $this->activeBlockParsers[$matched];
            $continuation = $candidate->tryContinue(clone $this->cursor, $this->getActiveBlockParser());

            if ($continuation === null) {
                break;
            }

            if ($continuation->isFinalize()) {
                $this->closeBlockParsers(\count($this->activeBlockParsers) - $matched, $this->lineNumber);

                return null;
            }

            $cursorState = $continuation->getCursorState();
            if ($cursorState !== null) {
                $this->cursor->restoreState($cursorState);
            }

            $matched++;
        }

        return $matched;
    }

    /**
     * Return the first non-null result from the environment's block start parsers, or null if none match.
     *
     * Each parser receives its own clone of the cursor.
     */
    private function findBlockStart(BlockContinueParserInterface $lastMatchedBlockParser): ?BlockStart
    {
        $parserState = new MarkdownParserState($this->getActiveBlockParser(), $lastMatchedBlockParser);

        foreach ($this->environment->getBlockStartParsers() as $startParser) {
            \assert($startParser instanceof BlockStartParserInterface);

            $start = $startParser->tryStart(clone $this->cursor, $parserState);
            if ($start !== null) {
                return $start;
            }
        }

        return null;
    }

    /**
     * Pop and finalize the given number of parsers from the top of the active stack.
     *
     * Parsers that support inline parsing are queued for processInlines().
     */
    private function closeBlockParsers(int $count, int $endLineNumber): void
    {
        for ($remaining = $count; $remaining > 0; $remaining--) {
            $closing = $this->deactivateBlockParser();
            $this->finalize($closing, $endLineNumber);

            // phpcs:disable SlevomatCodingStandard.ControlStructures.EarlyExit.EarlyExitNotUsed
            if ($closing instanceof BlockContinueParserWithInlinesInterface) {
                // Remember for inline parsing
                $this->closedBlockParsers[] = $closing;
            }
        }
    }

    /**
     * Finalize a block: record its end line and close it.
     *
     * References collected by a paragraph parser are merged into the reference map first.
     */
    private function finalize(BlockContinueParserInterface $blockParser, int $endLineNumber): void
    {
        if ($blockParser instanceof ParagraphParser) {
            $this->updateReferenceMap($blockParser->getReferences());
        }

        $block = $blockParser->getBlock();
        $block->setEndLine($endLineNumber);

        $blockParser->closeBlock();
    }

    /**
     * Run inline parsing for every closed block parser that was queued for it.
     *
     * @param int $inputSize Passed through to MemoryLimitedReferenceMap together with the reference map
     */
    private function processInlines(int $inputSize): void
    {
        $limitedReferences = new MemoryLimitedReferenceMap($this->referenceMap, $inputSize);
        $inlineEngine      = new InlineParserEngine($this->environment, $limitedReferences);

        foreach ($this->closedBlockParsers as $closedParser) {
            $closedParser->parseInlines($inlineEngine);
        }
    }

    /**
     * Attach the parser's block beneath the active block and make the parser active.
     *
     * Active parsers that cannot contain the new block are closed, one at a time, until one can.
     *
     * @param int|null $startLineNumber Start line to record on the block; defaults to the current line
     */
    private function addChild(BlockContinueParserInterface $blockParser, ?int $startLineNumber = null): BlockContinueParserInterface
    {
        $startLine = $startLineNumber ?? $this->lineNumber;

        $blockParser->getBlock()->setStartLine($startLine);

        while (! $this->getActiveBlockParser()->canContain($blockParser->getBlock())) {
            $this->closeBlockParsers(1, $startLine - 1);
        }

        $this->getActiveBlockParser()->getBlock()->appendChild($blockParser->getBlock());
        $this->activateBlockParser($blockParser);

        return $blockParser;
    }

    /**
     * Push a parser onto the stack of active block parsers.
     */
    private function activateBlockParser(BlockContinueParserInterface $blockParser): void
    {
        $this->activeBlockParsers[] = $blockParser;
    }

    /**
     * Pop and return the parser on top of the active stack.
     *
     * @throws ParserLogicException if the stack is already empty
     */
    private function deactivateBlockParser(): BlockContinueParserInterface
    {
        $top = \array_pop($this->activeBlockParsers);
        if ($top !== null) {
            return $top;
        }

        throw new ParserLogicException('The last block parser should not be deactivated');
    }

    /**
     * Remove the active parser and detach its block so a replacement can take its place.
     *
     * @return int|null The line number where the old block started
     */
    private function prepareActiveBlockParserForReplacement(): ?int
    {
        // The block is being replaced, so it is neither finalized nor queued for inline parsing
        $replaced = $this->deactivateBlockParser();

        if ($replaced instanceof ParagraphParser) {
            $this->updateReferenceMap($replaced->getReferences());
        }

        $replaced->getBlock()->detach();

        return $replaced->getBlock()->getStartLine();
    }

    /**
     * Add each reference whose label is not already present in the reference map.
     *
     * @param ReferenceInterface[] $references
     */
    private function updateReferenceMap(iterable $references): void
    {
        foreach ($references as $candidate) {
            if ($this->referenceMap->contains($candidate->getLabel())) {
                // An existing entry with this label is kept
                continue;
            }

            $this->referenceMap->add($candidate);
        }
    }

    /**
     * Return the parser on top of the active stack.
     *
     * @throws ParserLogicException if no parser is active
     */
    public function getActiveBlockParser(): BlockContinueParserInterface
    {
        $top = \end($this->activeBlockParsers);
        if ($top !== false) {
            return $top;
        }

        throw new ParserLogicException('No active block parsers are available');
    }
}
| 357 |
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

    filter:
        dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths