InlineParserEngine::parse() - Code Metrics - thephpleague/commonmark - Measure and Improve Code Quality continuously with Scrutinizer

InlineParserEngine::parse() B
last analyzed 2021-05-31 19:47 UTC

↳ Parent: InlineParserEngine

Complexity

Conditions	8
Paths	28

Size

Total Lines	53
Code Lines	26

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	26
CRAP Score	8

Importance

Changes

Metric	Value
eloc	26
c	0
b	0
f	0
dl	0
loc	53
ccs	26
cts	26
cp	1
rs	8.4444
cc	8
nc	28
nop	2
crap	8

How to fix Long Method

<?php

declare(strict_types=1);

/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <[email protected]>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace League\CommonMark\Parser;

use League\CommonMark\Environment\EnvironmentInterface;
use League\CommonMark\Node\Block\AbstractBlock;
use League\CommonMark\Node\Inline\AdjacentTextMerger;
use League\CommonMark\Node\Inline\Text;
use League\CommonMark\Parser\Inline\InlineParserInterface;
use League\CommonMark\Reference\ReferenceMapInterface;

/**
 * Runs the registered inline parsers over a block's text content and appends the
 * resulting inline nodes (plus any unparsed plain text) to that block.
 *
 * @internal
 */
final class InlineParserEngine implements InlineParserEngineInterface
{
    /**
     * @var EnvironmentInterface
     *
     * @psalm-readonly
     */
    private $environment;

    /**
     * @var ReferenceMapInterface
     *
     * @psalm-readonly
     */
    private $referenceMap;

    /**
     * One entry per registered inline parser: the parser itself, its match regex, and whether
     * that regex contains multibyte characters (and so must always be run with the `u` modifier).
     *
     * Initialised to an empty list so that an environment without any inline parsers
     * leaves an iterable array here rather than null.
     *
     * @var array<int, InlineParserInterface|string|bool>
     * @psalm-var list<array{0: InlineParserInterface, 1: string, 2: bool}>
     * @phpstan-var array<int, array{0: InlineParserInterface, 1: string, 2: bool}>
     */
    private $parsers = [];

    /**
     * Caches every inline parser from the environment together with its match regex.
     *
     * @param EnvironmentInterface  $environment  Source of the inline parsers and delimiter processors
     * @param ReferenceMapInterface $referenceMap Passed through to every InlineParserContext created by parse()
     */
    public function __construct(EnvironmentInterface $environment, ReferenceMapInterface $referenceMap)
    {
        $this->environment  = $environment;
        $this->referenceMap = $referenceMap;

        foreach ($environment->getInlineParsers() as $parser) {
            \assert($parser instanceof InlineParserInterface);
            $regex = $parser->getMatchDefinition()->getRegex();

            // Byte length differs from UTF-8 character length only when the regex has multibyte characters
            $this->parsers[] = [$parser, $regex, \strlen($regex) !== \mb_strlen($regex, 'UTF-8')];
        }
    }

    /**
     * Parses the given text and appends the resulting inline nodes to the block.
     *
     * The text is trimmed first. Positions that at least one parser is interested in are visited
     * in ascending order; text between those positions, and text no parser accepted, is added as
     * plain Text nodes. Afterwards the delimiter stack is processed and adjacent text nodes merged.
     *
     * @param string        $contents Raw inline text of the block
     * @param AbstractBlock $block    Block that receives the parsed inline children
     */
    public function parse(string $contents, AbstractBlock $block): void
    {
        $contents = \trim($contents);
        $cursor   = new Cursor($contents);

        $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap);

        // Have all parsers look at the line to determine what they might want to parse and what positions they exist at
        foreach ($this->matchParsers($contents) as $matchPosition => $parsers) {
            $currentPosition = $cursor->getPosition();
            // We've already gone past this point
            if ($currentPosition > $matchPosition) {
                continue;
            }

            // We've skipped over some uninteresting text that should be added as a plain text node
            if ($currentPosition < $matchPosition) {
                $cursor->advanceBy($matchPosition - $currentPosition);
                $this->addPlainText($cursor->getPreviousText(), $block);
            }

            // We're now at a potential start - see which of the current parsers can handle it
            $parsed = false;
            foreach ($parsers as [$parser, $matches]) {
                \assert($parser instanceof InlineParserInterface);
                if ($parser->parse($inlineParserContext->withMatches($matches))) {
                    // A parser has successfully handled the text at the given position; don't consider any others at this position
                    $parsed = true;
                    break;
                }
            }

            if ($parsed) {
                continue;
            }

            // Despite potentially being interested, nothing actually parsed text here, so add the current character and continue onwards
            $this->addPlainText((string) $cursor->getCharacter(), $block);
            $cursor->advance();
        }

        // Add any remaining text that wasn't parsed
        if (! $cursor->isAtEnd()) {
            $this->addPlainText($cursor->getRemainder(), $block);
        }

        // Process any delimiters that were found
        $delimiterStack = $inlineParserContext->getDelimiterStack();
        $delimiterStack->processDelimiters(null, $this->environment->getDelimiterProcessors());
        $delimiterStack->removeAll();

        // Combine adjacent text nodes into one
        AdjacentTextMerger::mergeChildNodes($block);
    }

    /**
     * Adds plain text to the container, extending its last child when that child is a
     * Text node without a 'delim' data entry, and appending a new Text node otherwise.
     */
    private function addPlainText(string $text, AbstractBlock $container): void
    {
        $lastInline = $container->lastChild();
        if ($lastInline instanceof Text && ! $lastInline->data->has('delim')) {
            $lastInline->append($text);
        } else {
            $container->appendChild(new Text($text));
        }
    }

    /**
     * Given the current line, ask all the parsers which parts of the text they would be interested in parsing.
     *
     * The resulting array provides a list of character positions, which parsers are interested in trying to parse
     * the text at those points, and (for convenience/optimization) what the matching text happened to be.
     * Keys are character (not byte) offsets into the trimmed contents, sorted in ascending order.
     *
     * @return array<array<int, InlineParserInterface|string>>
     *
     * @psalm-return array<int, list<array{0: InlineParserInterface, 1: non-empty-array<string>}>>
     *
     * @phpstan-return array<int, array<int, array{0: InlineParserInterface, 1: non-empty-array<string>}>>
     */
    private function matchParsers(string $contents): array
    {
        // parse() already trims; repeating it here is a no-op that keeps offsets aligned with the cursor
        $contents    = \trim($contents);
        $isMultibyte = \mb_strlen($contents, 'UTF-8') !== \strlen($contents);

        $ret = [];

        foreach ($this->parsers as [$parser, $regex, $isRegexMultibyte]) {
            // Run the regex with the `u` modifier whenever either side contains multibyte characters
            if ($isMultibyte || $isRegexMultibyte) {
                $regex .= 'u';
            }

            // See if the parser's InlineParserMatch regex matched against any part of the string
            if (! \preg_match_all($regex, $contents, $matches, \PREG_OFFSET_CAPTURE | \PREG_SET_ORDER)) {
                continue;
            }

            // For each part that matched...
            foreach ($matches as $match) {
                if ($isMultibyte) {
                    // PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
                    $offset = \mb_strlen(\substr($contents, 0, $match[0][1]), 'UTF-8');
                } else {
                    $offset = $match[0][1];
                }

                \assert(\is_int($offset));

                // Remove the offsets, keeping only the matched text
                $m = \array_map(static function (array $s): string {
                    return (string) $s[0];
                }, $match);

                if ($m === []) {
                    continue;
                }

                // Add this match to the list of character positions to stop at
                $ret[$offset][] = [$parser, $m];
            }
        }

        // Sort matches by position so we visit them in order
        \ksort($ret);

        return $ret;
    }
}


1		<?php
2
3		declare(strict_types=1);
4
5		/*
6		* This file is part of the league/commonmark package.
7		*
8		* (c) Colin O'Dell <[email protected]>
9		*
10		* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11		* - (c) John MacFarlane
12		*
13		* For the full copyright and license information, please view the LICENSE
14		* file that was distributed with this source code.
15		*/
16
17		namespace League\CommonMark\Parser;
18
19		use League\CommonMark\Environment\EnvironmentInterface;
20		use League\CommonMark\Node\Block\AbstractBlock;
21		use League\CommonMark\Node\Inline\AdjacentTextMerger;
22		use League\CommonMark\Node\Inline\Text;
23		use League\CommonMark\Parser\Inline\InlineParserInterface;
24		use League\CommonMark\Reference\ReferenceMapInterface;
25
26		/**
27		* @internal
28		*/
29		final class InlineParserEngine implements InlineParserEngineInterface
30		{
31		/**
32		* @var EnvironmentInterface
33		*
34		* @psalm-readonly
35		*/
36		private $environment;
37
38		/**
39		* @var ReferenceMapInterface
40		*
41		* @psalm-readonly
42		*/
43		private $referenceMap;
44
45		/**
46		* @var array<int, InlineParserInterface|string|bool>
47		* @psalm-var list<array{0: InlineParserInterface, 1: string, 2: bool}>
48		* @phpstan-var array<int, array{0: InlineParserInterface, 1: string, 2: bool}>
49		*/
50		private $parsers;
51
52	2988	public function __construct(EnvironmentInterface $environment, ReferenceMapInterface $referenceMap)
53		{
54	2988	$this->environment = $environment;
55	2988	$this->referenceMap = $referenceMap;
56
57	2988	foreach ($environment->getInlineParsers() as $parser) {
58		\assert($parser instanceof InlineParserInterface);
59	2988	$regex = $parser->getMatchDefinition()->getRegex();
60
61	2988	$this->parsers[] = [$parser, $regex, \strlen($regex) !== \mb_strlen($regex)];
62		}
63	2988	}
64
65	2694	public function parse(string $contents, AbstractBlock $block): void
66		{
67	2694	$contents = \trim($contents);
68	2694	$cursor = new Cursor($contents);
69
70	2694	$inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap);
71
72		// Have all parsers look at the line to determine what they might want to parse and what positions they exist at
73	2694	foreach ($this->matchParsers($contents) as $matchPosition => $parsers) {
74	2070	$currentPosition = $cursor->getPosition();
75		// We've already gone past this point
76	2070	if ($currentPosition > $matchPosition) {
77	927	continue;
78		}
79
80		// We've skipped over some uninteresting text that should be added as a plain text node
81	2070	if ($currentPosition < $matchPosition) {
82	1797	$cursor->advanceBy($matchPosition - $currentPosition);
83	1797	$this->addPlainText($cursor->getPreviousText(), $block);
84		}
85
86		// We're now at a potential start - see which of the current parsers can handle it
87	2070	$parsed = false;
88	2070	foreach ($parsers as [$parser, $matches]) {
89		\assert($parser instanceof InlineParserInterface);
90	2070	if ($parser->parse($inlineParserContext->withMatches($matches))) {
91		// A parser has successfully handled the text at the given position; don't consider any others at this position
92	2058	$parsed = true;
93	2058	break;
94		}
95		}
96
97	2070	if ($parsed) {
98	2058	continue;
99		}
100
101		// Despite potentially being interested, nothing actually parsed text here, so add the current character and continue onwards
102	186	$this->addPlainText((string) $cursor->getCharacter(), $block);
103	186	$cursor->advance();
104		}
105
106		// Add any remaining text that wasn't parsed
107	2694	if (! $cursor->isAtEnd()) {
108	1371	$this->addPlainText($cursor->getRemainder(), $block);
109		}
110
111		// Process any delimiters that were found
112	2694	$delimiterStack = $inlineParserContext->getDelimiterStack();
113	2694	$delimiterStack->processDelimiters(null, $this->environment->getDelimiterProcessors());
114	2694	$delimiterStack->removeAll();
115
116		// Combine adjacent text nodes into one
117	2694	AdjacentTextMerger::mergeChildNodes($block);
118	2694	}
119
120	2493	private function addPlainText(string $text, AbstractBlock $container): void
121		{
122	2493	$lastInline = $container->lastChild();
123	2493	if ($lastInline instanceof Text && ! $lastInline->data->has('delim')) {
124	270	$lastInline->append($text);
125		} else {
126	2457	$container->appendChild(new Text($text));
127		}
128	2493	}
129
130		/**
131		* Given the current line, ask all the parsers which parts of the text they would be interested in parsing.
132		*
133		* The resulting array provides a list of character positions, which parsers are interested in trying to parse
134		* the text at those points, and (for convenience/optimization) what the matching text happened to be.
135		*
136		* @return array<array<int, InlineParserInterface|string>>
137		*
138		* @psalm-return array<int, list<array{0: InlineParserInterface, 1: non-empty-array<string>}>>
139		*
140		* @phpstan-return array<int, array<int, array{0: InlineParserInterface, 1: non-empty-array<string>}>>
141		*/
142	2694	private function matchParsers(string $contents): array
143		{
144	2694	$contents = \trim($contents);
145	2694	$isMultibyte = \mb_strlen($contents) !== \strlen($contents);
146
147	2694	$ret = [];
148
149	2694	foreach ($this->parsers as [$parser, $regex, $isRegexMultibyte]) {
150	2694	if ($isMultibyte || $isRegexMultibyte) {
151	111	$regex .= 'u';
152		}
153
154		// See if the parser's InlineParserMatch regex matched against any part of the string
155	2694	if (! \preg_match_all($regex, $contents, $matches, \PREG_OFFSET_CAPTURE | \PREG_SET_ORDER)) {
156	2688	continue;
157		}
158
159		// For each part that matched...
160	2070	foreach ($matches as $match) {
161	2070	if ($isMultibyte) {
162		// PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
163	57	$offset = \mb_strlen(\substr($contents, 0, $match[0][1]), 'UTF-8');
164		} else {
165	2013	$offset = $match[0][1];
166		}
167
168		\assert(\is_int($offset));
169
170		// Remove the offsets, keeping only the matched text
171	1380	$m = \array_map(static function (array $s): string {
172	2070	return (string) $s[0];
173	2070	}, $match);
174
175	2070	if ($m === []) {
176		continue;
177		}
178
179		// Add this match to the list of character positions to stop at
180	2070	$ret[$offset][] = [$parser, $m];
181		}
182		}
183
184		// Sort matches by position so we visit them in order
185	2694	\ksort($ret);
186
187	2694	return $ret;
188		}
189		}
190

thephpleague / commonmark

InlineParserEngine::parse() B last analyzed 2021-05-31 19:47 UTC

Complexity

Size

Duplication

Code Coverage

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like

InlineParserEngine::parse() B
last analyzed 2021-05-31 19:47 UTC