Issues in LinkParserHelper.php (renovate/phpunit-phpunit-12.x) - Issues in renovate/phpunit-phpunit-12.x - thephpleague/commonmark - Measure and Improve Code Quality continuously with Scrutinizer

Issues (85)

src/Util/LinkParserHelper.php (1 issue)

Severity

Unknown 1

<?php

declare(strict_types=1);

/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <[email protected]>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace League\CommonMark\Util;

use League\CommonMark\Parser\Cursor;

/**
 * Static helpers that read the individual pieces of a link (destination,
 * label and title) from a Cursor.
 *
 * @psalm-immutable
 */
final class LinkParserHelper
{
    /**
     * Cursor most recently handed to parseDestinationBraces(), held weakly.
     *
     * @var \WeakReference<Cursor>|null
     */
    private static ?\WeakReference $lastCursor = null;

    /**
     * Whether the last parseDestinationBraces() attempt on $lastCursor failed to match.
     */
    private static bool $lastCursorLacksClosingBrace = false;

    /**
     * Attempt to parse link destination
     *
     * A destination starting with `<` is handled by the angle-bracket parser;
     * anything else is scanned character by character and then unescaped and
     * URL-encoded.
     *
     * @return string|null The string, or null if no match
     */
    public static function parseLinkDestination(Cursor $cursor): ?string
    {
        if ($cursor->getCurrentCharacter() !== '<') {
            $raw = self::manuallyParseLinkDestination($cursor);

            return $raw === null
                ? null
                : UrlEncoder::unescapeAndEncode(RegexHelper::unescape($raw));
        }

        return self::parseDestinationBraces($cursor);
    }

    /**
     * Attempt to parse a bracketed link label
     *
     * @return int Length (in UTF-8 characters) of the matched label including
     *             its brackets, or 0 if nothing matched or the match is longer
     *             than 1001 characters
     */
    public static function parseLinkLabel(Cursor $cursor): int
    {
        $label = $cursor->match('/^\[(?:[^\\\\\[\]]|\\\\.){0,1000}\]/');
        if ($label === null) {
            return 0;
        }

        $characters = \mb_strlen($label, 'UTF-8');

        // Escape sequences count as two characters, so the pattern alone does not cap the length
        return $characters > 1001 ? 0 : $characters;
    }

    /**
     * Match a run of label content: anything except brackets and backslashes,
     * plus backslash sequences (including a lone trailing backslash)
     *
     * @return string|null Whatever the cursor matched, or null if no match
     */
    public static function parsePartialLinkLabel(Cursor $cursor): ?string
    {
        $pattern = '/^(?:[^\\\\\[\]]++|\\\\.?)*+/';

        return $cursor->match($pattern);
    }

    /**
     * Attempt to parse link title (sans quotes)
     *
     * @return string|null The string, or null if no match
     */
    public static function parseLinkTitle(Cursor $cursor): ?string
    {
        $quoted = $cursor->match('/' . RegexHelper::PARTIAL_LINK_TITLE . '/');
        if (! $quoted) {
            return null;
        }

        // Drop the first and last character (the delimiters) before unescaping
        $inner = \substr($quoted, 1, -1);

        return RegexHelper::unescape($inner);
    }

    /**
     * Attempt to parse a piece of a link title, optionally ending with the
     * given delimiter
     *
     * @param string $endDelimiter Closing delimiter; it is regex-quoted before use
     *
     * @return string|null The unescaped match, or null if no match
     */
    public static function parsePartialLinkTitle(Cursor $cursor, string $endDelimiter): ?string
    {
        $quotedDelimiter = \preg_quote($endDelimiter, '/');
        $pattern         = \sprintf(
            '/(%s|[^%s\x00])*(?:%s)?/',
            RegexHelper::PARTIAL_ESCAPED_CHAR,
            $quotedDelimiter,
            $quotedDelimiter
        );

        $partial = $cursor->match($pattern);

        return $partial === null ? null : RegexHelper::unescape($partial);
    }

    /**
     * Scan a destination that is not wrapped in angle brackets
     *
     * The scan stops at the first whitespace character or at a `)` that has no
     * matching `(`. On success the cursor is advanced past the destination.
     *
     * @return string|null The raw (still escaped) destination, or null if no match
     */
    private static function manuallyParseLinkDestination(Cursor $cursor): ?string
    {
        $text     = $cursor->getRemainder();
        $byteSize = \strlen($text);
        $depth    = 0;
        $pos      = 0;
        $char     = null;

        while ($pos < $byteSize) {
            $char = $text[$pos];

            // A backslash followed by an escapable character consumes both bytes
            if ($char === '\\' && $pos + 1 < $byteSize && RegexHelper::isEscapable($text[$pos + 1])) {
                $pos += 2;
                continue;
            }

            if ($char === '(') {
                // Limit to 32 nested parens for pathological cases
                if (++$depth > 32) {
                    return null;
                }
            } elseif ($char === ')') {
                if ($depth === 0) {
                    // Unmatched closer: it belongs to the surrounding syntax, not the destination
                    break;
                }

                $depth--;
            } elseif (\ord($char) <= 32 && RegexHelper::isWhitespace($char)) {
                break;
            }

            $pos++;
        }

        // Reachable when the input ends (or whitespace is hit) while a "(" is still open
        if ($depth !== 0) {
            return null;
        }

        // Nothing consumed is only acceptable when we stopped right at a ")"
        if ($pos === 0 && $char !== ')') {
            return null;
        }

        $destination = \substr($text, 0, $pos);

        // $pos is a byte offset; the cursor is advanced by the UTF-8 character count instead
        $cursor->advanceBy(\mb_strlen($destination, 'UTF-8'));

        return $destination;
    }

    /**
     * Parse a destination wrapped in angle brackets
     *
     * @return string|null The unescaped, URL-encoded destination without the
     *                     surrounding brackets, or null if no match
     */
    private static function parseDestinationBraces(Cursor $cursor): ?string
    {
        // Optimization: If we've previously parsed this cursor and returned `null`, we know
        // that no closing brace exists, so we can skip the regex entirely. This helps avoid
        // certain pathological cases where the regex engine can take a very long time to
        // determine that no match exists.
        $isSameCursor = self::$lastCursor !== null && self::$lastCursor->get() === $cursor;

        if ($isSameCursor && self::$lastCursorLacksClosingBrace) {
            return null;
        }

        if (! $isSameCursor) {
            self::$lastCursor = \WeakReference::create($cursor);
        }

        $bracketed = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES);
        if (! $bracketed) {
            self::$lastCursorLacksClosingBrace = true;

            return null;
        }

        self::$lastCursorLacksClosingBrace = false;

        // Chop off surrounding <..>:
        $inner = \substr($bracketed, 1, -1);

        return UrlEncoder::unescapeAndEncode(RegexHelper::unescape($inner));
    }
}


1		<?php
2
3		declare(strict_types=1);
4
5		/*
6		* This file is part of the league/commonmark package.
7		*
8		* (c) Colin O'Dell <[email protected]>
9		*
10		* Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
11		* - (c) John MacFarlane
12		*
13		* For the full copyright and license information, please view the LICENSE
14		* file that was distributed with this source code.
15		*/
16
17		namespace League\CommonMark\Util;
18
19		use League\CommonMark\Parser\Cursor;
20
21		/**
22		* @psalm-immutable
23		*/
24		final class LinkParserHelper
25		{
26		/**
27		* Attempt to parse link destination
28		*
29		* @return string\|null The string, or null if no match
30		*/
31	402	public static function parseLinkDestination(Cursor $cursor): ?string
32		{
33	402	if ($cursor->getCurrentCharacter() === '<') {
34	34	return self::parseDestinationBraces($cursor);
35		}
36
37	370	$destination = self::manuallyParseLinkDestination($cursor);
38	370	if ($destination === null) {
39		return null;
40		}
41
42	370	return UrlEncoder::unescapeAndEncode(
43	370	RegexHelper::unescape($destination)
44	370	);
45		}
46
47	226	public static function parseLinkLabel(Cursor $cursor): int
48		{
49	226	$match = $cursor->match('/^\[(?:[^\\\\\[\]]\|\\\\.){0,1000}\]/');
50	226	if ($match === null) {
51	172	return 0;
52		}
53
54	62	$length = \mb_strlen($match, 'UTF-8');
55
56	62	if ($length > 1001) {
57		return 0;
58		}
59
60	62	return $length;
61		}
62
63	338	public static function parsePartialLinkLabel(Cursor $cursor): ?string
64		{
65	338	return $cursor->match('/^(?:[^\\\\\[\]]++\|\\\\.?)*+/');
66		}
67
68		/**
69		* Attempt to parse link title (sans quotes)
70		*
71		* @return string\|null The string, or null if no match
72		*/
73	44	public static function parseLinkTitle(Cursor $cursor): ?string
74		{
75	44	if ($title = $cursor->match('/' . RegexHelper::PARTIAL_LINK_TITLE . '/')) {
76		// Chop off quotes from title and unescape
77	22	return RegexHelper::unescape(\substr($title, 1, -1));
78		}
79
80	22	return null;
81		}
82
83	82	public static function parsePartialLinkTitle(Cursor $cursor, string $endDelimiter): ?string
84		{
85	82	$endDelimiter = \preg_quote($endDelimiter, '/');
86	82	$regex = \sprintf('/(%s\|[^%s\x00])*(?:%s)?/', RegexHelper::PARTIAL_ESCAPED_CHAR, $endDelimiter, $endDelimiter);
87	82	if (($partialTitle = $cursor->match($regex)) === null) {
88		return null;
89		}
90
91	82	return RegexHelper::unescape($partialTitle);
92		}
93
94	370	private static function manuallyParseLinkDestination(Cursor $cursor): ?string
95		{
96	370	$remainder = $cursor->getRemainder();
97	370	$openParens = 0;
98	370	$len = \strlen($remainder);
99	370	for ($i = 0; $i < $len; $i++) {
100	370	$c = $remainder[$i];
101	370	if ($c === '\\' && $i + 1 < $len && RegexHelper::isEscapable($remainder[$i + 1])) {
102	12	$i++;
103	370	} elseif ($c === '(') {
104	16	$openParens++;
105		// Limit to 32 nested parens for pathological cases
106	16	if ($openParens > 32) {
107	8	return null;
108		}
109	370	} elseif ($c === ')') {
110	160	if ($openParens < 1) {
111	156	break;
112		}
113
114	16	$openParens--;
115	364	} elseif (\ord($c) <= 32 && RegexHelper::isWhitespace($c)) {
116	126	break;
117		}
118		}
119
120	370	if ($openParens !== 0) {
		0 ignored issues – show introduced 2024-12-07 15:48 UTC by Report Bug Copy Issue Report Show Similar Issues like this The condition `$openParens !== 0` is always `false`. Loading history...
121		return null;
122		}
123
124	370	if ($i === 0 && (! isset($c) \|\| $c !== ')')) {
125		return null;
126		}
127
128	370	$destination = \substr($remainder, 0, $i);
129	370	$cursor->advanceBy(\mb_strlen($destination, 'UTF-8'));
130
131	370	return $destination;
132		}
133
134		/** @var \WeakReference<Cursor>\|null */
135		private static ?\WeakReference $lastCursor = null;
136		private static bool $lastCursorLacksClosingBrace = false;
137
138	34	private static function parseDestinationBraces(Cursor $cursor): ?string
139		{
140		// Optimization: If we've previously parsed this cursor and returned `null`, we know
141		// that no closing brace exists, so we can skip the regex entirely. This helps avoid
142		// certain pathological cases where the regex engine can take a very long time to
143		// determine that no match exists.
144	34	if (self::$lastCursor !== null && self::$lastCursor->get() === $cursor) {
145	2	if (self::$lastCursorLacksClosingBrace) {
146	2	return null;
147		}
148		} else {
149	34	self::$lastCursor = \WeakReference::create($cursor);
150		}
151
152	34	if ($res = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES)) {
153	24	self::$lastCursorLacksClosingBrace = false;
154
155		// Chop off surrounding <..>:
156	24	return UrlEncoder::unescapeAndEncode(
157	24	RegexHelper::unescape(\substr($res, 1, -1))
158	24	);
159		}
160
161	10	self::$lastCursorLacksClosingBrace = true;
162
163	10	return null;
164		}
165		}
166

thephpleague / commonmark

Issues (85)

src/Util/LinkParserHelper.php (1 issue)

Severity

Introduced By

Duplication Side-by-Side

Filter issues like