Passed
Push — commonmark-spec-0.31 ( f00425...7b65f6 )
by Colin
03:07
created

UrlAutolinkParser   A

Complexity

Total Complexity 13

Size/Duplication

Total Lines 131
Duplicated Lines 0 %

Test Coverage

Coverage 96.88%

Importance

Changes 0
Metric Value
eloc 33
c 0
b 0
f 0
dl 0
loc 131
ccs 31
cts 32
cp 0.9688
rs 10
wmc 13

4 Methods

Rating   Name   Duplication   Size   Complexity  
A getMatchDefinition() 0 3 1
A __construct() 0 9 2
A diffParens() 0 14 2
B parse() 0 45 8
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Extension\Autolink;
15
16
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
17
use League\CommonMark\Parser\Inline\InlineParserInterface;
18
use League\CommonMark\Parser\Inline\InlineParserMatch;
19
use League\CommonMark\Parser\InlineParserContext;
20
21
final class UrlAutolinkParser implements InlineParserInterface
22
{
23
    // Values the character just before a candidate URL may have for parse() to proceed:
    // ASCII whitespace, the delimiters '*', '_', '~' and '(', or null.
    // NOTE(review): null is presumably what Cursor::peek(-1) yields when there is no previous character — confirm.
    private const ALLOWED_AFTER = [null, ' ', "\t", "\n", "\x0b", "\x0c", "\x0d", '*', '_', '~', '('];
24
25
    // RegEx adapted from https://github.com/symfony/symfony/blob/6.3/src/Symfony/Component/Validator/Constraints/UrlValidator.php
26
    private const REGEX = '~
27
        (
28
            # Must start with a supported scheme + auth, or "www"
29
            (?:
30
                (?:%s)://                                                                            # protocol
31
                (?:(?:(?:[\_\.\pL\pN-]|%%[0-9A-Fa-f]{2})+:)?((?:[\_\.\pL\pN-]|%%[0-9A-Fa-f]{2})+)@)? # basic auth
32
            |www\.)
33
            (?:
34
                (?:
35
                    (?:xn--[a-z0-9-]++\.)*+xn--[a-z0-9-]++            # a domain name using punycode
36
                        |
37
                    (?:[\pL\pN\pS\pM\-\_]++\.)+[\pL\pN\pM]++          # a multi-level domain name
38
                        |
39
                    [a-z0-9\-\_]++                                    # a single-level domain name
40
                )\.?
41
                    |                                                 # or
42
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
43
                    |                                                 # or
44
                \[
45
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
46
                \]  # an IPv6 address
47
            )
48
            (?::[0-9]+)?                              # a port (optional)
49
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*        # a path
50
            (?:\? (?:[\pL\pN\-._\~!$&\'\[\]()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )? # a query (optional)
51
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?     # a fragment (optional)
52
        )~ixu';
53
54
    /**
55
     * @var string[]
56
     *
57
     * @psalm-readonly
58
     */
59
    private array $prefixes = ['www.'];
60
61
    /**
62
     * @psalm-var non-empty-string
63
     *
64
     * @psalm-readonly
65
     */
66
    private string $finalRegex;
67
68
    /**
     * Builds the URL-matching regex and the list of trigger prefixes for the given protocols.
     *
     * Every protocol contributes a "<protocol>://" prefix in addition to the built-in "www." one.
     *
     * @param array<int, string> $allowedProtocols URI schemes (without the "://" suffix) that may start an autolink
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'])
    {
        // Schemes may legally contain regex metacharacters ("+", "."), e.g. "svn+ssh".
        // Escape each one (including the "~" delimiter) before splicing it into the
        // alternation, otherwise the pattern would not match the literal scheme.
        $quotedProtocols = \array_map(
            static fn (string $protocol): string => \preg_quote($protocol, '~'),
            $allowedProtocols
        );

        /**
         * @psalm-suppress PropertyTypeCoercion
         */
        $this->finalRegex = \sprintf(self::REGEX, \implode('|', $quotedProtocols));

        // The prefixes are used verbatim as literal triggers, so they keep the unescaped scheme.
        foreach ($allowedProtocols as $protocol) {
            $this->prefixes[] = $protocol . '://';
        }
    }
82
83 182
    /**
     * Declares which literal prefixes should hand control to this parser:
     * "www." plus one "<protocol>://" entry per allowed protocol.
     */
    public function getMatchDefinition(): InlineParserMatch
    {
        $triggers = $this->prefixes;

        return InlineParserMatch::oneOf(...$triggers);
    }
87
88 80
    /**
     * Tries to turn the text at the cursor into a URL autolink.
     *
     * On success a Link node is appended to the current container and the cursor is
     * advanced past the linked text. Trailing punctuation, a trailing entity-like
     * sequence and unbalanced closing parentheses are left out of the link.
     *
     * @return bool true when a link was produced; false when the preceding character
     *              is not allowed or no URL starts exactly at the cursor
     */
    public function parse(InlineParserContext $inlineContext): bool
    {
        $cursor = $inlineContext->getCursor();

        // Autolinks can only come at the beginning of a line, after whitespace, or certain delimiting characters
        $previousChar = $cursor->peek(-1);
        if (! \in_array($previousChar, self::ALLOWED_AFTER, true)) {
            return false;
        }

        // Check if we have a valid URL
        if (! \preg_match($this->finalRegex, $cursor->getRemainder(), $urlMatch, \PREG_OFFSET_CAPTURE)) {
            return false;
        }

        // The pattern is not anchored, so it can also succeed on a URL that appears
        // further along in the remainder. Only a match beginning at the cursor is
        // usable: advancing by the length of a later match would consume unrelated text.
        if ($urlMatch[0][1] !== 0) {
            return false;
        }

        $url = $urlMatch[0][0];

        // Does the URL end with punctuation that should be stripped?
        if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $url, $punctuationMatch)) {
            // The stripped punctuation stays in the input and is handled by later parsing
            $url = $punctuationMatch[1];
        }

        // Does the URL end with something that looks like an entity reference?
        if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $url, $entityMatch)) {
            $url = $entityMatch[1];
        }

        // Does the URL need unmatched parens chopped off?
        if (\substr($url, -1) === ')' && ($diff = self::diffParens($url)) > 0) {
            $url = \substr($url, 0, -$diff);
        }

        // The cursor counts characters, not bytes
        $cursor->advanceBy(\mb_strlen($url, 'UTF-8'));

        // Auto-prefix 'http://' onto 'www' URLs. The URL pattern is case-insensitive,
        // so the check must be too, otherwise "WWW.example.com" would be emitted
        // without a scheme and end up as a relative link.
        $destination = \strncasecmp($url, 'www.', 4) === 0 ? 'http://' . $url : $url;

        $inlineContext->getContainer()->appendChild(new Link($destination, $url));

        return true;
    }
134
135
    /**
     * Returns how many more closing than opening parentheses $content contains.
     *
     * A positive result is the number of surplus ")" characters; zero or a negative
     * result means there are at least as many "(" as ")".
     *
     * @psalm-pure
     */
    private static function diffParens(string $content): int
    {
        // Only the totals over the whole autolink matter, not nesting or order.
        // When ")" outnumbers "(", the surplus is not treated as part of the
        // autolink, which makes it possible to put an autolink inside parentheses.
        $closing = \substr_count($content, ')');
        $opening = \substr_count($content, '(');

        return $closing - $opening;
    }
153
}
154