Passed
Push — 2.0 ( b7ed7b...934f6b )
by Colin
35:11 queued 31:18
created

UrlAutolinkParser::diffParens()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 14
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 5
c 0
b 0
f 0
dl 0
loc 14
ccs 6
cts 6
cp 1
rs 10
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Extension\Autolink;
15
16
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
17
use League\CommonMark\Parser\Inline\InlineParserInterface;
18
use League\CommonMark\Parser\Inline\InlineParserMatch;
19
use League\CommonMark\Parser\InlineParserContext;
20
21
final class UrlAutolinkParser implements InlineParserInterface
22
{
23
    // Values the character immediately before the cursor may have for an autolink to start here.
    // parse() compares the result of $cursor->peek(-1) against this list with strict in_array();
    // null presumably stands for "no preceding character" (start of line) - confirm against Cursor::peek().
    private const ALLOWED_AFTER = [null, ' ', "\t", "\n", "\x0b", "\x0c", "\x0d", '*', '_', '~', '('];
24
25
    // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php
26
    private const REGEX = '~
27
        (
28
            # Must start with a supported scheme + auth, or "www"
29
            (?:
30
                (?:%s)://                                 # protocol
31
                (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)?      # basic auth
32
            |www\.)
33
            (?:
34
                (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name
35
                    |                                                 # or
36
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
37
                    |                                                 # or
38
                \[
39
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
40
                \]  # an IPv6 address
41
            )
42
            (?::[0-9]+)?                              # a port (optional)
43
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*      # a path
44
            (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a query (optional)
45
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a fragment (optional)
46
        )~ixu';
47
48
    /**
49
     * @var string[]
50
     *
51
     * @psalm-readonly
52
     */
53
    private array $prefixes = ['www'];
54
55
    /** @psalm-readonly */
56
    private string $finalRegex;
57
58
    /**
     * Builds the final URL regex and the list of literal prefixes for the given schemes.
     *
     * Every scheme is escaped before being spliced into the pattern, so scheme names
     * that contain regex metacharacters (for example "svn+ssh" or "iris.beep") are
     * matched literally instead of being interpreted as regex syntax.
     *
     * @param array<int, string> $allowedProtocols URL schemes (without "://") that may start an autolink
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'])
    {
        $schemeAlternatives = [];

        foreach ($allowedProtocols as $protocol) {
            // '~' is the delimiter used by self::REGEX, so it has to be escaped as well
            $schemeAlternatives[] = \preg_quote($protocol, '~');

            // The prefix is a plain string, not a pattern, so it keeps the raw scheme
            $this->prefixes[] = $protocol . '://';
        }

        $this->finalRegex = \sprintf(self::REGEX, \implode('|', $schemeAlternatives));
    }
69
70 237
    /**
     * Builds the match definition from every known prefix held in $this->prefixes
     * (the literal "www" plus one "<scheme>://" entry per allowed protocol).
     */
    public function getMatchDefinition(): InlineParserMatch
    {
        $knownPrefixes = $this->prefixes;

        return InlineParserMatch::oneOf(...$knownPrefixes);
    }
74
75 102
    /**
     * Attempts to turn the text at the cursor into an autolinked URL.
     *
     * On success the cursor is advanced past the URL and a Link node is appended to
     * the current container. Every failure path returns before the cursor is moved.
     *
     * @return bool true when a link was created, false otherwise
     */
    public function parse(InlineParserContext $inlineContext): bool
    {
        $cursor = $inlineContext->getCursor();

        // Autolinks can only come at the beginning of a line, after whitespace, or certain delimiting characters
        if (! \in_array($cursor->peek(-1), self::ALLOWED_AFTER, true)) {
            return false;
        }

        // Check if we have a valid URL
        if (\preg_match($this->finalRegex, $cursor->getRemainder(), $matches, \PREG_OFFSET_CAPTURE) !== 1) {
            return false;
        }

        [$url, $offset] = $matches[0];

        // The pattern is not anchored, so it may also find a URL further along in the remaining
        // text (e.g. "www2 see http://example.com"). The link has to begin exactly at the cursor;
        // otherwise advancing by the URL length below would swallow unrelated text.
        if ($offset !== 0) {
            return false;
        }

        // Does the URL end with punctuation that should be stripped?
        // The lazy quantifier makes the second group capture the WHOLE run of trailing punctuation
        // ("example.com!!" -> "example.com"); a greedy one would leave all but the last character attached.
        if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $url, $matches)) {
            // The stripped characters are not consumed here, so they stay in the remaining text
            $url = $matches[1];
        }

        // Does the URL end with something that looks like an entity reference?
        if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $url, $matches)) {
            $url = $matches[1];
        }

        // Does the URL need unmatched parens chopped off?
        if (\substr($url, -1) === ')' && ($diff = self::diffParens($url)) > 0) {
            $url = \substr($url, 0, -$diff);
        }

        // The URL regex runs in UTF-8 mode ("u" modifier), so measure the match as UTF-8 explicitly
        // instead of depending on the configured mbstring internal encoding
        $cursor->advanceBy(\mb_strlen($url, 'UTF-8'));

        // Auto-prefix 'http://' onto 'www' URLs. The URL regex is case-insensitive, so the check
        // must be too, otherwise "WWW.example.com" would end up with a relative link target.
        $destination = \preg_match('/^www\./i', $url) === 1 ? 'http://' . $url : $url;

        $inlineContext->getContainer()->appendChild(new Link($destination, $url));

        return true;
    }
121
122
    /**
     * Returns how many more closing than opening parentheses the given text contains.
     *
     * A positive result is the number of surplus ")" characters; zero or a negative
     * result means there is no surplus of closing parentheses.
     *
     * @psalm-pure
     */
    private static function diffParens(string $content): int
    {
        // All parentheses in the autolink are tallied, not just the trailing ones.
        // When the closing ones outnumber the opening ones, the caller drops that many
        // characters from the end, which lets an autolink sit inside a parenthesis.
        $closingTotal = \substr_count($content, ')');
        $openingTotal = \substr_count($content, '(');

        return $closingTotal - $openingTotal;
    }
140
}
141