Completed
Push — latest ( 76d169...995567 )
by Colin
22s queued 10s
created

UrlAutolinkParser::parse()   B

Complexity

Conditions 8
Paths 18

Size

Total Lines 45
Code Lines 19

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 8.0079

Importance

Changes 0
Metric Value
eloc 19
c 0
b 0
f 0
dl 0
loc 45
ccs 19
cts 20
cp 0.95
rs 8.4444
cc 8
nc 18
nop 2
crap 8.0079
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <colinodell@gmail.com>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Extension\Autolink;
15
16
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
17
use League\CommonMark\Parser\Inline\InlineParserInterface;
18
use League\CommonMark\Parser\Inline\InlineParserMatch;
19
use League\CommonMark\Parser\InlineParserContext;
20
21
final class UrlAutolinkParser implements InlineParserInterface
22
{
23
    private const ALLOWED_AFTER = [null, ' ', "\t", "\n", "\x0b", "\x0c", "\x0d", '*', '_', '~', '('];
24
25
    // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php
26
    private const REGEX = '~
27
        (
28
            # Must start with a supported scheme + auth, or "www"
29
            (?:
30
                (?:%s)://                                 # protocol
31
                (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)?      # basic auth
32
            |www\.)
33
            (?:
34
                (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name
35
                    |                                                 # or
36
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
37
                    |                                                 # or
38
                \[
39
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
40
                \]  # an IPv6 address
41
            )
42
            (?::[0-9]+)?                              # a port (optional)
43
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*      # a path
44
            (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a query (optional)
45
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a fragment (optional)
46
        )~ixu';
47
48
    /**
49
     * @var string[]
50
     *
51
     * @psalm-readonly
52
     */
53
    private $prefixes = ['www'];
54
55
    /**
56
     * @var string
57
     *
58
     * @psalm-readonly
59
     */
60
    private $finalRegex;
61
62
    /**
     * Builds the URL-matching pattern and the list of prefixes handled by this parser.
     *
     * For every allowed protocol, "<protocol>://" is appended to the prefix
     * list (which already contains "www"), and the protocol is added as an
     * alternative in the scheme part of self::REGEX.
     *
     * Each protocol is passed through preg_quote() before being spliced into
     * the pattern, so a scheme containing regex metacharacters (for example
     * "svn+ssh") is matched literally instead of being read as regex syntax.
     *
     * @param array<int, string> $allowedProtocols
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'])
    {
        $schemeAlternatives = [];

        foreach ($allowedProtocols as $protocol) {
            // '~' is the delimiter used by self::REGEX, so it must be escaped as well
            $schemeAlternatives[] = \preg_quote($protocol, '~');
            $this->prefixes[]     = $protocol . '://';
        }

        $this->finalRegex = \sprintf(self::REGEX, \implode('|', $schemeAlternatives));
    }
73
74 234
    /**
     * Describes the text this parser wants to be invoked for: any one of the
     * registered prefixes ("www" plus one "<protocol>://" entry per allowed protocol).
     */
    public function getMatchDefinition(): InlineParserMatch
    {
        $triggerPrefixes = $this->prefixes;

        return InlineParserMatch::oneOf(...$triggerPrefixes);
    }
78
79 99
    /**
     * Tries to convert the text at the cursor into an autolinked URL.
     *
     * On success the cursor is advanced past the URL, a Link node is appended
     * to the current container and true is returned. Otherwise false is
     * returned and neither the cursor nor the container is touched.
     *
     * @param string              $match         The text that triggered this parser (not used here)
     * @param InlineParserContext $inlineContext Supplies the cursor and the container to append to
     *
     * @return bool Whether a link was created
     */
    public function parse(string $match, InlineParserContext $inlineContext): bool
    {
        $cursor = $inlineContext->getCursor();

        // Autolinks can only come at the beginning of a line, after whitespace, or certain delimiting characters
        if (! \in_array($cursor->peek(-1), self::ALLOWED_AFTER, true)) {
            return false;
        }

        // Check if we have a valid URL
        if (\preg_match($this->finalRegex, $cursor->getRemainder(), $matches, \PREG_OFFSET_CAPTURE) !== 1) {
            return false;
        }

        // The pattern is not anchored, so it may have found a URL further along
        // in the remainder. Only a match starting exactly at the cursor is valid;
        // otherwise advancing by the URL's length would swallow unrelated text.
        if ($matches[0][1] !== 0) {
            return false;
        }

        $url = $matches[0][0];

        // Does the URL end with punctuation that should be stripped?
        // The lazy quantifier makes sure the WHOLE trailing run is removed, not just its last character.
        if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $url, $trailing)) {
            $url = $trailing[1];
        }

        // Does the URL end with something that looks like an entity reference?
        if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $url, $entity)) {
            $url = $entity[1];
        }

        // Does the URL need unmatched parens chopped off?
        if (\substr($url, -1) === ')' && ($diff = self::diffParens($url)) > 0) {
            $url = \substr($url, 0, -$diff);
        }

        // The URL pattern runs in UTF-8 mode (`u` modifier), so measure the URL
        // as UTF-8 explicitly instead of relying on the ambient mb_internal_encoding().
        $cursor->advanceBy(\mb_strlen($url, 'UTF-8'));

        // Auto-prefix 'http://' onto 'www' URLs. The URL pattern is case-insensitive,
        // so the check must be too (e.g. "WWW.example.com").
        $destination = \strncasecmp($url, 'www.', 4) === 0 ? 'http://' . $url : $url;

        $inlineContext->getContainer()->appendChild(new Link($destination, $url));

        return true;
    }
125
126
    /**
     * Returns the number of closing parentheses in $content minus the number
     * of opening ones.
     *
     * A positive result means the text has that many surplus ")" characters;
     * zero means the counts are equal; a negative result means there are more
     * "(" than ")".
     *
     * @psalm-pure
     */
    private static function diffParens(string $content): int
    {
        // Every parenthesis in the autolink is counted, not only the trailing
        // ones: when closers outnumber openers, the surplus is treated as not
        // belonging to the link, so that an autolink can be written inside
        // parentheses.
        $closing = \substr_count($content, ')');
        $opening = \substr_count($content, '(');

        return $closing - $opening;
    }
144
}
145