Passed
Push — latest ( caaec2...2ad737 )
by Colin
02:16
created

hasMoreCloserParensThanOpeners()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 14
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 5
dl 0
loc 14
ccs 6
cts 6
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
declare(strict_types=1);
4
5
/*
6
 * This file is part of the league/commonmark package.
7
 *
8
 * (c) Colin O'Dell <[email protected]>
9
 *
10
 * For the full copyright and license information, please view the LICENSE
11
 * file that was distributed with this source code.
12
 */
13
14
namespace League\CommonMark\Extension\Autolink;
15
16
use League\CommonMark\Event\DocumentParsedEvent;
17
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
18
use League\CommonMark\Node\Inline\Text;
19
20
final class UrlAutolinkProcessor
21
{
22
    // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php
23
    private const REGEX = '~
24
        (?<=^|[ \\t\\n\\x0b\\x0c\\x0d*_\\~\\(])  # Can only come at the beginning of a line, after whitespace, or certain delimiting characters
25
        (
26
            # Must start with a supported scheme + auth, or "www"
27
            (?:
28
                (?:%s)://                                 # protocol
29
                (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)?      # basic auth
30
            |www\.)
31
            (?:
32
                (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name
33
                    |                                                 # or
34
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
35
                    |                                                 # or
36
                \[
37
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
38
                \]  # an IPv6 address
39
            )
40
            (?::[0-9]+)?                              # a port (optional)
41
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*      # a path
42
            (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a query (optional)
43
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a fragment (optional)
44
        )~ixu';
45
46
    /**
47
     * @var string
48
     *
49
     * @psalm-readonly
50
     */
51
    private $finalRegex;
52
53
    /**
     * Builds the final URL pattern by substituting the permitted schemes into the REGEX template.
     *
     * The schemes are joined with "|" and inserted as-is; no escaping is applied to them here.
     *
     * @param array<int, string> $allowedProtocols Schemes (without "://") that may start an autolink
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'])
    {
        $schemeAlternation = \implode('|', $allowedProtocols);

        $this->finalRegex = \sprintf(self::REGEX, $schemeAlternation);
    }
60
61 231
    /**
     * Walks the parsed document and hands every eligible text node to the autolink processing step.
     *
     * A text node is skipped when its direct parent is already a Link.
     */
    public function __invoke(DocumentParsedEvent $e): void
    {
        $walker = $e->getDocument()->walker();

        while ($event = $walker->next()) {
            $node = $event->getNode();

            // Only text nodes can contain bare URLs
            if (! ($node instanceof Text)) {
                continue;
            }

            // Text directly inside a link is left alone
            if ($node->parent() instanceof Link) {
                continue;
            }

            self::processAutolinks($node, $this->finalRegex);
        }
    }
72
73 213
    /**
     * Splits a text node around every URL in its literal, emitting Text and Link nodes in its place.
     *
     * When the literal contains no URL (or the pattern fails to run) the node is left untouched.
     * Otherwise the replacement nodes are inserted before $node, in order, and $node is detached.
     *
     * @param Text   $node  Text node to scan
     * @param string $regex Pattern matching a single URL
     */
    private static function processAutolinks(Text $node, string $regex): void
    {
        $literal = $node->getLiteral();

        // Work from the offset of each overall match instead of preg_split() with
        // PREG_SPLIT_DELIM_CAPTURE: the pattern has nested capture groups (basic auth), and
        // preg_split() would return those as extra elements, breaking a "text, URL, text" layout.
        $found = \preg_match_all($regex, $literal, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
        if ($found === false || $found === 0) {
            return;
        }

        $cursor    = 0;  // Byte offset just past the previous URL
        $leftovers = ''; // Characters trimmed off the previous URL, to be emitted as plain text

        foreach ($matches as $match) {
            [$url, $start] = $match[0];

            // Plain text since the previous URL, preceded by whatever was trimmed off that URL
            $text = $leftovers . \substr($literal, $cursor, $start - $cursor);
            if ($text !== '') {
                $node->insertBefore(new Text($text));
            }

            $cursor    = $start + \strlen($url);
            $leftovers = '';

            // Does the URL end with punctuation that should be stripped?
            // The lazy (.+?) lets the second group take the whole trailing run ("..." or "?!"),
            // not just its final character.
            if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $url, $parts) === 1) {
                $url       = $parts[1];
                $leftovers = $parts[2];
            }

            // Does the URL end with something that looks like an entity reference?
            if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $url, $parts) === 1) {
                $url       = $parts[1];
                $leftovers = $parts[2] . $leftovers;
            }

            // Does the URL need unmatched parens chopped off?
            // Only closing parens that actually sit at the end are removed, so the amount is
            // capped at the length of the trailing ")" run and no other character is dropped.
            $trailingParens = \strlen($url) - \strlen(\rtrim($url, ')'));
            if ($trailingParens > 0) {
                $excess = \min($trailingParens, self::diffParens($url));
                if ($excess > 0) {
                    $url       = \substr($url, 0, -$excess);
                    $leftovers = \str_repeat(')', $excess) . $leftovers;
                }
            }

            self::addLink($node, $url);
        }

        // Whatever follows the last URL, including anything trimmed off it
        $text = $leftovers . \substr($literal, $cursor);
        if ($text !== '') {
            $node->insertBefore(new Text($text));
        }

        $node->detach();
    }
121
122 93
    /**
     * Inserts a Link for the given URL immediately before the given node.
     *
     * URLs beginning with "www." have no scheme, so "http://" is prepended to the first Link
     * argument while the second argument keeps the URL exactly as written. The prefix check is
     * case-insensitive because the matching pattern is too: "WWW.example.com" is matched and
     * must not end up with a scheme-less destination.
     *
     * @param Text   $node Node before which the link is inserted
     * @param string $url  The matched URL, already stripped of trailing punctuation
     */
    private static function addLink(Text $node, string $url): void
    {
        // Auto-prefix 'http://' onto 'www' URLs
        if (\strncasecmp($url, 'www.', 4) === 0) {
            $node->insertBefore(new Link('http://' . $url, $url));

            return;
        }

        $node->insertBefore(new Link($url, $url));
    }
133
134
    /**
     * Returns how many more closing parentheses than opening ones the string contains.
     *
     * The whole string is counted, not just its tail. A positive result means there are
     * unmatched ")" characters; zero or a negative result means there are none.
     *
     * @psalm-pure
     */
    private static function diffParens(string $content): int
    {
        $closers = \substr_count($content, ')');
        $openers = \substr_count($content, '(');

        return $closers - $openers;
    }
152
}
153