Completed
Push — master ( 9eb60a...7a0a03 )
by Colin
36:39 queued 35:14
created

hasMoreCloserParensThanOpeners()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
dl 0
loc 15
ccs 0
cts 9
cp 0
rs 9.7666
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 6
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace League\CommonMark\Extension\Autolink;
13
14
use League\CommonMark\Event\DocumentParsedEvent;
15
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
16
use League\CommonMark\Node\Inline\Text;
17
18
final class UrlAutolinkProcessor
19
{
20
    // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php
21
    const REGEX = '~
22
        (?<=^|[ \\t\\n\\x0b\\x0c\\x0d*_\\~\\(])  # Can only come at the beginning of a line, after whitespace, or certain delimiting characters
23
        (
24
            # Must start with a supported scheme + auth, or "www"
25
            (?:
26
                (?:%s)://                                 # protocol
27
                (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)?      # basic auth
28
            |www\.)
29
            (?:
30
                (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name
31
                    |                                                 # or
32
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
33
                    |                                                 # or
34
                \[
35
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
36
                \]  # an IPv6 address
37
            )
38
            (?::[0-9]+)?                              # a port (optional)
39
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*      # a path
40
            (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a query (optional)
41
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a fragment (optional)
42
        )~ixu';
43
44
    /** @var string */
45
    private $finalRegex;
46
47
    /**
     * Builds the final autolink pattern by substituting the allowed schemes
     * into the REGEX template.
     *
     * @param array<int, string> $allowedProtocols Scheme names; they are joined with "|" and inserted into the pattern as-is
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'])
    {
        $schemeAlternation = \implode('|', $allowedProtocols);

        $this->finalRegex = \sprintf(self::REGEX, $schemeAlternation);
    }
54
55
    /**
     * Walks the parsed document and runs autolink processing on every Text
     * node whose direct parent is not already a Link.
     *
     * @param DocumentParsedEvent $e Event carrying the document to process
     */
    public function __invoke(DocumentParsedEvent $e): void
    {
        $document = $e->getDocument();
        $walker = $document->walker();

        while ($event = $walker->next()) {
            $node = $event->getNode();

            // Only plain text can contain bare URLs
            if (!($node instanceof Text)) {
                continue;
            }

            // Text that is already the label of a link must not be linked again
            if ($node->parent() instanceof Link) {
                continue;
            }

            self::processAutolinks($node, $this->finalRegex);
        }
    }
66
67
    /**
     * Replaces a text node with a run of plain-text and link nodes, one link
     * per URL found in its content.
     *
     * Only the first capture group of $regex (the whole URL) is used. The
     * pattern also contains inner capture groups, and splitting on it with
     * PREG_SPLIT_DELIM_CAPTURE would return those as extra pieces and break
     * the text/URL alternation, so matches are collected together with their
     * byte offsets instead.
     *
     * @param Text   $node  Text node to scan; replacement nodes are inserted before it and it is then detached
     * @param string $regex Autolink pattern whose first capture group is the complete URL
     */
    private static function processAutolinks(Text $node, string $regex): void
    {
        $subject = $node->getContent();

        $found = \preg_match_all($regex, $subject, $matches, PREG_OFFSET_CAPTURE);
        if ($found === false || $found === 0) {
            // Pattern error or no URLs: leave the node untouched
            return;
        }

        $cursor = 0;      // Byte offset just past the previously matched URL
        $leftovers = '';  // Characters trimmed off the end of the previous URL

        foreach ($matches[1] as [$url, $offset]) {
            // Emit the text between the previous URL and this one, prefixed
            // with anything that was trimmed off the previous URL
            $text = $leftovers . \substr($subject, $cursor, $offset - $cursor);
            if ($text !== '') {
                $node->insertBefore(new Text($text));
            }

            $cursor = $offset + \strlen($url);
            $leftovers = '';

            // Does the URL end with punctuation that should be stripped?
            // The prefix is matched lazily so the whole trailing run is removed, not just its last character
            if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $url, $parts)) {
                // Add the punctuation later
                $url = $parts[1];
                $leftovers = $parts[2];
            }

            // Does the URL end with something that looks like an entity reference?
            if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $url, $parts)) {
                $url = $parts[1];
                $leftovers = $parts[2] . $leftovers;
            }

            // Does the URL need its closing paren chopped off?
            if (\substr($url, -1) === ')' && self::hasMoreCloserParensThanOpeners($url)) {
                $url = \substr($url, 0, -1);
                $leftovers = ')' . $leftovers;
            }

            self::addLink($node, $url);
        }

        // Emit whatever follows the last URL, again prefixed with its trimmed characters
        $text = $leftovers . \substr($subject, $cursor);
        if ($text !== '') {
            $node->insertBefore(new Text($text));
        }

        $node->detach();
    }
115
116
    /**
     * Inserts a link node for the given URL immediately before $node.
     *
     * URLs that begin with "www." carry no scheme, so "http://" is prepended
     * to the link destination while the visible label keeps the original text.
     *
     * @param Text   $node Node before which the link is inserted
     * @param string $url  The matched URL, used as the link label
     */
    private static function addLink(Text $node, string $url): void
    {
        // The autolink pattern is case-insensitive, so "WWW.example.com" arrives
        // here too and must receive the scheme prefix just like "www.example.com"
        $destination = \strncasecmp($url, 'www.', 4) === 0
            ? 'http://' . $url
            : $url;

        $node->insertBefore(new Link($destination, $url));
    }
127
128
    /**
     * Tells whether the text contains more ")" than "(" characters.
     *
     * When closing parentheses outnumber opening ones, the caller does not
     * treat the final ")" as part of the autolink, so that a URL can be
     * written inside a parenthesised remark.
     *
     * @param string $content The candidate autolink text
     *
     * @return bool True when ")" occurs more often than "("
     */
    private static function hasMoreCloserParensThanOpeners(string $content): bool
    {
        $openers = \substr_count($content, '(');
        $closers = \substr_count($content, ')');

        return $closers > $openers;
    }
148
}
149