Completed
Push — master ( 83739a...a13ee4 )
by Colin
14s queued 11s
created

UrlAutolinkProcessor   A

Complexity

Total Complexity 19

Size/Duplication

Total Lines 132
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 6

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 19
lcom 0
cbo 6
dl 0
loc 132
ccs 47
cts 47
cp 1
rs 10
c 0
b 0
f 0

5 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
A __invoke() 0 11 4
B processAutolinks() 0 48 10
A addLink() 0 11 2
A hasMoreCloserParensThanOpeners() 0 15 2
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace League\CommonMark\Extension\Autolink;
13
14
use League\CommonMark\Event\DocumentParsedEvent;
15
use League\CommonMark\Inline\Element\Link;
16
use League\CommonMark\Inline\Element\Text;
17
18
final class UrlAutolinkProcessor
19
{
20
    // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php
21
    const REGEX = '~
22
        (?<=^|[ \\t\\n\\x0b\\x0c\\x0d*_\\~\\(])  # Can only come at the beginning of a line, after whitespace, or certain delimiting characters
23
        (
24
            # Must start with a supported scheme + auth, or "www"
25
            (?:
26
                (?:%s)://                                 # protocol
27
                (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)?      # basic auth
28
            |www\.)
29
            (?:
30
                (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name
31
                    |                                                 # or
32
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
33
                    |                                                 # or
34
                \[
35
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
36
                \]  # an IPv6 address
37
            )
38
            (?::[0-9]+)?                              # a port (optional)
39
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*      # a path
40
            (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a query (optional)
41
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a fragment (optional)
42
        )~ixu';
43
44
    private $finalRegex;
45
46 177
    /**
     * Prepares the URL-matching pattern for the given set of schemes.
     *
     * @param string[] $allowedProtocols Schemes that may start an autolink; they are
     *                                   joined with "|" and inserted verbatim into the
     *                                   protocol slot of self::REGEX
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'])
    {
        $schemeAlternation = \implode('|', $allowedProtocols);

        $this->finalRegex = \sprintf(self::REGEX, $schemeAlternation);
    }
50
51
    /**
     * Walks the parsed document and autolinks URLs found in plain text nodes.
     *
     * Text nodes whose direct parent is already a Link are skipped.
     *
     * @param DocumentParsedEvent $e
     *
     * @return void
     */
    public function __invoke(DocumentParsedEvent $e)
    {
        $walker = $e->getDocument()->walker();

        while ($event = $walker->next()) {
            $current = $event->getNode();

            // Only plain text is eligible for autolinking
            if (!($current instanceof Text)) {
                continue;
            }

            // Text directly inside a link is left as-is
            if ($current->parent() instanceof Link) {
                continue;
            }

            self::processAutolinks($current, $this->finalRegex);
        }
    }
67
68 162
    /**
     * Replaces a Text node with a run of Text and Link nodes, one Link per URL matched by $regex.
     *
     * When the node's content contains no match the node is left untouched.
     * Otherwise the replacement nodes are inserted before $node and $node is detached.
     *
     * @param Text   $node  The text node to scan
     * @param string $regex Pattern that locates autolinkable URLs
     *
     * @return void
     */
    private static function processAutolinks(Text $node, $regex)
    {
        $subject = $node->getContent();

        // URLs are located by byte offset instead of preg_split() with
        // PREG_SPLIT_DELIM_CAPTURE: splitting also returns every nested capture
        // group that took part in a match (such as the basic-auth groups), which
        // breaks any "odd index = URL" bookkeeping and turns those fragments
        // into bogus text/link nodes.
        $found = \preg_match_all($regex, $subject, $matches, PREG_OFFSET_CAPTURE);
        if ($found === false || $found === 0) {
            return;
        }

        $cursor = 0;
        $leftovers = '';
        foreach ($matches[0] as $match) {
            $url = $match[0];
            $offset = $match[1];

            // Text between the previous URL (or the start) and this URL,
            // prefixed with whatever was trimmed off the end of the previous URL
            $text = $leftovers . \substr($subject, $cursor, $offset - $cursor);
            if ($text !== '') {
                $node->insertBefore(new Text($text));
            }

            $cursor = $offset + \strlen($url);
            $leftovers = '';

            // Does the URL end with punctuation that should be stripped?
            // The lazy quantifier makes sure the *whole* trailing run is removed,
            // not just its final character.
            if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $url, $parts)) {
                // Add the punctuation later
                $url = $parts[1];
                $leftovers = $parts[2];
            }

            // Does the URL end with something that looks like an entity reference?
            if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $url, $parts)) {
                $url = $parts[1];
                $leftovers = $parts[2] . $leftovers;
            }

            // Does the URL need its closing paren chopped off?
            if (\substr($url, -1) === ')' && self::hasMoreCloserParensThanOpeners($url)) {
                $url = \substr($url, 0, -1);
                $leftovers = ')' . $leftovers;
            }

            self::addLink($node, $url);
        }

        // Whatever follows the last URL, plus anything trimmed off that URL
        $tail = $leftovers . (string) \substr($subject, $cursor);
        if ($tail !== '') {
            $node->insertBefore(new Text($tail));
        }

        $node->detach();
    }
116
117 87
    /**
     * Inserts a Link for the given URL immediately before $node.
     *
     * URLs that start with "www." carry no scheme, so 'http://' is prepended to
     * the value passed as the Link's first argument while the second argument
     * keeps the URL exactly as matched. The prefix test ignores letter case
     * because the URL pattern itself is case-insensitive and therefore also
     * matches e.g. "WWW.example.com".
     *
     * @param Text   $node The node before which the link is inserted
     * @param string $url  The matched URL
     *
     * @return void
     */
    private static function addLink(Text $node, $url)
    {
        // Auto-prefix 'http://' onto 'www' URLs
        $destination = \strncasecmp($url, 'www.', 4) === 0 ? 'http://' . $url : $url;

        $node->insertBefore(new Link($destination, $url));
    }
128
129
    /**
     * Tells whether the string contains more ")" than "(" characters.
     *
     * The whole autolink is examined: when closing parentheses outnumber
     * opening ones, the caller does not treat the final ")" as part of the
     * link, so that an autolink can be written inside parentheses.
     *
     * @param string $content
     *
     * @return bool
     */
    private static function hasMoreCloserParensThanOpeners($content)
    {
        $openers = \substr_count($content, '(');
        $closers = \substr_count($content, ')');

        return $closers > $openers;
    }
149
}
150