Completed
Push — master ( 45070b...de2ce9 )
by Colin
02:22
created

UrlAutolinkProcessor::__invoke()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 6
cts 6
cp 1
rs 9.9332
c 0
b 0
f 0
cc 3
nc 3
nop 1
crap 3
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark-ext-autolink package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace League\CommonMark\Ext\Autolink;
13
14
use League\CommonMark\Event\DocumentParsedEvent;
15
use League\CommonMark\Inline\Element\Link;
16
use League\CommonMark\Inline\Element\Text;
17
18
final class UrlAutolinkProcessor
19
{
20
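    // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php
    //
    // This is a sprintf() template rather than a ready-to-use pattern: "%s" receives the
    // "|"-joined list of allowed protocols, and every literal percent sign is doubled ("%%").
    // Capture group 1 is the whole URL; the optional basic-auth section adds groups 2 and 3.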
21
    const REGEX = '~
22
        (?<=^|[ \\t\\n\\x0b\\x0c\\x0d*_\\~\\(])  # Can only come at the beginning of a line, after whitespace, or certain delimiting characters
23
        (
24
            # Must start with a supported scheme + auth, or "www"
25
            (?:
26
                (?:%s)://                                 # protocol
27
                (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)?      # basic auth
28
            |www\.)
29
            (?:
30
                (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name
31
                    |                                                 # or
32
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
33
                    |                                                 # or
34
                \[
35
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
36
                \]  # an IPv6 address
37
            )
38
            (?::[0-9]+)?                              # a port (optional)
39
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*      # a path
40
            (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a query (optional)
41
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a fragment (optional)
42
        )~ixu';
43
44
    /**
     * @var string self::REGEX with the allowed protocols substituted in by the constructor
     */
    private $finalRegex;
45
46 96
    /**
     * Builds the final URL pattern by substituting the allowed schemes into self::REGEX.
     *
     * @param string[] $allowedProtocols URL schemes (without "://") that may start an autolink
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'])
    {
        // Schemes may legally contain "+", "." and "-" (e.g. "svn+ssh"); escape them so
        // they are matched literally instead of acting as regex operators. "~" is the
        // delimiter used by self::REGEX.
        $quotedProtocols = [];
        foreach ($allowedProtocols as $protocol) {
            $quotedProtocols[] = \preg_quote($protocol, '~');
        }

        $this->finalRegex = \sprintf(self::REGEX, \implode('|', $quotedProtocols));
    }
50
51
    /**
     * Walks the parsed document and runs autolink detection on every Text node.
     *
     * @param DocumentParsedEvent $e event giving access to the parsed document
     *
     * @return void
     */
    public function __invoke(DocumentParsedEvent $e)
    {
        $walker = $e->getDocument()->walker();

        for ($step = $walker->next(); $step; $step = $walker->next()) {
            $current = $step->getNode();

            // Only plain text can contain bare URLs
            if (!($current instanceof Text)) {
                continue;
            }

            self::processAutolinks($current, $this->finalRegex);
        }
    }
66
67 96
    /**
     * Replaces a Text node with a sequence of Text and Link nodes, one Link per URL
     * matched by $regex. The node is left untouched when it contains no URL.
     *
     * @param Text   $node  text node to scan; detached once its replacements are inserted
     * @param string $regex fully-built URL pattern
     *
     * @return void
     */
    private static function processAutolinks(Text $node, $regex)
    {
        $subject = $node->getContent();

        // Locate URLs by byte offset instead of splitting with PREG_SPLIT_DELIM_CAPTURE:
        // splitting returns one element per capture group, so a URL that fills extra
        // groups (e.g. credentials) would break any text/URL alternation.
        // A result of 0 (no URL) or false (PCRE error) both mean there is nothing to do.
        if (!\preg_match_all($regex, $subject, $matches, PREG_OFFSET_CAPTURE)) {
            return;
        }

        $leftovers = '';
        $cursor = 0;

        foreach ($matches[0] as $match) {
            $url = $match[0];
            $offset = $match[1];

            // Text between the previous URL and this one, prefixed with whatever was
            // trimmed off the end of the previous URL
            $text = $leftovers . (string) \substr($subject, $cursor, $offset - $cursor);
            if ($text !== '') {
                $node->insertBefore(new Text($text));
            }

            $cursor = $offset + \strlen($url);
            $leftovers = '';

            // Does the URL end with punctuation that should be stripped?
            // The first group is lazy so that the whole trailing run is removed, not just
            // its final character.
            if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $url, $parts)) {
                // Add the punctuation later
                $url = $parts[1];
                $leftovers = $parts[2];
            }

            // Does the URL end with something that looks like an entity reference?
            if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $url, $parts)) {
                $url = $parts[1];
                $leftovers = $parts[2] . $leftovers;
            }

            // Does the URL need its closing paren chopped off?
            if (\substr($url, -1) === ')' && self::hasMoreCloserParensThanOpeners($url)) {
                $url = \substr($url, 0, -1);
                $leftovers .= ')';
            }

            self::addLink($node, $url);
        }

        // Whatever follows the last URL, plus anything trimmed from it
        $text = $leftovers . (string) \substr($subject, $cursor);
        if ($text !== '') {
            $node->insertBefore(new Text($text));
        }

        $node->detach();
    }
115
116 60
    /**
     * Inserts a Link for $url immediately before $node, using the URL itself as the label.
     *
     * @param Text   $node node the link is inserted before
     * @param string $url  matched URL text
     *
     * @return void
     */
    private static function addLink(Text $node, $url)
    {
        // Auto-prefix 'http://' onto 'www' URLs. The comparison ignores case because the
        // URL pattern is case-insensitive, so "WWW.example.com" reaches this point too
        // and would otherwise become a link with no scheme.
        $destination = \strncasecmp($url, 'www.', 4) === 0 ? 'http://' . $url : $url;

        $node->insertBefore(new Link($destination, $url));
    }
127
128
    /**
     * Tells whether the text contains more ")" characters than "(" characters.
     *
     * When it does, the caller does not treat a trailing ")" as part of the autolink,
     * which makes it possible to put an autolink inside parentheses.
     *
     * @param string $content
     *
     * @return bool
     */
    private static function hasMoreCloserParensThanOpeners($content)
    {
        $closers = \substr_count($content, ')');
        $openers = \substr_count($content, '(');

        return $closers > $openers;
    }
148
}
149