Completed
Pull Request — master (#6)
by Colin
02:48 queued 59s
created

hasMoreCloserParensThanOpeners()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 15

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 6
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 15
ccs 6
cts 6
cp 1
rs 9.7666
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark-ext-autolink package.
5
 *
6
 * (c) Colin O'Dell <[email protected]>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
namespace League\CommonMark\Ext\Autolink;
13
14
use League\CommonMark\Block\Element\Document;
15
use League\CommonMark\DocumentProcessorInterface;
16
use League\CommonMark\Inline\Element\Link;
17
use League\CommonMark\Inline\Element\Text;
18
19
final class UrlAutolinkProcessor implements DocumentProcessorInterface
20
{
21
    // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php
22
    const REGEX = '~
23
        (?<=^|[ \\t\\n\\x0b\\x0c\\x0d*_\\~\\(])  # Can only come at the beginning of a line, after whitespace, or certain delimiting characters
24
        (
25
            # Must start with a supported scheme + auth, or "www"
26
            (?:
27
                (?:%s)://                                 # protocol
28
                (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)?      # basic auth
29
            |www\.)
30
            (?:
31
                (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name
32
                    |                                                 # or
33
                \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address
34
                    |                                                 # or
35
                \[
36
                    (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::))))
37
                \]  # an IPv6 address
38
            )
39
            (?::[0-9]+)?                              # a port (optional)
40
            (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*      # a path
41
            (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a query (optional)
42
            (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a fragment (optional)
43
        )~ixu';
44
45
    private $finalRegex;
46
47 93
    /**
     * Prepares the URL-matching pattern for the given schemes.
     *
     * The schemes are joined with "|" and substituted for the "%s" placeholder
     * of self::REGEX, where they act as one alternation in front of "://".
     * They are inserted verbatim, without any escaping.
     *
     * @param string[] $allowedProtocols Schemes to recognise
     */
    public function __construct(array $allowedProtocols = ['http', 'https', 'ftp'])
    {
        $schemeAlternation = \implode('|', $allowedProtocols);

        $this->finalRegex = \sprintf(self::REGEX, $schemeAlternation);
    }
51
52
    /**
     * Visits every node of the document and autolinks URLs in each Text node.
     *
     * Nodes of any other type are skipped.
     *
     * @param Document $document
     *
     * @return void
     */
    public function processDocument(Document $document)
    {
        $walker = $document->walker();

        for ($event = $walker->next(); $event; $event = $walker->next()) {
            $current = $event->getNode();
            if (!($current instanceof Text)) {
                continue;
            }

            self::processAutolinks($current, $this->finalRegex);
        }
    }
67
68 93
    /**
     * Replaces every URL inside a text node with a Link node.
     *
     * The node's content is cut into "plain text / URL / plain text / ..."
     * pieces. Each piece is inserted before $node as a new Text or Link
     * sibling, and $node itself is detached afterwards. A node that contains
     * no URL is left untouched.
     *
     * @param Text   $node  Text node to scan
     * @param string $regex Pattern whose first capture group is the whole URL
     *
     * @return void
     */
    private static function processAutolinks(Text $node, $regex)
    {
        $source = $node->getContent();

        // URLs are located by byte offset instead of preg_split() with
        // PREG_SPLIT_DELIM_CAPTURE: the pattern contains nested capture groups
        // (basic auth) which preg_split() would return as extra elements,
        // breaking the text/URL alternation.
        if (!\preg_match_all($regex, $source, $found, PREG_OFFSET_CAPTURE)) {
            // Either no URL was found or the match failed; keep the node as-is
            return;
        }

        $cursor = 0;
        $leftovers = '';
        foreach ($found[1] as $hit) {
            $content = $hit[0];
            $offset = $hit[1];

            // Text before this URL, preceded by whatever was trimmed off the previous URL
            $text = $leftovers . \substr($source, $cursor, $offset - $cursor);
            if ($text !== '') {
                $node->insertBefore(new Text($text));
            }

            $cursor = $offset + \strlen($content);
            $leftovers = '';

            // Does the URL end with punctuation that should be stripped?
            // The lazy first group lets the second one take the whole trailing run.
            if (\preg_match('/(.+?)([?!.,:*_~]+)$/', $content, $matches)) {
                // Add the punctuation later
                $content = $matches[1];
                $leftovers = $matches[2];
            }

            // Does the URL end with something that looks like an entity reference?
            if (\preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $content, $matches)) {
                $content = $matches[1];
                $leftovers = $matches[2] . $leftovers;
            }

            // Does the URL need its closing paren chopped off?
            if (\substr($content, -1) === ')' && self::hasMoreCloserParensThanOpeners($content)) {
                $content = \substr($content, 0, -1);
                // Prepend: the paren sat in front of everything trimmed so far
                $leftovers = ')' . $leftovers;
            }

            self::addLink($node, $content);
        }

        // Text after the last URL, plus anything trimmed off that URL
        $text = $leftovers . \substr($source, $cursor);
        if ($text !== '') {
            $node->insertBefore(new Text($text));
        }

        $node->detach();
    }
116
117 57
    /**
     * Inserts a Link for the given URL in front of the text node.
     *
     * The link is created with the URL as its second constructor argument. Its
     * first argument is the same URL, prefixed with "http://" when the URL
     * starts with "www.".
     *
     * @param Text   $node Node before which the link is inserted
     * @param string $url  URL text as found in the document
     *
     * @return void
     */
    private static function addLink(Text $node, $url)
    {
        $destination = $url;

        // Auto-prefix 'http://' onto 'www' URLs. The comparison ignores case
        // because the autolink pattern is case-insensitive and also matches "WWW."
        if (\strncasecmp($url, 'www.', 4) === 0) {
            $destination = 'http://' . $url;
        }

        $node->insertBefore(new Link($destination, $url));
    }
128
129
    /**
     * Tells whether the string holds more ")" than "(" characters.
     *
     * When it does, the caller does not treat a trailing ")" as part of the
     * autolink, which makes it possible to put an autolink inside parentheses.
     *
     * @param string $content
     *
     * @return bool
     */
    private static function hasMoreCloserParensThanOpeners($content)
    {
        $closers = \substr_count($content, ')');
        $openers = \substr_count($content, '(');

        return $closers > $openers;
    }
149
}
150