| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | /* | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  * This file is part of the league/commonmark-ext-autolink package. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * (c) Colin O'Dell <[email protected]> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * For the full copyright and license information, please view the LICENSE | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * file that was distributed with this source code. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | namespace League\CommonMark\Ext\Autolink; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | use League\CommonMark\Block\Element\Document; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  | use League\CommonMark\DocumentProcessorInterface; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  | use League\CommonMark\Inline\Element\Link; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  | use League\CommonMark\Inline\Element\Text; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  | final class UrlAutolinkProcessor implements DocumentProcessorInterface | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |     // RegEx adapted from https://github.com/symfony/symfony/blob/4.2/src/Symfony/Component/Validator/Constraints/UrlValidator.php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |     const REGEX = '~ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |         (?<=^|[ \\t\\n\\x0b\\x0c\\x0d*_\\~\\(])  # Can only come at the beginning of a line, after whitespace, or certain delimiting characters | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |         ( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |             # Must start with a supported scheme + auth, or "www" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |             (?: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |                 (?:%s)://                                 # protocol | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |                 (?:([\.\pL\pN-]+:)?([\.\pL\pN-]+)@)?      # basic auth | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |             |www\.) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |             (?: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |                 (?:[\pL\pN\pS\-\.])+(?:\.?(?:[\pL\pN]|xn\-\-[\pL\pN-]+)+\.?) # a domain name | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |                     |                                                 # or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |                 \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}                    # an IP address | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |                     |                                                 # or | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |                 \[ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |                     (?:(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-f]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,1}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,2}(?:(?:[0-9a-f]{1,4})))?::(?:(?:(?:[0-9a-f]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,3}(?:(?:[0-9a-f]{1,4})))?::(?:(?:[0-9a-f]{1,4})):)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,4}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-f]{1,4})):(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,5}(?:(?:[0-9a-f]{1,4})))?::)(?:(?:[0-9a-f]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-f]{1,4})):){0,6}(?:(?:[0-9a-f]{1,4})))?::)))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |                 \]  # an IPv6 address | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |             ) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |             (?::[0-9]+)?                              # a port (optional) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |             (?:/ (?:[\pL\pN\-._\~!$&\'()*+,;=:@]|%%[0-9A-Fa-f]{2})* )*      # a path | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |             (?:\? (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a query (optional) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |             (?:\# (?:[\pL\pN\-._\~!$&\'()*+,;=:@/?]|%%[0-9A-Fa-f]{2})* )?   # a fragment (optional) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 |  |  |         )~ixu'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |     private $allowedProtocols; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Creates the processor with the URL schemes that may be auto-linked.
                 *
                 * processDocument() joins these values with "|" and substitutes the result
                 * into the "%s" placeholder of self::REGEX (directly before "://"), so each
                 * entry is used verbatim as one alternative of the pattern.
                 *
                 * @param string[] $allowedProtocols Scheme names without the "://" suffix
                 */
                public function __construct(
                    array $allowedProtocols = ['http', 'https', 'ftp']
                ) {
                    $this->allowedProtocols = $allowedProtocols;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |      * @param Document $document | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |      * @return void | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 | 75 |  |     public function processDocument(Document $document) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 | 75 |  |         $regex = sprintf(self::REGEX, implode('|', $this->allowedProtocols)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 | 75 |  |         $walker = $document->walker(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 | 75 |  |         while ($event = $walker->next()) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 | 75 |  |             if ($event->isEntering() && $event->getNode() instanceof Text) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |                 /** @var Text $node */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 | 75 |  |                 $node = $event->getNode(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 | 75 |  |                 $contents = preg_split($regex, $node->getContent(), -1, PREG_SPLIT_DELIM_CAPTURE); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 | 75 |  |                 $leftovers = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 71 | 75 |  |                 foreach ($contents as $i => $content) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 72 | 75 |  |                     if ($i % 2 === 0) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 73 | 75 |  |                         $text = $leftovers . $content; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 74 | 75 |  |                         if ($text !== '') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 75 | 48 |  |                             $node->insertBefore(new Text($leftovers . $content)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 76 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 77 | 75 |  |                         $leftovers = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 78 |  |  |                     } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 79 | 57 |  |                         $leftovers = ''; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 80 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 81 |  |  |                         // Does the URL end with punctuation that should be stripped? | 
            
                                                                                                            
                            
            
                                    
            
            
                | 82 | 57 |  |                         if (preg_match('/(.+)([?!.,:*_~]+)$/', $content, $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 83 |  |  |                             // Add the punctuation later | 
            
                                                                                                            
                            
            
                                    
            
            
                | 84 | 15 |  |                             $content = $matches[1]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 85 | 15 |  |                             $leftovers = $matches[2]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 86 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 87 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 88 |  |  |                         // Does the URL end with something that looks like an entity reference? | 
            
                                                                                                            
                            
            
                                    
            
            
                | 89 | 57 |  |                         if (preg_match('/(.+)(&[A-Za-z0-9]+;)$/', $content, $matches)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 90 | 3 |  |                             $content = $matches[1]; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 91 | 3 |  |                             $leftovers = $matches[2] . $leftovers; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 92 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 93 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 94 |  |  |                         // Does the URL need its closing paren chopped off? | 
            
                                                                                                            
                            
            
                                    
            
            
                | 95 | 57 |  |                         if (substr($content, -1) === ')' && self::hasMoreCloserParensThanOpeners($content)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 96 | 3 |  |                             $content = substr($content, 0, -1); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 | 3 |  |                             $leftovers .= ')'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |                         // Auto-prefix 'http://' onto 'www' URLs | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 | 57 |  |                         if (substr($content, 0, 4) === 'www.') { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 | 27 |  |                             $node->insertBefore(new Link('http://' . $content, $content)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |                         } else { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 | 45 |  |                             $node->insertBefore(new Link($content, $content)); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |                         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |                     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |                 } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 | 75 |  |                 $node->detach(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 | 75 |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Tells whether the text contains more ')' characters than '(' characters.
                 *
                 * Every parenthesis in the string is counted, not only those at the end;
                 * nesting and ordering are ignored, only the two totals are compared.
                 *
                 * @param string $content Candidate autolink text to inspect
                 *
                 * @return bool True when closing parentheses strictly outnumber opening ones
                 */
                private static function hasMoreCloserParensThanOpeners($content)
                {
                    // Count both kinds of parenthesis over the whole autolink text.
                    $openers = substr_count($content, '(');
                    $closers = substr_count($content, ')');

                    // A surplus of closers suggests the final ')' belongs to the text
                    // surrounding the autolink (e.g. a link written inside parentheses)
                    // rather than to the link itself.
                    return $closers > $openers;
                }
            
                                                        
            
                                    
            
            
                | 134 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 135 |  |  |  |