| 1 |  |  | <?php | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | namespace Riimu\Kit\UrlParser; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  * Provides a RFC 3986 compliant solution to URL parsing. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  * UriParser provides a method for parsing URLs that accurately complies with | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |  * the RFC specification. Unlike the built-in function `parse_url()`, the parser in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  * this library is based on the ABNF definition of the generic URI syntax. In | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  * other words, this library does not allow any kind of invalid URLs and parses | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  * them exactly as defined in the specification. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |  * While the intention of this library is to provide an accurate implementation | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  * for URL parsing, it is possible to use this library for parsing any kind of | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  * valid URIs, since the parsing is simply based on the generic URI syntax. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |  * Some of the features are simply more suited to dealing with URLs. The parser, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |  * however, does not provide any additional validation based on the URI scheme. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |  * While the RFC specification does not allow UTF-8 characters in URIs, these | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  * are still commonly used, especially in user input. To accommodate this fact, | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  * the parser provides two additional compatibility modes that permit UTF-8 in | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |  * some of the URI components in addition to providing a simple support for | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |  * international domain names. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  * @see https://tools.ietf.org/html/rfc3986 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |  * @author Riikka Kalliomäki <[email protected]> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |  * @copyright Copyright (c) 2015-2017 Riikka Kalliomäki | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |  * @license http://opensource.org/licenses/mit-license.php MIT License | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  | class UriParser | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  | { | 
            
                                                                                                            
                            
            
                                    
            
            
                /** Parsing mode that conforms strictly to the RFC 3986 specification */
                const MODE_RFC3986 = 1;

                /** Parsing mode that allows UTF-8 characters in some URI components */
                const MODE_UTF8 = 2;

                /**
                 * Parsing mode that also converts international domain names to ascii.
                 *
                 * Legacy name: it has the same value (4) as MODE_IDNA, so the two
                 * constants are interchangeable wherever a mode is compared.
                 *
                 * @deprecated Use MODE_IDNA instead
                 * @see UriParser::MODE_IDNA
                 */
                const MODE_IDNA2003 = 4;

                /** Parsing mode that also converts international domain names to ascii */
                const MODE_IDNA = 4;
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Maps URI component names to the name of the method used to assign them.
                 *
                 * All four `path_*` keys map to the same `withPath` method.
                 * NOTE(review): the keys presumably match the component names produced
                 * by the URI pattern matching - confirm against the code that reads
                 * this map.
                 *
                 * @var array<string,string> List of methods used to assign the URI components
                 */
                private static $setters = [
                    'scheme' => 'withScheme',
                    'host' => 'withHost',
                    'port' => 'withPort',
                    'path_abempty' => 'withPath',
                    'path_absolute' => 'withPath',
                    'path_noscheme' => 'withPath',
                    'path_rootless' => 'withPath',
                    'query' => 'withQuery',
                    'fragment' => 'withFragment',
                ];
            
                                                                                                            
                            
            
                                    
            
            
                | 61 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * @var int The current parsing mode; initialized to MODE_RFC3986 by the
                 *          constructor and replaced by setMode()
                 */
                private $mode;
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Initializes the parser in the strict RFC 3986 parsing mode.
                 *
                 * The mode can be changed afterwards by calling setMode().
                 */
                public function __construct()
                {
                    $defaultMode = self::MODE_RFC3986;
                    $this->mode = $defaultMode;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 72 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                /**
                 * Selects the parsing mode used by the parser.
                 *
                 * Three parsing modes are available, each identified by one of the
                 * parsing mode constants:
                 *
                 * - `MODE_RFC3986` follows the RFC specification strictly and rejects
                 *   any non ascii characters in the URIs. This is the default mode.
                 *
                 * - `MODE_UTF8` permits UTF-8 characters in the user information,
                 *   path, query and fragment components of the URI. These characters
                 *   will be converted to appropriate percent encoded sequences.
                 *
                 * - `MODE_IDNA` additionally permits UTF-8 characters in the domain
                 *   name and converts the international domain name to ascii
                 *   according to the IDNA standard.
                 *
                 * The provided value is cast to an integer and stored as is; it is not
                 * checked against the list of known mode constants.
                 *
                 * @param int $mode One of the parsing mode constants
                 */
                public function setMode($mode)
                {
                    $parsingMode = (int) $mode;
                    $this->mode = $parsingMode;
                }
            
                                                                                                            
                            
            
                                    
            
            
                | 96 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 97 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 98 |  |  |      * Parses the URL using the generic URI syntax. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 99 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 100 |  |  |      * This method returns the `Uri` instance constructed from the components | 
            
                                                                                                            
                            
            
                                    
            
            
                | 101 |  |  |      * parsed from the URL. The URL is parsed using either the absolute URI | 
            
                                                                                                            
                            
            
                                    
            
            
                | 102 |  |  |      * pattern or the relative URI pattern based on which one matches the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 103 |  |  |      * provided string. If the URL cannot be parsed as a valid URI, null is | 
            
                                                                                                            
                            
            
                                    
            
            
                | 104 |  |  |      * returned instead. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 105 |  |  |      * | 
            
                                                                                                            
                            
            
                                    
            
            
                | 106 |  |  |      * @param string $uri The URL to parse | 
            
                                                                                                            
                            
            
                                    
            
            
                | 107 |  |  |      * @return Uri|null The parsed URL or null if the URL is invalid | 
            
                                                                                                            
                            
            
                                    
            
            
                | 108 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 109 |  |  |     public function parse($uri) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 110 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 111 |  |  |         if (!$this->isValidString($uri)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 112 |  |  |             return null; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 113 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 114 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |         $pattern = new UriPattern(); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |         $pattern->allowNonAscii($this->mode !== self::MODE_RFC3986); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 117 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 118 |  |  |         if ($pattern->matchUri($uri, $match)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 119 |  |  |             try { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 120 |  |  |                 return $this->buildUri($match); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 121 |  |  |             } catch (\InvalidArgumentException $exception) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 122 |  |  |                 return null; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 123 |  |  |             } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 124 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 125 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 126 |  |  |         return null; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 127 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 128 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 129 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 130 |  |  |      * Tells if the URI string is valid for the current parser mode. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 131 |  |  |      * @param string $uri The URI to validate | 
            
                                                                                                            
                            
            
                                    
            
            
                | 132 |  |  |      * @return bool True if the string is valid, false if not | 
            
                                                                                                            
                            
            
                                    
            
            
                | 133 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 134 |  |  |     private function isValidString($uri) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 135 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 136 |  |  |         if (preg_match('/^[\\x00-\\x7F]*$/', $uri)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 137 |  |  |             return true; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 138 |  |  |         } elseif ($this->mode === self::MODE_RFC3986) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 139 |  |  |             return false; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 140 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 141 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 142 |  |  |         // Validate UTF-8 via regular expression to avoid mbstring dependency | 
            
                                                                                                            
                            
            
                                    
            
            
                | 143 |  |  |         $pattern = | 
            
                                                                                                            
                            
            
                                    
            
            
                | 144 |  |  |             '/^(?> | 
            
                                                                                                            
                            
            
                                    
            
            
                | 145 |  |  |                 [\x00-\x7F]+                       # ASCII | 
            
                                                                                                            
                            
            
                                    
            
            
                | 146 |  |  |               | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte | 
            
                                                                                                            
                            
            
                                    
            
            
                | 147 |  |  |               |  \xE0[\xA0-\xBF][\x80-\xBF]        # excluding over longs | 
            
                                                                                                            
                            
            
                                    
            
            
                | 148 |  |  |               | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte | 
            
                                                                                                            
                            
            
                                    
            
            
                | 149 |  |  |               |  \xED[\x80-\x9F][\x80-\xBF]        # excluding surrogates | 
            
                                                                                                            
                            
            
                                    
            
            
                | 150 |  |  |               |  \xF0[\x90-\xBF][\x80-\xBF]{2}     # planes 1-3 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 151 |  |  |               | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 152 |  |  |               |  \xF4[\x80-\x8F][\x80-\xBF]{2}     # plane 16 | 
            
                                                                                                            
                            
            
                                    
            
            
                | 153 |  |  |             )*$/x'; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 154 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 155 |  |  |         return (bool) preg_match($pattern, $uri); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 156 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 157 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 158 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 159 |  |  |      * Builds the Uri instance from the parsed components. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 160 |  |  |      * @param array<string, string> $components Components parsed from the URI | 
            
                                                                                                            
                            
            
                                    
            
            
                | 161 |  |  |      * @return Uri The constructed URI representation | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 162 |  |  |      */ | 
            
                                                                        
                            
            
                                    
            
            
                | 163 |  |  |     private function buildUri(array $components) | 
            
                                                                        
                            
            
                                    
            
            
                | 164 |  |  |     { | 
            
                                                                        
                            
            
                                    
            
            
                | 165 |  |  |         $uri = new Uri(); | 
            
                                                                        
                            
            
                                    
            
            
                | 166 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 167 |  |  |         if (isset($components['reg_name'])) { | 
            
                                                                        
                            
            
                                    
            
            
                | 168 |  |  |             $components['host'] = $this->decodeHost($components['host']); | 
            
                                                                        
                            
            
                                    
            
            
                | 169 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 170 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 171 |  |  |         foreach (array_intersect_key(self::$setters, $components) as $key => $method) { | 
            
                                                                        
                            
            
                                    
            
            
                | 172 |  |  |             $uri = call_user_func([$uri, $method], $components[$key]); | 
            
                                                                        
                            
            
                                    
            
            
                | 173 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 174 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 175 |  |  |         if (isset($components['userinfo'])) { | 
            
                                                                        
                            
            
                                    
            
            
                | 176 |  |  |             list($username, $password) = preg_split('/:|$/', $components['userinfo'], 2); | 
            
                                                                        
                            
            
                                    
            
            
                | 177 |  |  |             $uri = $uri->withUserInfo(rawurldecode($username), rawurldecode($password)); | 
            
                                                                        
                            
            
                                    
            
            
                | 178 |  |  |         } | 
            
                                                                        
                            
            
                                    
            
            
                | 179 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 180 |  |  |         return $uri; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 181 |  |  |     } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 182 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 183 |  |  |     /** | 
            
                                                                                                            
                            
            
                                    
            
            
                | 184 |  |  |      * Decodes the hostname component according to parser mode. | 
            
                                                                                                            
                            
            
                                    
            
            
                | 185 |  |  |      * @param string $hostname The parsed hostname | 
            
                                                                                                            
                            
            
                                    
            
            
                | 186 |  |  |      * @return string The decoded hostname | 
            
                                                                                                            
                            
            
                                    
            
            
                | 187 |  |  |      * @throws \InvalidArgumentException If the hostname is not valid | 
            
                                                                                                            
                            
            
                                    
            
            
                | 188 |  |  |      */ | 
            
                                                                                                            
                            
            
                                    
            
            
                | 189 |  |  |     private function decodeHost($hostname) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 190 |  |  |     { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 191 |  |  |         if (preg_match('/^[\\x00-\\x7F]*$/', $hostname)) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 192 |  |  |             return $hostname; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 193 |  |  |         } elseif ($this->mode !== self::MODE_IDNA) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 194 |  |  |             throw new \InvalidArgumentException("Invalid hostname '$hostname'"); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 195 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 196 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 197 |  |  |         $mode = defined('INTL_IDNA_VARIANT_UTS46') ? INTL_IDNA_VARIANT_UTS46 : INTL_IDNA_VARIANT_2003; | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 198 |  |  |         $hostname = idn_to_ascii($hostname, IDNA_DEFAULT, $mode); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 199 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 200 |  |  |         if ($hostname === false) { | 
            
                                                                                                            
                            
            
                                    
            
            
                | 201 |  |  |             throw new \InvalidArgumentException("Invalid hostname '$hostname'"); | 
            
                                                                                                            
                            
            
                                    
            
            
                | 202 |  |  |         } | 
            
                                                                                                            
                            
            
                                    
            
            
                | 203 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 204 |  |  |         return $hostname; | 
            
                                                                                                            
                            
            
                                    
            
            
                | 205 |  |  |     } | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 206 |  |  | } | 
            
                                                        
            
                                    
            
            
                | 207 |  |  |  |