| Total Complexity | 64 |
| Total Lines | 420 |
| Duplicated Lines | 0 % |
| Changes | 6 | ||
| Bugs | 0 | Features | 0 |
Complex classes like URLInfo_Parser often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use URLInfo_Parser, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 24 | class URLInfo_Parser |
||
| 25 | { |
||
| 26 | /** |
||
| 27 | * @var string |
||
| 28 | */ |
||
| 29 | protected $url; |
||
| 30 | |||
| 31 | /** |
||
| 32 | * @var bool |
||
| 33 | */ |
||
| 34 | protected $isValid = false; |
||
| 35 | |||
| 36 | /** |
||
| 37 | * @var array |
||
| 38 | */ |
||
| 39 | protected $info; |
||
| 40 | |||
| 41 | /** |
||
| 42 | * @var array|NULL |
||
| 43 | */ |
||
| 44 | protected $error; |
||
| 45 | |||
| 46 | /** |
||
| 47 | * @var string[] |
||
| 48 | */ |
||
| 49 | protected $knownSchemes = array( |
||
| 50 | 'ftp', |
||
| 51 | 'http', |
||
| 52 | 'https', |
||
| 53 | 'mailto', |
||
| 54 | 'tel', |
||
| 55 | 'data', |
||
| 56 | 'file', |
||
| 57 | 'git' |
||
| 58 | ); |
||
| 59 | |||
| 60 | /** |
||
| 61 | * Stores a list of all unicode characters in the URL |
||
| 62 | * that have been filtered out before parsing it with |
||
| 63 | * parse_url. |
||
| 64 | * |
||
| 65 | * @var array<string,string> |
||
| 66 | */ |
||
| 67 | protected $unicodeChars = array(); |
||
| 68 | |||
| 69 | /** |
||
| 70 | * @var bool |
||
| 71 | */ |
||
| 72 | protected $encodeUTF = false; |
||
| 73 | |||
/**
 * Stores the URL and the encoding preference, parses the URL
 * right away, and then determines whether it is valid.
 *
 * @param string $url The target URL.
 * @param bool $encodeUTF Whether to URL encode any plain text unicode characters.
 */
public function __construct(string $url, bool $encodeUTF)
{
    $this->url = $url;
    $this->encodeUTF = $encodeUTF;

    $this->parse();

    // When one of the special types is detected, the URL has
    // already been flagged as valid: no validation is needed.
    $typeDetected = $this->detectType();

    if($typeDetected)
    {
        return;
    }

    $this->validate();
}
||
| 90 | |||
/**
 * Returns the URL information collected while parsing.
 *
 * The array originates from PHP's parse_url, and has been
 * filtered and adjusted by the parser afterwards.
 *
 * @return array
 */
public function getInfo() : array
{
    $info = $this->info;

    return $info;
}
||
| 101 | |||
/**
 * Parses the URL and stores the result in the info property.
 *
 * The unicode letters of the URL are URL encoded first, then
 * the URL is passed to parse_url and the result is filtered.
 * Finally, the unicode characters are restored in the result,
 * unless they are meant to stay URL encoded.
 */
protected function parse()
{
    $this->filterUnicodeChars();

    $parsed = parse_url($this->url);

    // parse_url returns false for seriously malformed URLs. Fall
    // back to an empty array, so the info property always holds
    // an array as getInfo() and restoreUnicodeChars() require.
    $this->info = is_array($parsed) ? $parsed : array();

    $this->filterParsed();

    // if the URL contains any unicode characters, and we
    // do not want them URL encoded, restore them.
    if(!$this->encodeUTF && !empty($this->unicodeChars))
    {
        $this->info = $this->restoreUnicodeChars($this->info);
    }
}
||
| 117 | |||
/**
 * Prepares the URL for parse_url: every letter that
 * rawurlencode would change is replaced by its URL encoded
 * form. Each replacement is remembered in the unicodeChars
 * property, indexed by the encoded form.
 */
protected function filterUnicodeChars() : void
{
    $result = '';

    foreach(ConvertHelper::string2array($this->url) as $char)
    {
        // Only letters are candidates for the encoding.
        if(!preg_match('/\p{L}/usix', $char))
        {
            $result .= $char;
            continue;
        }

        $encoded = rawurlencode($char);

        // Letters that the encoding leaves untouched are kept as they are.
        if($encoded == $char)
        {
            $result .= $char;
            continue;
        }

        $this->unicodeChars[$encoded] = $char;
        $result .= $encoded;
    }

    $this->url = $result;
}
||
| 147 | |||
/**
 * Calls the type detection methods one after the other, in
 * the order email, fragment link, phone link and IP address.
 * As soon as one of them reports a match, the URL is flagged
 * as valid and the remaining methods are not called.
 *
 * @return bool Whether one of the types matched.
 */
protected function detectType() : bool
{
    $suffixes = array(
        'email',
        'fragmentLink',
        'phoneLink',
        'ipAddress'
    );

    foreach($suffixes as $suffix)
    {
        $matched = $this->{'detectType_'.$suffix}();

        if($matched !== true)
        {
            continue;
        }

        $this->isValid = true;
        return true;
    }

    return false;
}
||
| 170 | |||
| 171 | protected function validate() |
||
| 189 | } |
||
| 190 | |||
/**
 * Checks that the parsed information contains a host, and
 * registers the missing host error if it does not.
 *
 * @return bool Whether a host is present.
 */
protected function validate_hostIsPresent() : bool
{
    $hasHost = isset($this->info['host']);

    // A missing host can be caused by a typo, for example a
    // link with only one slash after the scheme: "http:/hostname"
    if(!$hasHost)
    {
        $message =
            t('Cannot determine the link\'s host name.') . ' ' .
            t('This usually happens when there\'s a typo somewhere.');

        $this->setError(URLInfo::ERROR_MISSING_HOST, $message);
    }

    return $hasHost;
}
||
| 208 | |||
/**
 * Checks that the parsed information contains a scheme, and
 * registers the missing scheme error if it does not.
 *
 * @return bool Whether a scheme is present.
 */
protected function validate_schemeIsSet() : bool
{
    $hasScheme = isset($this->info['scheme']);

    if(!$hasScheme)
    {
        $message = t('Cannot determine the link\'s scheme, e.g. %1$s.', 'http');

        $this->setError(URLInfo::ERROR_MISSING_SCHEME, $message);
    }

    return $hasScheme;
}
||
| 225 | |||
/**
 * Checks that the scheme of the URL is one of the known
 * schemes, and registers the invalid scheme error if it
 * is not. The error message lists the known schemes.
 *
 * @return bool Whether the scheme is a known one.
 */
protected function validate_schemeIsKnown() : bool
{
    $isKnown = in_array($this->info['scheme'], $this->knownSchemes);

    if(!$isKnown)
    {
        $this->setError(
            URLInfo::ERROR_INVALID_SCHEME,
            t('The scheme %1$s is not supported for links.', $this->info['scheme']) . ' ' .
            t('Valid schemes are: %1$s.', implode(', ', $this->knownSchemes))
        );

        return false;
    }

    return true;
}
||
| 240 | |||
| 241 | /** |
||
| 242 | * Goes through all information in the parse_url result |
||
| 243 | * array, and attempts to fix any user errors in formatting |
||
| 244 | * that can be recovered from, mostly regarding stray spaces. |
||
| 245 | */ |
||
| 246 | protected function filterParsed() : void |
||
| 284 | } |
||
| 285 | } |
||
| 286 | |||
| 287 | /** |
||
| 288 | * Recursively goes through the array, and converts all previously |
||
| 289 | * URL encoded characters with their unicode character counterparts. |
||
| 290 | * |
||
| 291 | * @param array $subject |
||
| 292 | * @return array |
||
| 293 | */ |
||
| 294 | protected function restoreUnicodeChars(array $subject) : array |
||
| 315 | } |
||
| 316 | |||
/**
 * Converts the URL encoded unicode characters found in the
 * string back to the original characters, using the
 * replacements stored in the unicodeChars property.
 *
 * @param string $string
 * @return string
 */
protected function restoreUnicodeChar(string $string) : string
{
    // Strings without a percent sign are returned unchanged.
    if(!strstr($string, '%'))
    {
        return $string;
    }

    $encoded = array_keys($this->unicodeChars);
    $decoded = array_values($this->unicodeChars);

    return str_replace($encoded, $decoded, $string);
}
||
| 333 | |||
/**
 * Detects email links: either the scheme is "mailto", or the
 * path matches the email regex. In the second case, the
 * scheme is set to "mailto". On a match, the type is set
 * to the email type.
 *
 * @return bool Whether the URL is an email link.
 */
protected function detectType_email() : bool
{
    $hasMailto = isset($this->info['scheme']) && $this->info['scheme'] == 'mailto';

    if(!$hasMailto)
    {
        $pathIsEmail = isset($this->info['path']) && preg_match(RegexHelper::REGEX_EMAIL, $this->info['path']);

        if(!$pathIsEmail)
        {
            return false;
        }

        $this->info['scheme'] = 'mailto';
    }

    $this->info['type'] = URLInfo::TYPE_EMAIL;

    return true;
}
||
| 350 | |||
/**
 * Detects URLs that consist of an IPv4 address.
 *
 * If only a path is present and it matches the IPv4 regex,
 * it is moved to the host, and the scheme is set to "https".
 * A host-only URL whose host matches the IPv4 regex then
 * gets the host copied to the "ip" key.
 *
 * @return bool Whether an IP address was detected.
 */
protected function detectType_ipAddress() : bool
{
    if($this->isPathOnly())
    {
        $path = $this->info['path'];

        if(preg_match(RegexHelper::REGEX_IPV4, $path))
        {
            $this->info['host'] = $path;
            $this->info['scheme'] = 'https';
            unset($this->info['path']);
        }
    }

    if(!$this->isHostOnly())
    {
        return false;
    }

    if(!preg_match(RegexHelper::REGEX_IPV4, $this->info['host']))
    {
        return false;
    }

    $this->info['ip'] = $this->info['host'];

    return true;
}
||
| 366 | |||
/**
 * Checks whether the parsed information has a path,
 * but neither a host nor a scheme.
 *
 * @return bool
 */
private function isPathOnly() : bool
{
    if(isset($this->info['host']) || isset($this->info['scheme']))
    {
        return false;
    }

    return isset($this->info['path']);
}
||
| 371 | |||
| 372 | private function isHostOnly() : bool |
||
| 375 | } |
||
| 376 | |||
/**
 * Detects fragment links: a fragment is present, but no
 * scheme. On a match, the type is set to the fragment type.
 *
 * @return bool Whether the URL is a fragment link.
 */
protected function detectType_fragmentLink() : bool
{
    $isFragment = isset($this->info['fragment']) && !isset($this->info['scheme']);

    if($isFragment)
    {
        $this->info['type'] = URLInfo::TYPE_FRAGMENT;
    }

    return $isFragment;
}
||
| 386 | |||
/**
 * Detects phone links, which use the "tel" scheme. On a
 * match, the type is set to the phone type.
 *
 * @return bool Whether the URL is a phone link.
 */
protected function detectType_phoneLink() : bool
{
    if(!isset($this->info['scheme']) || $this->info['scheme'] != 'tel')
    {
        return false;
    }

    $this->info['type'] = URLInfo::TYPE_PHONE;

    return true;
}
||
| 396 | |||
/**
 * Stores the validation error details, and flags
 * the URL as invalid.
 *
 * @param int $code The error code.
 * @param string $message The error message.
 */
protected function setError(int $code, string $message)
{
    $error = array();
    $error['code'] = $code;
    $error['message'] = $message;

    $this->error = $error;
    $this->isValid = false;
}
||
| 406 | |||
/**
 * Tells whether the parsed URL has been found to be valid.
 *
 * @return bool
 */
public function isValid() : bool
{
    $valid = $this->isValid;

    return $valid;
}
||
| 415 | |||
/**
 * Retrieves the message of the validation error.
 *
 * @return string The error message, or an empty string if no error has been stored.
 */
public function getErrorMessage() : string
{
    return isset($this->error) ? $this->error['message'] : '';
}
||
| 430 | |||
| 431 | /** |
||
| 432 | * If the validation failed, retrieves the validation |
||
| 433 | * error code. |
||
| 434 | * |
||
| 435 | * @return int |
||
| 436 | */ |
||
| 437 | public function getErrorCode() : int |
||
| 444 | } |
||
| 445 | } |
||
| 446 |