| Total Complexity | 85 |
| Total Lines | 579 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
Complex classes like Punycode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Punycode, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 29 | class Punycode |
||
| 30 | { |
||
/**
 * Bootstring parameter values used by the Punycode encoder.
 */
const BASE = 36;
const TMIN = 1;
const TMAX = 26;
const SKEW = 38;
const DAMP = 700;
const INITIAL_BIAS = 72;
const INITIAL_N = 128;
const PREFIX = 'xn--';
const DELIMITER = '-';

/**
 * IDNA error constants. Each value is a distinct power of two.
 */
const IDNA_ERROR_EMPTY_LABEL = 1;
const IDNA_ERROR_LABEL_TOO_LONG = 2;
const IDNA_ERROR_DOMAIN_NAME_TOO_LONG = 4;
const IDNA_ERROR_LEADING_HYPHEN = 8;
const IDNA_ERROR_TRAILING_HYPHEN = 16;
const IDNA_ERROR_HYPHEN_3_4 = 32;
const IDNA_ERROR_LEADING_COMBINING_MARK = 64;
const IDNA_ERROR_DISALLOWED = 128;
const IDNA_ERROR_PUNYCODE = 256;
const IDNA_ERROR_LABEL_HAS_DOT = 512;
const IDNA_ERROR_INVALID_ACE_LABEL = 1024;
const IDNA_ERROR_BIDI = 2048;
const IDNA_ERROR_CONTEXTJ = 4096;

/**
 * Encode table: maps a digit value (0-35) to its output character.
 *
 * @var array
 */
protected static $encodeTable = array(
	'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
	'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
	'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
);

/**
 * Decode table: maps a character to its digit value (0-35).
 *
 * @var array
 */
protected static $decodeTable = array(
	'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5,
	'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11,
	'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
	's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
	'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
	'4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
);

/**
 * Character encoding passed to the mb_* string functions.
 *
 * @var string
 */
protected $encoding;

/**
 * Whether to use Non-Transitional Processing.
 * Setting this to true breaks backward compatibility with IDNA2003.
 *
 * @var bool
 */
protected $nonTransitional = false;

/**
 * Whether to use STD3 ASCII rules.
 *
 * @var bool
 */
protected $std3 = false;
||
| 108 | |||
/**
 * Creates a converter bound to a character encoding.
 *
 * The value is stored as-is and later handed to the mb_* functions
 * used while splitting labels into characters.
 *
 * @param string $encoding Character encoding of the input (default 'UTF-8')
 */
public function __construct($encoding = 'UTF-8')
{
	$this->encoding = $encoding;
}
||
| 118 | |||
/**
 * Switches Non-Transitional Processing on or off.
 *
 * When off (the default), preprocess() additionally applies the
 * deviation mappings.
 *
 * @param bool $nonTransitional True to use Non-Transitional Processing
 */
public function useNonTransitional(bool $nonTransitional)
{
	$this->nonTransitional = $nonTransitional;
}
||
| 128 | |||
/**
 * Switches STD3 ASCII rules on or off.
 *
 * When on, preprocess() also rejects the STD3-disallowed characters;
 * when off (the default), it applies the non-STD3 mappings instead.
 *
 * @param bool $std3 True to use STD3 ASCII rules
 */
public function useStd3(bool $std3)
{
	$this->std3 = $std3;
}
||
| 138 | |||
/**
 * Encode a domain to its Punycode version
 *
 * Returns false when decode($input) fails, when preprocessing records
 * any error, when a label fails validation with a fatal status, or when
 * the result (ignoring trailing dots) is longer than 253 bytes.
 *
 * @param string $input Domain name in Unicode to be encoded
 * @return string|false Punycode representation in ASCII, or false on failure
 */
public function encode($input)
{
	// For compatibility with idn_to_* functions
	if ($this->decode($input) === false)
		return false;

	$errors = array();
	$preprocessed = $this->preprocess($input, $errors);

	if (!empty($errors))
		return false;

	$parts = explode('.', $preprocessed);
	$parts_count = count($parts);

	foreach ($parts as $p => &$part)
	{
		$part = $this->encodePart($part);

		// The status is compared against this class's own constants, so
		// the method works whether or not ext/intl defines global ones.
		switch ($this->validateLabel($part, true))
		{
			case static::IDNA_ERROR_LABEL_TOO_LONG:
			case static::IDNA_ERROR_LEADING_HYPHEN:
			case static::IDNA_ERROR_TRAILING_HYPHEN:
			case static::IDNA_ERROR_LEADING_COMBINING_MARK:
			case static::IDNA_ERROR_DISALLOWED:
			case static::IDNA_ERROR_PUNYCODE:
			case static::IDNA_ERROR_LABEL_HAS_DOT:
			case static::IDNA_ERROR_INVALID_ACE_LABEL:
			case static::IDNA_ERROR_BIDI:
			case static::IDNA_ERROR_CONTEXTJ:
				return false;

			case static::IDNA_ERROR_HYPHEN_3_4:
				// Tolerated: the encoded label is kept as it is.
				// NOTE(review): the previous code assigned $parts[$p] back to
				// $part here, which was a no-op because $part is a reference
				// to that element. Confirm whether restoring the unencoded
				// label was intended.
				break;

			case static::IDNA_ERROR_EMPTY_LABEL:
				// An empty label is only acceptable as the trailing label
				// of a multi-label name (i.e. a trailing dot).
				if ($parts_count === 1 || $p !== $parts_count - 1)
					return false;
				break;

			default:
				break;
		}
	}
	// Break the reference so later writes to $part cannot alter $parts.
	unset($part);

	$output = implode('.', $parts);

	// IDNA_ERROR_DOMAIN_NAME_TOO_LONG
	if (strlen(rtrim($output, '.')) > 253)
		return false;

	return $output;
}
||
| 201 | |||
/**
 * Encode a single label of a domain name (such as the TLD) to Punycode.
 *
 * A label made up solely of basic (below 128) code points is returned
 * unchanged. Otherwise the basic code points are copied first, followed by
 * the delimiter (if there were any), then the encoded deltas for the
 * non-basic code points; the result is prefixed with static::PREFIX.
 *
 * @param string $input Part of a domain name
 * @return string Punycode representation of a domain part
 */
protected function encodePart($input)
{
	$points = $this->listCodePoints($input);

	$basicCount = count($points['basic']);
	$handled = $basicCount;
	$n = static::INITIAL_N;
	$bias = static::INITIAL_BIAS;
	$delta = 0;

	// Copy the basic code points through verbatim.
	$encoded = '';
	foreach ($points['basic'] as $basicCode)
		$encoded .= $this->codePointToChar($basicCode);

	// Nothing but basic code points: no encoding needed.
	if ($input === $encoded)
		return $encoded;

	if ($basicCount > 0)
		$encoded .= static::DELIMITER;

	// Distinct non-basic code points, visited in ascending order.
	$pending = array_unique($points['nonBasic']);
	sort($pending);

	$total = mb_strlen($input, $this->encoding);
	for ($next = 0; $handled < $total; $next++)
	{
		$m = $pending[$next];
		$delta += ($m - $n) * ($handled + 1);
		$n = $m;

		foreach ($points['all'] as $current)
		{
			if ($current < $n || $current < static::INITIAL_N)
				$delta++;

			if ($current !== $n)
				continue;

			// Emit $delta as a variable-length integer.
			$q = $delta;
			$k = static::BASE;
			while (($t = $this->calculateThreshold($k, $bias)) <= $q)
			{
				$span = static::BASE - $t;
				$encoded .= static::$encodeTable[$t + (((int) $q - $t) % $span)];

				$q = ($q - $t) / $span;
				$k += static::BASE;
			}
			$encoded .= static::$encodeTable[(int) $q];

			$bias = $this->adapt($delta, $handled + 1, ($handled === $basicCount));
			$delta = 0;
			$handled++;
		}

		$delta++;
		$n++;
	}

	return static::PREFIX . $encoded;
}
||
| 270 | |||
/**
 * Decode a Punycode domain name to its Unicode counterpart
 *
 * Each dot-separated label that starts with static::PREFIX is decoded
 * with decodePart(); every label is then checked with validateLabel().
 * The only validation failure that is tolerated is an empty final label
 * in a name that has more than one label.
 *
 * @param string $input Domain name in Punycode
 * @return string|false Unicode domain name, or false on failure
 */
public function decode($input)
{
	$errors = array();
	$preprocessed = $this->preprocess($input, $errors);

	if (!empty($errors))
		return false;

	$labels = explode('.', $preprocessed);
	$lastIndex = count($labels) - 1;

	foreach ($labels as $index => $label)
	{
		if (strpos($label, static::PREFIX) === 0)
		{
			$label = $this->decodePart(substr($label, strlen(static::PREFIX)));

			if ($label === false)
				return false;
		}

		if ($this->validateLabel($label, false) !== 0)
		{
			// Reject unless this is an empty trailing label of a multi-label name.
			if ($label !== '' || $lastIndex === 0 || $index !== $lastIndex)
				return false;
		}

		$labels[$index] = $label;
	}

	return implode('.', $labels);
}
||
| 316 | |||
| 317 | /** |
||
| 318 | * Decode a part of domain name, such as tld |
||
| 319 | * |
||
| 320 | * @param string $input Part of a domain name |
||
| 321 | * @return string Unicode domain part |
||
| 322 | */ |
||
| 323 | protected function decodePart($input) |
||
| 373 | } |
||
| 374 | |||
| 375 | /** |
||
| 376 | * Calculate the bias threshold to fall between TMIN and TMAX |
||
| 377 | * |
||
| 378 | * @param integer $k |
||
| 379 | * @param integer $bias |
||
| 380 | * @return integer |
||
| 381 | */ |
||
| 382 | protected function calculateThreshold($k, $bias) |
||
| 393 | } |
||
| 394 | |||
/**
 * Bias adaptation
 *
 * Scales $delta down (by DAMP on the first call, otherwise by 2), adds
 * its share per code point, then repeatedly divides by (BASE - TMIN)
 * while it exceeds ((BASE - TMIN) * TMAX) / 2, adding BASE to the result
 * for each division.
 *
 * @param integer $delta
 * @param integer $numPoints
 * @param boolean $firstTime
 * @return integer The new bias
 */
protected function adapt($delta, $numPoints, $firstTime)
{
	$divisor = $firstTime ? static::DAMP : 2;
	$delta = (int) ($delta / $divisor);
	$delta += (int) ($delta / $numPoints);

	$range = static::BASE - static::TMIN;
	$limit = ($range * static::TMAX) / 2;

	for ($k = 0; $delta > $limit; $k += static::BASE)
		$delta = (int) ($delta / $range);

	return $k + (int) ((($range + 1) * $delta) / ($delta + static::SKEW));
}
||
| 422 | |||
/**
 * List code points for a given input
 *
 * Walks the input one character at a time (using the configured
 * encoding) and sorts each code point into 'basic' (below 128) or
 * 'nonBasic'; 'all' holds every code point in input order.
 *
 * @param string $input
 * @return array Array with keys 'all', 'basic' and 'nonBasic', each a list of code points
 */
protected function listCodePoints($input)
{
	$all = array();
	$basic = array();
	$nonBasic = array();

	$count = mb_strlen($input, $this->encoding);
	for ($pos = 0; $pos < $count; $pos++)
	{
		$code = $this->charToCodePoint(mb_substr($input, $pos, 1, $this->encoding));

		$all[] = $code;
		if ($code < 128)
			$basic[] = $code;
		else
			$nonBasic[] = $code;
	}

	return array(
		'all' => $all,
		'basic' => $basic,
		'nonBasic' => $nonBasic,
	);
}
||
| 454 | |||
/**
 * Convert a single or multi-byte character to its code point
 *
 * The lead byte selects how many bytes are read: one (below 128), two
 * (below 224), three (below 240) or four (anything else). The bytes are
 * combined as a UTF-8 sequence; no validation is performed.
 *
 * @param string $char
 * @return integer
 */
protected function charToCodePoint($char)
{
	$lead = ord($char[0]);

	// Single byte.
	if ($lead < 0x80)
		return $lead;

	// Two-byte sequence.
	if ($lead < 0xE0)
		return (($lead - 0xC0) * 0x40) + (ord($char[1]) - 0x80);

	// Three-byte sequence.
	if ($lead < 0xF0)
		return (($lead - 0xE0) * 0x1000) + ((ord($char[1]) - 0x80) * 0x40) + (ord($char[2]) - 0x80);

	// Four-byte sequence.
	return (($lead - 0xF0) * 0x40000) + ((ord($char[1]) - 0x80) * 0x1000) + ((ord($char[2]) - 0x80) * 0x40) + (ord($char[3]) - 0x80);
}
||
| 481 | |||
/**
 * Convert a code point to its single or multi-byte character
 *
 * Produces one byte for values up to 0x7F, two up to 0x7FF, three up to
 * 0xFFFF and four for anything larger, laid out as a UTF-8 sequence.
 *
 * @param integer $code
 * @return string
 */
protected function codePointToChar($code)
{
	if ($code <= 0x7F)
		return chr($code);

	// Every multi-byte form ends with the low six bits as a continuation byte.
	$tail = chr(($code & 0x3F) + 0x80);

	if ($code <= 0x7FF)
		return chr(($code >> 6) + 0xC0) . $tail;

	$mid = chr((($code >> 6) & 0x3F) + 0x80);

	if ($code <= 0xFFFF)
		return chr(($code >> 12) + 0xE0) . $mid . $tail;

	return chr(($code >> 18) + 0xF0) . chr((($code >> 12) & 0x3F) + 0x80) . $mid . $tail;
}
||
| 507 | |||
/**
 * Prepare domain name string for Punycode processing.
 * See https://www.unicode.org/reports/tr46/#Processing
 *
 * Loads Unicode/Idna.php and Subs-Charset.php from $sourcedir, records a
 * 'disallowed' error if the domain matches the disallowed character class
 * (extended with the STD3 class when STD3 rules are on), strips the
 * ignored characters, applies the mapping tables with strtr() and passes
 * the result through utf8_normalize_c().
 *
 * @param string $domain A domain name
 * @param array $errors Will record any errors encountered during preprocessing
 * @return string The mapped and normalized domain (presumably NFC, going by the helper's name - confirm in Subs-Charset.php)
 */
protected function preprocess(string $domain, array &$errors = array())
{
	global $sourcedir;

	require_once($sourcedir . '/Unicode/Idna.php');
	require_once($sourcedir . '/Subs-Charset.php');

	$regexes = idna_regex();

	// Build the character class of code points that must not appear.
	$forbidden = $regexes['disallowed'];
	if ($this->std3)
		$forbidden .= $regexes['disallowed_std3'];

	if (preg_match('/[' . $forbidden . ']/u', $domain))
		$errors[] = 'disallowed';

	$domain = preg_replace('/[' . $regexes['ignored'] . ']/u', '', $domain);

	unset($regexes, $forbidden);

	// Later tables are merged over the base mapping table.
	$maps = idna_maps();

	if (!$this->nonTransitional)
		$maps = array_merge($maps, idna_maps_deviation());

	if (!$this->std3)
		$maps = array_merge($maps, idna_maps_not_std3());

	$mapped = strtr($domain, $maps);

	return utf8_normalize_c($mapped);
}
||
| 541 | |||
| 542 | /** |
||
| 543 | * Validates an individual part of a domain name. |
||
| 544 | * |
||
| 545 | * @param string $label Individual part of a domain name. |
||
| 546 | * @param bool $toPunycode True for encoding to Punycode, false for decoding. |
||
| 547 | */ |
||
| 548 | protected function validateLabel(string $label, bool $toPunycode = true) |
||
| 608 | } |
||
| 609 | } |
||
| 610 | |||
| 611 | ?> |