Complex classes like Punycode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a subclass, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Punycode, and based on these observations, apply Extract Interface, too.
| 1 | <?php | ||
| 12 | class Punycode | ||
| 13 | { | ||
| 14 | |||
| 15 | /** | ||
| 16 | * Bootstring parameter values | ||
| 17 | * | ||
| 18 | */ | ||
| 19 | const BASE = 36; | ||
| 20 | const TMIN = 1; | ||
| 21 | const TMAX = 26; | ||
| 22 | const SKEW = 38; | ||
| 23 | const DAMP = 700; | ||
| 24 | const INITIAL_BIAS = 72; | ||
| 25 | const INITIAL_N = 128; | ||
| 26 | const PREFIX = 'xn--'; | ||
| 27 | const DELIMITER = '-'; | ||
| 28 | |||
| 29 | /** | ||
| 30 | * Encode table | ||
| 31 | * | ||
| 32 | * @var array | ||
| 33 | */ | ||
| 34 | protected static $encodeTable = array( | ||
| 35 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', | ||
| 36 | 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', | ||
| 37 | 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', | ||
| 38 | ); | ||
| 39 | |||
| 40 | /** | ||
| 41 | * Decode table | ||
| 42 | * | ||
| 43 | * @var array | ||
| 44 | */ | ||
| 45 | protected static $decodeTable = array( | ||
| 46 | 'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5, | ||
| 47 | 'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11, | ||
| 48 | 'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17, | ||
| 49 | 's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23, | ||
| 50 | 'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29, | ||
| 51 | '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35 | ||
| 52 | ); | ||
| 53 | |||
| 54 | /** | ||
| 55 | * Character encoding | ||
| 56 | * | ||
| 57 | * @var string | ||
| 58 | */ | ||
| 59 | protected $encoding; | ||
| 60 | |||
| 61 | /** | ||
| 62 | * Constructor | ||
| 63 | * | ||
| 64 | * @param string $encoding Character encoding | ||
| 65 | */ | ||
| 66 | 86 | public function __construct($encoding = 'UTF-8') | |
| 70 | |||
/**
 * Convert a Unicode domain name into its ASCII (Punycode) form.
 *
 * The input is lower-cased using the configured character encoding, split
 * on '.', and every label is run through encodePart(). The encoded labels
 * are then joined with '.' again.
 *
 * @param string $input Domain name in Unicode to be encoded
 * @return string Punycode representation in ASCII
 * @throws LabelOutOfBoundsException  When a label is empty (encodePart() may throw it as well)
 * @throws DomainOutOfBoundsException When the encoded name is longer than 255 octets
 */
public function encode($input)
{
    $labels = explode('.', mb_strtolower($input, $this->encoding));

    foreach ($labels as $index => $label) {
        $octets = strlen($label);
        if ($octets < 1) {
            throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $octets));
        }

        // Write the encoded label back by index instead of iterating by reference.
        $labels[$index] = $this->encodePart($label);
    }

    $encoded = implode('.', $labels);
    $octets = strlen($encoded);
    if ($octets <= 255) {
        return $encoded;
    }

    throw new DomainOutOfBoundsException(sprintf('A full domain name is limited to 255 octets (including the separators), %s given.', $octets));
}
| 96 | |||
/**
 * Encode a single label of a domain name (e.g. the TLD) to its Punycode version.
 *
 * Basic code points (below 128) are copied to the output first. The
 * remaining code points are then processed in ascending order and each
 * insertion is written as a variable-length base-36 integer using the
 * Bootstring parameters defined on this class.
 *
 * @param string $input Part of a domain name
 * @return string Punycode representation of a domain part
 * @throws LabelOutOfBoundsException When the prefixed, encoded label is longer than 63 octets
 */
protected function encodePart($input)
{
    $codePoints = $this->listCodePoints($input);

    $n = static::INITIAL_N;
    $bias = static::INITIAL_BIAS;
    $delta = 0;
    // $b is the number of basic code points, $h the number of code points handled so far.
    $h = $b = count($codePoints['basic']);

    $output = '';
    foreach ($codePoints['basic'] as $code) {
        $output .= $this->codePointToChar($code);
    }
    // A label made only of basic code points is returned untouched; note that
    // this return happens before the 63-octet check at the end of the method.
    if ($input === $output) {
        return $output;
    }
    if ($b > 0) {
        $output .= static::DELIMITER;
    }

    // Distinct non-basic code points in ascending order: one outer iteration per value.
    $codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
    sort($codePoints['nonBasic']);

    $i = 0;
    $length = mb_strlen($input, $this->encoding);
    while ($h < $length) {
        $m = $codePoints['nonBasic'][$i++];
        $delta = $delta + ($m - $n) * ($h + 1);
        $n = $m;

        foreach ($codePoints['all'] as $c) {
            if ($c < $n || $c < static::INITIAL_N) {
                $delta++;
            }
            if ($c === $n) {
                // Emit $delta as a generalized variable-length integer.
                $q = $delta;
                for ($k = static::BASE;; $k += static::BASE) {
                    $t = $this->calculateThreshold($k, $bias);
                    if ($q < $t) {
                        break;
                    }

                    $code = $t + (($q - $t) % (static::BASE - $t));
                    $output .= static::$encodeTable[$code];

                    // Integer division: "/" alone yields a float whenever the
                    // division is not exact, and that float would then be used
                    // as a "%" operand and as an array key (implicit float-to-int
                    // conversion, deprecated since PHP 8.1).
                    $q = (int) (($q - $t) / (static::BASE - $t));
                }

                $output .= static::$encodeTable[$q];
                $bias = $this->adapt($delta, $h + 1, ($h === $b));
                $delta = 0;
                $h++;
            }
        }

        $delta++;
        $n++;
    }

    $out = static::PREFIX . $output;
    $length = strlen($out);
    if ($length > 63 || $length < 1) {
        throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $length));
    }

    return $out;
}
| 169 | |||
/**
 * Convert a Punycode (ASCII) domain name back to its Unicode form.
 *
 * The input is lower-cased and split on '.'. Labels that start with the
 * Punycode prefix have the prefix stripped and are passed to decodePart();
 * all other labels are kept as they are.
 *
 * @param string $input Domain name in Punycode
 * @return string Unicode domain name
 * @throws LabelOutOfBoundsException  When a label is empty or longer than 63 octets
 * @throws DomainOutOfBoundsException When the decoded name is longer than 255 octets
 */
public function decode($input)
{
    $labels = explode('.', strtolower($input));
    $prefixLength = strlen(static::PREFIX);

    foreach ($labels as $index => $label) {
        $octets = strlen($label);
        if ($octets < 1 || $octets > 63) {
            throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $octets));
        }

        // Only labels carrying the prefix are Punycode-encoded.
        if (strpos($label, static::PREFIX) === 0) {
            $labels[$index] = $this->decodePart(substr($label, $prefixLength));
        }
    }

    $decoded = implode('.', $labels);
    $octets = strlen($decoded);
    if ($octets <= 255) {
        return $decoded;
    }

    throw new DomainOutOfBoundsException(sprintf('A full domain name is limited to 255 octets (including the separators), %s given.', $octets));
}
| 200 | |||
/**
 * Decode one label of a domain name (prefix already removed) to Unicode.
 *
 * Everything before the last delimiter is copied verbatim to the output.
 * The characters after it are read as variable-length base-36 integers;
 * each integer determines the next code point and the position at which
 * it is inserted into the output.
 *
 * @param string $input Part of a domain name
 * @return string Unicode domain part
 */
protected function decodePart($input)
{
    $codePoint = static::INITIAL_N;
    $insertAt = 0;
    $bias = static::INITIAL_BIAS;

    $delimiterAt = strrpos($input, static::DELIMITER);
    if ($delimiterAt === false) {
        // No delimiter: there is no literal portion, decoding starts at offset 0.
        $decoded = '';
        $cursor = 0;
    } else {
        $decoded = substr($input, 0, $delimiterAt);
        $cursor = $delimiterAt + 1;
    }

    $decodedLength = strlen($decoded);
    $encodedLength = strlen($input);
    while ($cursor < $encodedLength) {
        $previous = $insertAt;
        $weight = 1;

        // Read one variable-length integer, accumulating it into $insertAt.
        $k = static::BASE;
        while (true) {
            $digit = static::$decodeTable[$input[$cursor]];
            $cursor++;
            $insertAt += $digit * $weight;
            $threshold = $this->calculateThreshold($k, $bias);

            if ($digit < $threshold) {
                break;
            }

            $weight *= static::BASE - $threshold;
            $k += static::BASE;
        }

        $decodedLength++;
        $bias = $this->adapt($insertAt - $previous, $decodedLength, ($previous === 0));

        // The quotient advances the code point, the remainder is the insert position.
        $codePoint += (int) ($insertAt / $decodedLength);
        $insertAt %= $decodedLength;

        $head = mb_substr($decoded, 0, $insertAt, $this->encoding);
        $tail = mb_substr($decoded, $insertAt, $decodedLength - 1, $this->encoding);
        $decoded = $head . $this->codePointToChar($codePoint) . $tail;

        $insertAt++;
    }

    return $decoded;
}
| 249 | |||
| 250 | /** | ||
| 251 | * Calculate the bias threshold to fall between TMIN and TMAX | ||
| 252 | * | ||
| 253 | * @param integer $k | ||
| 254 | * @param integer $bias | ||
| 255 | * @return integer | ||
| 256 | */ | ||
| 257 | 83 | protected function calculateThreshold($k, $bias) | |
| 266 | |||
| 267 | /** | ||
| 268 | * Bias adaptation | ||
| 269 | * | ||
| 270 | * @param integer $delta | ||
| 271 | * @param integer $numPoints | ||
| 272 | * @param boolean $firstTime | ||
| 273 | * @return integer | ||
| 274 | */ | ||
| 275 | 83 | protected function adapt($delta, $numPoints, $firstTime) | |
| 293 | |||
/**
 * Split a string into its code points, grouped by kind.
 *
 * The string is walked character by character using the configured
 * encoding. Code points below 128 are collected under 'basic', all others
 * under 'nonBasic'; 'all' holds every code point in input order.
 *
 * @param string $input
 * @return array Multi-dimension array with basic, non-basic and aggregated code points
 */
protected function listCodePoints($input)
{
    $all = array();
    $basic = array();
    $nonBasic = array();

    $total = mb_strlen($input, $this->encoding);
    for ($offset = 0; $offset < $total; ++$offset) {
        $code = $this->charToCodePoint(mb_substr($input, $offset, 1, $this->encoding));

        $all[] = $code;
        if ($code < 128) {
            $basic[] = $code;
        } else {
            $nonBasic[] = $code;
        }
    }

    return array(
        'all' => $all,
        'basic' => $basic,
        'nonBasic' => $nonBasic,
    );
}
| 321 | |||
| 322 | /** | ||
| 323 | * Convert a single or multi-byte character to its code point | ||
| 324 | * | ||
| 325 | * @param string $char | ||
| 326 | * @return integer | ||
| 327 | */ | ||
| 328 | 43 | protected function charToCodePoint($char) | |
| 341 | |||
| 342 | /** | ||
| 343 | * Convert a code point to its single or multi-byte character | ||
| 344 | * | ||
| 345 | * @param integer $code | ||
| 346 | * @return string | ||
| 347 | */ | ||
| 348 | 59 | protected function codePointToChar($code) | |
| 360 | } | ||
| 361 |