Complex classes like Punycode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Punycode, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
12 | class Punycode |
||
13 | { |
||
14 | |||
15 | /** |
||
16 | * Bootstring parameter values |
||
17 | * |
||
18 | */ |
||
19 | const BASE = 36; |
||
20 | const TMIN = 1; |
||
21 | const TMAX = 26; |
||
22 | const SKEW = 38; |
||
23 | const DAMP = 700; |
||
24 | const INITIAL_BIAS = 72; |
||
25 | const INITIAL_N = 128; |
||
26 | const PREFIX = 'xn--'; |
||
27 | const DELIMITER = '-'; |
||
28 | |||
29 | /** |
||
30 | * Encode table |
||
31 | * |
||
32 | * @var array |
||
33 | */ |
||
34 | protected static $encodeTable = array( |
||
35 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', |
||
36 | 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', |
||
37 | 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', |
||
38 | ); |
||
39 | |||
40 | /** |
||
41 | * Decode table |
||
42 | * |
||
43 | * @var array |
||
44 | */ |
||
45 | protected static $decodeTable = array( |
||
46 | 'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5, |
||
47 | 'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11, |
||
48 | 'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17, |
||
49 | 's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23, |
||
50 | 'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29, |
||
51 | '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35 |
||
52 | ); |
||
53 | |||
54 | /** |
||
55 | * Character encoding |
||
56 | * |
||
57 | * @var string |
||
58 | */ |
||
59 | protected $encoding; |
||
60 | |||
61 | /** |
||
62 | * Constructor |
||
63 | * |
||
64 | * @param string $encoding Character encoding |
||
65 | */ |
||
66 | 86 | public function __construct($encoding = 'UTF-8') |
|
70 | |||
/**
 * Encode a domain to its Punycode version
 *
 * The input is lower-cased with the configured character encoding, split on
 * dots, and every label is encoded on its own before the labels are joined
 * back together with dots.
 *
 * @param string $input Domain name in Unicode to be encoded
 * @return string Punycode representation in ASCII
 * @throws LabelOutOfBoundsException  If a label is empty, or if encodePart() rejects its encoded length
 * @throws DomainOutOfBoundsException If the encoded domain is longer than 255 octets
 */
public function encode($input)
{
    $input = mb_strtolower($input, $this->encoding);
    $parts = explode('.', $input);
    foreach ($parts as &$part) {
        // Byte length is sufficient here: only the empty label is rejected at this point.
        $length = strlen($part);
        if ($length < 1) {
            throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $length));
        }
        $part = $this->encodePart($part);
    }
    // Release the by-reference loop variable so a later write to $part
    // cannot silently overwrite the last element of $parts.
    unset($part);

    $output = implode('.', $parts);
    $length = strlen($output);
    if ($length > 255) {
        throw new DomainOutOfBoundsException(sprintf('A full domain name is limited to 255 octets (including the separators), %s given.', $length));
    }

    return $output;
}
||
96 | |||
/**
 * Encode a part of a domain name, such as tld, to its Punycode version
 *
 * A label made up solely of basic code points (below 128) is returned
 * unchanged. Otherwise the basic code points are copied first, followed by
 * the delimiter when at least one basic code point exists, and then the
 * variable-length integers describing the non-basic code points. The result
 * is prefixed with PREFIX.
 *
 * @param string $input Part of a domain name
 * @return string Punycode representation of a domain part
 * @throws LabelOutOfBoundsException If the prefixed, encoded label is longer than 63 octets
 */
protected function encodePart($input)
{
    $codePoints = $this->listCodePoints($input);

    $n = static::INITIAL_N;
    $bias = static::INITIAL_BIAS;
    $delta = 0;
    // $b: number of basic code points; $h: number of code points handled so far.
    $h = $b = count($codePoints['basic']);

    $output = '';
    foreach ($codePoints['basic'] as $code) {
        $output .= $this->codePointToChar($code);
    }
    if ($input === $output) {
        // Nothing but basic code points: the label needs no encoding.
        return $output;
    }
    if ($b > 0) {
        $output .= static::DELIMITER;
    }

    // Walk the distinct non-basic code points in ascending order.
    $codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
    sort($codePoints['nonBasic']);

    $i = 0;
    $length = mb_strlen($input, $this->encoding);
    while ($h < $length) {
        $m = $codePoints['nonBasic'][$i++];
        $delta = $delta + ($m - $n) * ($h + 1);
        $n = $m;

        foreach ($codePoints['all'] as $c) {
            if ($c < $n || $c < static::INITIAL_N) {
                $delta++;
            }
            if ($c === $n) {
                // Emit $delta as a generalized variable-length integer.
                $q = $delta;
                for ($k = static::BASE;; $k += static::BASE) {
                    $t = $this->calculateThreshold($k, $bias);
                    if ($q < $t) {
                        break;
                    }

                    $code = $t + (($q - $t) % (static::BASE - $t));
                    $output .= static::$encodeTable[$code];

                    // Keep $q an integer: a plain "/" produces a float whenever the
                    // division is inexact, and that float would then be used with "%"
                    // and as an array key (implicit float-to-int conversion is
                    // deprecated since PHP 8.1). Truncation equals the floor here
                    // because $q >= $t at this point.
                    $q = (int) (($q - $t) / (static::BASE - $t));
                }

                $output .= static::$encodeTable[$q];
                $bias = $this->adapt($delta, $h + 1, ($h === $b));
                $delta = 0;
                $h++;
            }
        }

        $delta++;
        $n++;
    }

    $out = static::PREFIX . $output;
    $length = strlen($out);
    if ($length > 63 || $length < 1) {
        throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $length));
    }

    return $out;
}
||
169 | |||
/**
 * Decode a Punycode domain name to its Unicode counterpart
 *
 * The input is lower-cased and split on dots. Labels that start with PREFIX
 * have the prefix stripped and are decoded; all other labels are kept as
 * they are.
 *
 * @param string $input Domain name in Punycode
 * @return string Unicode domain name
 * @throws LabelOutOfBoundsException  If a label is empty or longer than 63 octets
 * @throws DomainOutOfBoundsException If the decoded domain is longer than 255 octets
 */
public function decode($input)
{
    $input = strtolower($input);
    $parts = explode('.', $input);
    foreach ($parts as &$part) {
        $length = strlen($part);
        if ($length > 63 || $length < 1) {
            throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $length));
        }
        if (strpos($part, static::PREFIX) !== 0) {
            // Not a Punycode label: leave it untouched.
            continue;
        }

        $part = substr($part, strlen(static::PREFIX));
        $part = $this->decodePart($part);
    }
    // Release the by-reference loop variable so a later write to $part
    // cannot silently overwrite the last element of $parts.
    unset($part);

    $output = implode('.', $parts);
    $length = strlen($output);
    if ($length > 255) {
        throw new DomainOutOfBoundsException(sprintf('A full domain name is limited to 255 octets (including the separators), %s given.', $length));
    }

    return $output;
}
||
200 | |||
/**
 * Decode a part of domain name, such as tld
 *
 * Everything before the last delimiter is copied verbatim as the initial
 * output. The remaining characters are read as variable-length integers,
 * each of which yields one code point and the position at which it is
 * inserted into the output.
 *
 * @param string $input Part of a domain name (without the prefix)
 * @return string Unicode domain part
 */
protected function decodePart($input)
{
    $codePoint = static::INITIAL_N;
    $index = 0;
    $bias = static::INITIAL_BIAS;

    // Split off the literal portion that precedes the last delimiter, if any.
    $delimiterAt = strrpos($input, static::DELIMITER);
    if ($delimiterAt === false) {
        $decoded = '';
        $cursor = 0;
    } else {
        $decoded = substr($input, 0, $delimiterAt);
        $cursor = $delimiterAt + 1;
    }

    $decodedLength = strlen($decoded);
    $encodedLength = strlen($input);
    while ($cursor < $encodedLength) {
        $previousIndex = $index;
        $weight = 1;

        // Read one variable-length integer, digit by digit.
        $k = static::BASE;
        while (true) {
            $digit = static::$decodeTable[$input[$cursor++]];
            $index += $digit * $weight;
            $threshold = $this->calculateThreshold($k, $bias);

            if ($digit < $threshold) {
                break;
            }

            $weight *= (static::BASE - $threshold);
            $k += static::BASE;
        }

        $decodedLength++;
        $bias = $this->adapt($index - $previousIndex, $decodedLength, ($previousIndex === 0));
        $codePoint += (int) ($index / $decodedLength);
        $index %= $decodedLength;

        // Insert the decoded character at position $index of the output.
        $head = mb_substr($decoded, 0, $index, $this->encoding);
        $inserted = $this->codePointToChar($codePoint);
        $tail = mb_substr($decoded, $index, $decodedLength - 1, $this->encoding);
        $decoded = $head . $inserted . $tail;

        $index++;
    }

    return $decoded;
}
||
249 | |||
250 | /** |
||
251 | * Calculate the bias threshold to fall between TMIN and TMAX |
||
252 | * |
||
253 | * @param integer $k |
||
254 | * @param integer $bias |
||
255 | * @return integer |
||
256 | */ |
||
257 | 83 | protected function calculateThreshold($k, $bias) |
|
266 | |||
267 | /** |
||
268 | * Bias adaptation |
||
269 | * |
||
270 | * @param integer $delta |
||
271 | * @param integer $numPoints |
||
272 | * @param boolean $firstTime |
||
273 | * @return integer |
||
274 | */ |
||
275 | 83 | protected function adapt($delta, $numPoints, $firstTime) |
|
293 | |||
/**
 * List code points for a given input
 *
 * Reads the input one character at a time (using the configured encoding)
 * and sorts each code point into the "basic" list when it is below 128 or
 * the "nonBasic" list otherwise; every code point is also added to "all"
 * in input order.
 *
 * @param string $input
 * @return array Multi-dimension array with basic, non-basic and aggregated code points
 */
protected function listCodePoints($input)
{
    $all = array();
    $basic = array();
    $nonBasic = array();

    $total = mb_strlen($input, $this->encoding);
    $offset = 0;
    while ($offset < $total) {
        $code = $this->charToCodePoint(mb_substr($input, $offset, 1, $this->encoding));

        $all[] = $code;
        if ($code < 128) {
            $basic[] = $code;
        } else {
            $nonBasic[] = $code;
        }

        $offset++;
    }

    return array(
        'all'      => $all,
        'basic'    => $basic,
        'nonBasic' => $nonBasic,
    );
}
||
321 | |||
322 | /** |
||
323 | * Convert a single or multi-byte character to its code point |
||
324 | * |
||
325 | * @param string $char |
||
326 | * @return integer |
||
327 | */ |
||
328 | 43 | protected function charToCodePoint($char) |
|
341 | |||
342 | /** |
||
343 | * Convert a code point to its single or multi-byte character |
||
344 | * |
||
345 | * @param integer $code |
||
346 | * @return string |
||
347 | */ |
||
348 | 59 | protected function codePointToChar($code) |
|
360 | } |
||
361 |