Total Complexity | 89 |
Total Lines | 589 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like Punycode often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Punycode, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
29 | class Punycode |
||
30 | { |
||
31 | /** |
||
32 | * Bootstring parameter values |
||
33 | * |
||
34 | */ |
||
35 | const BASE = 36; |
||
36 | const TMIN = 1; |
||
37 | const TMAX = 26; |
||
38 | const SKEW = 38; |
||
39 | const DAMP = 700; |
||
40 | const INITIAL_BIAS = 72; |
||
41 | const INITIAL_N = 128; |
||
42 | const PREFIX = 'xn--'; |
||
43 | const DELIMITER = '-'; |
||
44 | |||
45 | /** |
||
46 | * IDNA Error constants |
||
47 | */ |
||
48 | const IDNA_ERROR_EMPTY_LABEL = 1; |
||
49 | const IDNA_ERROR_LABEL_TOO_LONG = 2; |
||
50 | const IDNA_ERROR_DOMAIN_NAME_TOO_LONG = 4; |
||
51 | const IDNA_ERROR_LEADING_HYPHEN = 8; |
||
52 | const IDNA_ERROR_TRAILING_HYPHEN = 16; |
||
53 | const IDNA_ERROR_HYPHEN_3_4 = 32; |
||
54 | const IDNA_ERROR_LEADING_COMBINING_MARK = 64; |
||
55 | const IDNA_ERROR_DISALLOWED = 128; |
||
56 | const IDNA_ERROR_PUNYCODE = 256; |
||
57 | const IDNA_ERROR_LABEL_HAS_DOT = 512; |
||
58 | const IDNA_ERROR_INVALID_ACE_LABEL = 1024; |
||
59 | const IDNA_ERROR_BIDI = 2048; |
||
60 | const IDNA_ERROR_CONTEXTJ = 4096; |
||
61 | |||
62 | /** |
||
63 | * Encode table |
||
64 | * |
||
65 | * @var array |
||
66 | */ |
||
67 | protected static $encodeTable = array( |
||
68 | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', |
||
69 | 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', |
||
70 | 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', |
||
71 | ); |
||
72 | |||
73 | /** |
||
74 | * Decode table |
||
75 | * |
||
76 | * @var array |
||
77 | */ |
||
78 | protected static $decodeTable = array( |
||
79 | 'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5, |
||
80 | 'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11, |
||
81 | 'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17, |
||
82 | 's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23, |
||
83 | 'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29, |
||
84 | '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35 |
||
85 | ); |
||
86 | |||
87 | /** |
||
88 | * Character encoding |
||
89 | * |
||
90 | * @var string |
||
91 | */ |
||
92 | protected $encoding; |
||
93 | |||
94 | /** |
||
95 | * Whether to use Non-Transitional Processing. |
||
96 | * Setting this to true breaks backward compatibility with IDNA2003. |
||
97 | * |
||
98 | * @var bool |
||
99 | */ |
||
100 | protected $nonTransitional = false; |
||
101 | |||
102 | /** |
||
103 | * Whether to use STD3 ASCII rules. |
||
104 | * |
||
105 | * @var bool |
||
106 | */ |
||
107 | protected $std3 = false; |
||
108 | |||
/**
 * Create a converter bound to a character encoding.
 *
 * The value is stored as given and later handed to the mb_* string
 * functions used while encoding.
 *
 * @param string $encoding Character encoding of the strings to process
 */
public function __construct($encoding = 'UTF-8')
{
	$this->encoding = $encoding;
}
||
118 | |||
/**
 * Switch Non-Transitional Processing on or off.
 *
 * While this is off, preprocessing also merges the deviation mappings
 * (when they are available) into the character map.
 *
 * @param bool $nonTransitional True to use Non-Transitional Processing
 */
public function useNonTransitional(bool $nonTransitional)
{
	$this->nonTransitional = $nonTransitional;
}
||
128 | |||
/**
 * Switch the STD3 ASCII rules on or off.
 *
 * When on, preprocessing lowercases each label and widens the set of
 * disallowed characters; when off, the non-STD3 mappings are merged
 * into the character map instead (when they are available).
 *
 * @param bool $std3 True to apply the STD3 ASCII rules
 */
public function useStd3(bool $std3)
{
	$this->std3 = $std3;
}
||
138 | |||
/**
 * Convert a Unicode domain name into its Punycode (ASCII) form.
 *
 * The name is rejected (false is returned) when it cannot be decoded,
 * when preprocessing reports an error, when a label fails validation,
 * or when the result, ignoring trailing dots, exceeds 253 bytes.
 *
 * @param string $input Domain name in Unicode to be encoded
 * @return string|false Punycode representation in ASCII, or false on failure
 */
public function encode($input)
{
	// For compatibility with idn_to_* functions
	if ($this->decode($input) === false)
		return false;

	$errors = array();
	$preprocessed = $this->preprocess($input, $errors);

	if (!empty($errors))
		return false;

	$labels = explode('.', $preprocessed);
	$lastIndex = count($labels) - 1;

	foreach (array_keys($labels) as $index)
	{
		$labels[$index] = $this->encodePart($labels[$index]);

		switch ($this->validateLabel($labels[$index], true))
		{
			// Any of these makes the whole domain name invalid.
			case self::IDNA_ERROR_LABEL_TOO_LONG:
			case self::IDNA_ERROR_LEADING_HYPHEN:
			case self::IDNA_ERROR_TRAILING_HYPHEN:
			case self::IDNA_ERROR_LEADING_COMBINING_MARK:
			case self::IDNA_ERROR_DISALLOWED:
			case self::IDNA_ERROR_PUNYCODE:
			case self::IDNA_ERROR_LABEL_HAS_DOT:
			case self::IDNA_ERROR_INVALID_ACE_LABEL:
			case self::IDNA_ERROR_BIDI:
			case self::IDNA_ERROR_CONTEXTJ:
				return false;

			case self::IDNA_ERROR_HYPHEN_3_4:
				// Tolerated: the encoded label is kept unchanged.
				// NOTE(review): the previous code assigned the label to
				// itself here; if a fallback to the unencoded label was
				// intended, it never took effect - confirm the intent.
				break;

			case self::IDNA_ERROR_EMPTY_LABEL:
				// An empty label is only acceptable as the trailing one
				// of a name that has more than one label.
				if ($lastIndex === 0 || $index !== $lastIndex)
					return false;
				break;

			default:
				break;
		}
	}

	$output = implode('.', $labels);

	// IDNA_ERROR_DOMAIN_NAME_TOO_LONG
	if (strlen(rtrim($output, '.')) > 253)
		return false;

	return $output;
}
||
201 | |||
/**
 * Encode a single label of a domain name (such as the TLD) to Punycode.
 *
 * A label made only of code points below 128 is returned unchanged.
 * Otherwise the result is the ACE prefix, the label's basic code points,
 * a delimiter (only when there was at least one basic code point) and
 * the encoded deltas for the non-basic code points.
 *
 * @param string $input Part of a domain name
 * @return string Punycode representation of a domain part
 */
protected function encodePart($input)
{
	$codePoints = $this->listCodePoints($input);

	$basicCount = count($codePoints['basic']);
	$handled = $basicCount;

	// The basic code points are copied to the output as they are.
	$output = '';
	foreach ($codePoints['basic'] as $basicCode)
		$output .= $this->codePointToChar($basicCode);

	// Nothing besides basic code points: no encoding required.
	if ($input === $output)
		return $output;

	if ($basicCount > 0)
		$output .= static::DELIMITER;

	// Each distinct non-basic code point is processed once, in ascending order.
	$codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
	sort($codePoints['nonBasic']);

	$n = static::INITIAL_N;
	$bias = static::INITIAL_BIAS;
	$delta = 0;
	$next = 0;
	$length = mb_strlen($input, $this->encoding);

	while ($handled < $length)
	{
		$m = $codePoints['nonBasic'][$next++];
		$delta += ($m - $n) * ($handled + 1);
		$n = $m;

		foreach ($codePoints['all'] as $c)
		{
			if ($c < $n || $c < static::INITIAL_N)
				$delta++;

			if ($c !== $n)
				continue;

			// Emit $delta as a variable-length integer, one digit per pass.
			$q = $delta;
			$k = static::BASE;
			while (($t = $this->calculateThreshold($k, $bias)) <= $q)
			{
				$span = static::BASE - $t;
				$output .= static::$encodeTable[$t + (((int) $q - $t) % $span)];

				$q = ($q - $t) / $span;
				$k += static::BASE;
			}

			$output .= static::$encodeTable[(int) $q];
			$bias = $this->adapt($delta, $handled + 1, ($handled === $basicCount));
			$delta = 0;
			$handled++;
		}

		$delta++;
		$n++;
	}

	return static::PREFIX . $output;
}
||
270 | |||
/**
 * Convert a Punycode domain name back to its Unicode form.
 *
 * Labels that start with the ACE prefix are decoded; all labels are then
 * validated. An empty label is accepted only as the trailing label of a
 * name that has more than one label.
 *
 * @param string $input Domain name in Punycode
 * @return string|false Unicode domain name, or false when preprocessing,
 *    decoding or validation fails
 */
public function decode($input)
{
	$errors = array();
	$preprocessed = $this->preprocess($input, $errors);

	if (!empty($errors))
		return false;

	$labels = explode('.', $preprocessed);
	$lastIndex = count($labels) - 1;

	foreach (array_keys($labels) as $index)
	{
		if (strpos($labels[$index], static::PREFIX) === 0)
		{
			$decoded = $this->decodePart(substr($labels[$index], strlen(static::PREFIX)));

			if ($decoded === false)
				return false;

			$labels[$index] = $decoded;
		}

		$label = $labels[$index];

		if ($this->validateLabel($label, false) === 0)
			continue;

		// Invalid label: only an empty trailing label of a multi-label name is let through.
		if ($label !== '' || $lastIndex === 0 || $index !== $lastIndex)
			return false;
	}

	return implode('.', $labels);
}
||
316 | |||
317 | /** |
||
318 | * Decode a part of domain name, such as tld |
||
319 | * |
||
320 | * @param string $input Part of a domain name |
||
321 | * @return string Unicode domain part |
||
322 | */ |
||
323 | protected function decodePart($input) |
||
373 | } |
||
374 | |||
375 | /** |
||
376 | * Calculate the bias threshold to fall between TMIN and TMAX |
||
377 | * |
||
378 | * @param integer $k |
||
379 | * @param integer $bias |
||
380 | * @return integer |
||
381 | */ |
||
382 | protected function calculateThreshold($k, $bias) |
||
393 | } |
||
394 | |||
/**
 * Bias adaptation: compute the bias to use for the next delta.
 *
 * The delta is first scaled down (by DAMP on the first call, by 2
 * afterwards) and increased by its share per code point. It is then
 * repeatedly divided by (BASE - TMIN) while it is above the limit,
 * adding BASE to the result on every division.
 *
 * @param integer $delta
 * @param integer $numPoints
 * @param boolean $firstTime
 * @return integer
 */
protected function adapt($delta, $numPoints, $firstTime)
{
	$divisor = $firstTime ? static::DAMP : 2;

	$delta = (int) ($delta / $divisor);
	$delta += (int) ($delta / $numPoints);

	$range = static::BASE - static::TMIN;
	$limit = ($range * static::TMAX) / 2;

	for ($k = 0; $delta > $limit; $k += static::BASE)
		$delta = (int) ($delta / $range);

	return $k + (int) ((($range + 1) * $delta) / ($delta + static::SKEW));
}
||
422 | |||
/**
 * Split a string into code points and sort them into groups.
 *
 * Code points below 128 are listed under 'basic', everything else under
 * 'nonBasic'; 'all' holds every code point in input order.
 *
 * @param string $input
 * @return array Multi-dimension array with basic, non-basic and aggregated code points
 */
protected function listCodePoints($input)
{
	$all = array();
	$basic = array();
	$nonBasic = array();

	$length = mb_strlen($input, $this->encoding);

	for ($position = 0; $position < $length; $position++)
	{
		$code = $this->charToCodePoint(mb_substr($input, $position, 1, $this->encoding));

		$all[] = $code;

		if ($code < 128)
			$basic[] = $code;
		else
			$nonBasic[] = $code;
	}

	return array(
		'all' => $all,
		'basic' => $basic,
		'nonBasic' => $nonBasic,
	);
}
||
454 | |||
455 | /** |
||
456 | * Convert a single or multi-byte character to its code point |
||
457 | * |
||
458 | * @param string $char |
||
459 | * @return integer |
||
460 | */ |
||
461 | protected function charToCodePoint($char) |
||
479 | } |
||
480 | } |
||
481 | |||
482 | /** |
||
483 | * Convert a code point to its single or multi-byte character |
||
484 | * |
||
485 | * @param integer $code |
||
486 | * @return string |
||
487 | */ |
||
488 | protected function codePointToChar($code) |
||
505 | } |
||
506 | } |
||
507 | |||
/**
 * Prepare domain name string for Punycode processing.
 * See https://www.unicode.org/reports/tr46/#Processing
 *
 * Each label has its ignored characters removed, is run through the
 * character map and through utf8_normalize_c(), and is lowercased when
 * the STD3 rules are enabled. A label that still contains a disallowed
 * character adds 'disallowed' to $errors.
 *
 * The regexes, maps and normalization function are loaded from files
 * under the global $sourcedir.
 *
 * @param string $domain A domain name
 * @param array $errors Will record any errors encountered during preprocessing
 * @return string The preprocessed domain name
 */
protected function preprocess(string $domain, array &$errors = array())
{
	global $sourcedir;

	require_once($sourcedir . '/Unicode/Idna.php');
	require_once($sourcedir . '/Subs-Charset.php');

	$regexes = idna_regex();
	$maps = idna_maps();

	// Transitional processing also applies the deviation mappings.
	if (!$this->nonTransitional && function_exists('idna_maps_deviation'))
		$maps = array_merge($maps, idna_maps_deviation());

	if (!$this->std3 && function_exists('idna_maps_not_std3'))
		$maps = array_merge($maps, idna_maps_not_std3());

	// Both patterns are the same for every label, so they are built once.
	$ignoredPattern = '/[' . $regexes['ignored'] . ']/u';

	$std3Disallowed = '';
	if ($this->std3)
		$std3Disallowed = $regexes['disallowed_std3'] ?? '\x{0}-\x{2C}\x{2E}-\x{2F}\x{3A}-\x{60}\x{7B}-\x{7F}';

	$disallowedPattern = '/[' . $regexes['disallowed'] . $std3Disallowed . ']/u';

	$processed = array();

	foreach (explode('.', $domain) as $label)
	{
		$label = preg_replace($ignoredPattern, '', $label);
		$label = strtr($label, $maps);
		$label = utf8_normalize_c($label);

		if ($this->std3)
			$label = strtolower($label);

		if (preg_match($disallowedPattern, $label))
			$errors[] = 'disallowed';

		$processed[] = $label;
	}

	$errors = array_unique($errors);

	return implode('.', $processed);
}
||
551 | |||
552 | /** |
||
553 | * Validates an individual part of a domain name. |
||
554 | * |
||
555 | * @param string $label Individual part of a domain name. |
||
556 | * @param bool $toPunycode True for encoding to Punycode, false for decoding. |
||
557 | */ |
||
558 | protected function validateLabel(string $label, bool $toPunycode = true) |
||
618 | } |
||
619 | } |
||
621 | ?> |