1 | <?php |
||
2 | /** |
||
3 | * A class for encoding/decoding Punycode. |
||
4 | * |
||
5 | * Derived from this library: https://github.com/true/php-punycode |
||
6 | * |
||
7 | * @author TrueServer B.V. <[email protected]> |
||
8 | * @package php-punycode |
||
9 | * @license MIT |
||
10 | * |
||
11 | * Simple Machines Forum (SMF) |
||
12 | * |
||
13 | * @package SMF |
||
14 | * @author Simple Machines https://www.simplemachines.org |
||
15 | * @copyright 2022 Simple Machines and individual contributors |
||
16 | * @license https://www.simplemachines.org/about/smf/license.php BSD |
||
17 | * |
||
18 | * @version 2.1.0 |
||
19 | */ |
||
20 | |||
// Abort immediately unless the SMF constant has been defined by the including script.
defined('SMF') or die('No direct access...');
||
23 | |||
/**
 * Punycode implementation as described in RFC 3492
 *
 * Converts domain names between their Unicode form and their ASCII
 * ("xn--" prefixed) form, one dot-separated label at a time.
 *
 * @link http://tools.ietf.org/html/rfc3492
 */
class Punycode
{
	/**
	 * Bootstring parameter values (RFC 3492, section 5)
	 */
	const BASE = 36;
	const TMIN = 1;
	const TMAX = 26;
	const SKEW = 38;
	const DAMP = 700;
	const INITIAL_BIAS = 72;
	const INITIAL_N = 128;
	const PREFIX = 'xn--';
	const DELIMITER = '-';

	/**
	 * Encode table
	 *
	 * @var array Maps a digit value (0-35) to the character that represents it
	 */
	protected static $encodeTable = array(
		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
		'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
		'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	);

	/**
	 * Decode table
	 *
	 * @var array Maps a digit character to its value (0-35)
	 */
	protected static $decodeTable = array(
		'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5,
		'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11,
		'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
		's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
		'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
		'4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
	);

	/**
	 * Character encoding
	 *
	 * @var string Encoding name handed to the mb_* functions
	 */
	protected $encoding;

	/**
	 * Constructor
	 *
	 * @param string $encoding Character encoding
	 */
	public function __construct($encoding = 'UTF-8')
	{
		$this->encoding = $encoding;
	}

	/**
	 * Encode a domain to its Punycode version
	 *
	 * The input is lower-cased, split on '.', and every label is encoded
	 * independently before the labels are joined again.
	 *
	 * @param string $input Domain name in Unicode to be encoded
	 * @return string Punycode representation in ASCII
	 */
	public function encode($input)
	{
		$input = mb_strtolower($input, $this->encoding);
		$parts = explode('.', $input);

		// Assign by key rather than by reference so no dangling reference outlives the loop.
		foreach ($parts as $key => $part)
		{
			$parts[$key] = $this->encodePart($part);
		}

		return implode('.', $parts);
	}

	/**
	 * Encode a part of a domain name, such as tld, to its Punycode version
	 *
	 * A label made only of basic (ASCII) code points is returned unchanged.
	 * Any other label is returned with the PREFIX prepended.
	 *
	 * @param string $input Part of a domain name
	 * @return string Punycode representation of a domain part
	 */
	protected function encodePart($input)
	{
		$codePoints = $this->listCodePoints($input);

		$n = static::INITIAL_N;
		$bias = static::INITIAL_BIAS;
		$delta = 0;
		$h = $b = count($codePoints['basic']);

		// The basic code points are copied to the output verbatim, in order.
		$output = '';
		foreach ($codePoints['basic'] as $code)
		{
			$output .= $this->codePointToChar($code);
		}

		// Nothing but basic code points: there is nothing to encode.
		if ($input === $output)
		{
			return $output;
		}

		if ($b > 0)
		{
			$output .= static::DELIMITER;
		}

		// Non-basic code points are handled in ascending order, each distinct value once.
		$codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
		sort($codePoints['nonBasic']);

		$i = 0;
		$length = mb_strlen($input, $this->encoding);
		while ($h < $length)
		{
			$m = $codePoints['nonBasic'][$i++];
			$delta = $delta + ($m - $n) * ($h + 1);
			$n = $m;

			foreach ($codePoints['all'] as $c)
			{
				// $n never drops below INITIAL_N, so this also counts every basic code point.
				if ($c < $n)
				{
					$delta++;
				}

				if ($c === $n)
				{
					// Emit $delta as a generalized variable-length integer.
					$q = $delta;
					for ($k = static::BASE;; $k += static::BASE)
					{
						$t = $this->calculateThreshold($k, $bias);
						if ($q < $t)
						{
							break;
						}

						$code = $t + (($q - $t) % (static::BASE - $t));
						$output .= static::$encodeTable[$code];

						// Integer division, as RFC 3492 specifies; keeps $q an int.
						$q = (int) (($q - $t) / (static::BASE - $t));
					}

					$output .= static::$encodeTable[$q];
					$bias = $this->adapt($delta, $h + 1, ($h === $b));
					$delta = 0;
					$h++;
				}
			}

			$delta++;
			$n++;
		}

		return static::PREFIX . $output;
	}

	/**
	 * Decode a Punycode domain name to its Unicode counterpart
	 *
	 * The input is lower-cased and split on '.'. Labels that do not start
	 * with PREFIX are left as they are; the others are decoded.
	 *
	 * @param string $input Domain name in Punycode
	 * @return string Unicode domain name
	 */
	public function decode($input)
	{
		$input = strtolower($input);
		$parts = explode('.', $input);
		$prefixLength = strlen(static::PREFIX);

		// Assign by key rather than by reference so no dangling reference outlives the loop.
		foreach ($parts as $key => $part)
		{
			if (strpos($part, static::PREFIX) !== 0)
			{
				continue;
			}

			$parts[$key] = $this->decodePart(substr($part, $prefixLength));
		}

		return implode('.', $parts);
	}

	/**
	 * Decode a part of domain name, such as tld
	 *
	 * If the input is not valid Punycode (it ends in the middle of a number,
	 * contains a character that is not a Punycode digit, or yields a value
	 * above U+10FFFF), nothing is decoded and the label is handed back in its
	 * original form, i.e. PREFIX followed by $input.
	 *
	 * @param string $input Part of a domain name, without the PREFIX
	 * @return string Unicode domain part, or PREFIX . $input when $input is malformed
	 */
	protected function decodePart($input)
	{
		$n = static::INITIAL_N;
		$i = 0;
		$bias = static::INITIAL_BIAS;
		$output = '';

		// Everything before the last delimiter is the literal (basic) portion.
		$pos = strrpos($input, static::DELIMITER);
		if ($pos !== false)
		{
			$output = substr($input, 0, $pos++);
		}
		else
		{
			$pos = 0;
		}

		$outputLength = strlen($output);
		$inputLength = strlen($input);
		while ($pos < $inputLength)
		{
			$oldi = $i;
			$w = 1;

			// Read one generalized variable-length integer into $i.
			for ($k = static::BASE;; $k += static::BASE)
			{
				// Ran out of input mid-number, or hit something that is not a digit.
				if ($pos >= $inputLength || !isset(static::$decodeTable[$input[$pos]]))
				{
					return static::PREFIX . $input;
				}

				$digit = static::$decodeTable[$input[$pos++]];
				$i = $i + ($digit * $w);
				$t = $this->calculateThreshold($k, $bias);

				if ($digit < $t)
				{
					break;
				}

				$w = $w * (static::BASE - $t);
			}

			$bias = $this->adapt($i - $oldi, ++$outputLength, ($oldi === 0));
			$n = $n + (int) ($i / $outputLength);

			// Beyond the Unicode range there is no character to insert.
			if ($n > 0x10FFFF)
			{
				return static::PREFIX . $input;
			}

			// Insert the decoded character at position $i of the output.
			$i = $i % ($outputLength);
			$output = mb_substr($output, 0, $i, $this->encoding) . $this->codePointToChar($n) . mb_substr($output, $i, $outputLength - 1, $this->encoding);

			$i++;
		}

		return $output;
	}

	/**
	 * Calculate the bias threshold to fall between TMIN and TMAX
	 *
	 * @param integer $k
	 * @param integer $bias
	 * @return integer $k - $bias, clamped to the range TMIN..TMAX
	 */
	protected function calculateThreshold($k, $bias)
	{
		if ($k <= $bias + static::TMIN)
		{
			return static::TMIN;
		}
		elseif ($k >= $bias + static::TMAX)
		{
			return static::TMAX;
		}
		return $k - $bias;
	}

	/**
	 * Bias adaptation
	 *
	 * @param integer $delta
	 * @param integer $numPoints
	 * @param boolean $firstTime Whether to scale $delta by DAMP (true) or by 2 (false)
	 * @return integer The new bias
	 */
	protected function adapt($delta, $numPoints, $firstTime)
	{
		$delta = (int) (
			($firstTime)
				? $delta / static::DAMP
				: $delta / 2
		);
		$delta += (int) ($delta / $numPoints);

		$k = 0;
		while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2)
		{
			$delta = (int) ($delta / (static::BASE - static::TMIN));
			$k = $k + static::BASE;
		}
		$k = $k + (int) (((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));

		return $k;
	}

	/**
	 * List code points for a given input
	 *
	 * @param string $input
	 * @return array Multi-dimension array with basic, non-basic and aggregated code points
	 */
	protected function listCodePoints($input)
	{
		$codePoints = array(
			'all' => array(),
			'basic' => array(),
			'nonBasic' => array(),
		);

		$length = mb_strlen($input, $this->encoding);
		for ($i = 0; $i < $length; $i++)
		{
			$char = mb_substr($input, $i, 1, $this->encoding);
			$code = $this->charToCodePoint($char);

			// Code points below 128 are "basic"; everything else is "nonBasic".
			if ($code < 128)
			{
				$codePoints['all'][] = $codePoints['basic'][] = $code;
			}
			else
			{
				$codePoints['all'][] = $codePoints['nonBasic'][] = $code;
			}
		}

		return $codePoints;
	}

	/**
	 * Convert a single or multi-byte character to its code point
	 *
	 * The value of the first byte decides whether one, two, three or four
	 * bytes of $char are read.
	 *
	 * @param string $char
	 * @return integer
	 */
	protected function charToCodePoint($char)
	{
		$code = ord($char[0]);
		if ($code < 128)
		{
			return $code;
		}
		elseif ($code < 224)
		{
			return (($code - 192) * 64) + (ord($char[1]) - 128);
		}
		elseif ($code < 240)
		{
			return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
		}
		else
		{
			return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
		}
	}

	/**
	 * Convert a code point to its single or multi-byte character
	 *
	 * Produces one byte up to 0x7F, two up to 0x7FF, three up to 0xFFFF
	 * and four for anything larger.
	 *
	 * @param integer $code
	 * @return string
	 */
	protected function codePointToChar($code)
	{
		if ($code <= 0x7F)
		{
			return chr($code);
		}
		elseif ($code <= 0x7FF)
		{
			return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
		}
		elseif ($code <= 0xFFFF)
		{
			return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
		}
		else
		{
			return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
		}
	}
}
||
388 | |||
389 | ?> |