<?php
/**
 * A class for encoding/decoding Punycode.
 *
 * Derived from this library: https://github.com/true/php-punycode
 *
 * @author TrueServer B.V. <[email protected]>
 * @package php-punycode
 * @license MIT
 *
 * Simple Machines Forum (SMF)
 *
 * @package SMF
 * @author Simple Machines https://www.simplemachines.org
 * @copyright 2022 Simple Machines and individual contributors
 * @license https://www.simplemachines.org/about/smf/license.php BSD
 *
 * @version 2.1.0
 */

// Stop immediately unless the SMF constant has been defined, so this file
// cannot be executed by requesting it directly.
if (defined('SMF') === false)
{
	die('No direct access...');
}

/**
 * Punycode implementation as described in RFC 3492
 *
 * Converts domain names between their Unicode form and their ASCII form,
 * one dot-separated label at a time. Encoded labels carry the "xn--" prefix.
 *
 * @link http://tools.ietf.org/html/rfc3492
 */
class Punycode
{
	/**
	 * Bootstring parameter values
	 *
	 */
	const BASE = 36;
	const TMIN = 1;
	const TMAX = 26;
	const SKEW = 38;
	const DAMP = 700;
	const INITIAL_BIAS = 72;
	const INITIAL_N = 128;
	const PREFIX = 'xn--';
	const DELIMITER = '-';

	/**
	 * Encode table: digit value (0-35) => character
	 *
	 * @var array
	 */
	protected static $encodeTable = array(
		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
		'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
		'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	);

	/**
	 * Decode table: character => digit value (0-35)
	 *
	 * @var array
	 */
	protected static $decodeTable = array(
		'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5,
		'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11,
		'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
		's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
		'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
		'4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
	);

	/**
	 * Character encoding passed to the mb_* string functions
	 *
	 * @var string
	 */
	protected $encoding;

	/**
	 * Constructor
	 *
	 * @param string $encoding Character encoding
	 */
	public function __construct($encoding = 'UTF-8')
	{
		$this->encoding = $encoding;
	}

	/**
	 * Encode a domain to its Punycode version
	 *
	 * The input is lower-cased, split on '.', and every label is encoded
	 * independently before the labels are joined again.
	 *
	 * @param string $input Domain name in Unicode to be encoded
	 * @return string Punycode representation in ASCII
	 */
	public function encode($input)
	{
		$input = mb_strtolower($input, $this->encoding);
		$parts = explode('.', $input);

		// Assign by key rather than by reference so that no reference to the
		// last element is left behind once the loop has finished.
		foreach ($parts as $key => $part)
		{
			$parts[$key] = $this->encodePart($part);
		}

		return implode('.', $parts);
	}

	/**
	 * Encode a part of a domain name, such as tld, to its Punycode version
	 *
	 * A part made up of basic (ASCII) code points only is returned as is;
	 * anything else is returned with the "xn--" prefix.
	 *
	 * @param string $input Part of a domain name
	 * @return string Punycode representation of a domain part
	 */
	protected function encodePart($input)
	{
		$codePoints = $this->listCodePoints($input);

		$n = static::INITIAL_N;
		$bias = static::INITIAL_BIAS;
		$delta = 0;

		// $b: number of basic code points, $h: number of code points handled so far.
		$h = $b = count($codePoints['basic']);

		// The basic code points are copied to the output in their original order.
		$output = '';
		foreach ($codePoints['basic'] as $code)
		{
			$output .= $this->codePointToChar($code);
		}

		// Nothing was left out, so there is nothing to encode.
		if ($input === $output)
		{
			return $output;
		}

		if ($b > 0)
		{
			$output .= static::DELIMITER;
		}

		// Non-basic code points are processed in ascending order, each value once.
		$codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
		sort($codePoints['nonBasic']);

		$i = 0;
		$length = mb_strlen($input, $this->encoding);
		while ($h < $length)
		{
			$m = $codePoints['nonBasic'][$i++];
			$delta = $delta + ($m - $n) * ($h + 1);
			$n = $m;

			foreach ($codePoints['all'] as $c)
			{
				if ($c < $n || $c < static::INITIAL_N)
				{
					$delta++;
				}

				if ($c === $n)
				{
					// Emit $delta as a variable-length integer. $q is kept an
					// integer throughout instead of drifting into a float.
					$q = $delta;
					for ($k = static::BASE;; $k += static::BASE)
					{
						$t = $this->calculateThreshold($k, $bias);
						if ($q < $t)
						{
							break;
						}

						$code = $t + (($q - $t) % (static::BASE - $t));
						$output .= static::$encodeTable[$code];

						$q = (int) (($q - $t) / (static::BASE - $t));
					}

					$output .= static::$encodeTable[$q];
					$bias = $this->adapt($delta, $h + 1, ($h === $b));
					$delta = 0;
					$h++;
				}
			}

			$delta++;
			$n++;
		}

		return static::PREFIX . $output;
	}

	/**
	 * Decode a Punycode domain name to its Unicode counterpart
	 *
	 * The input is lower-cased and split on '.'. Labels that do not start
	 * with the "xn--" prefix are passed through untouched.
	 *
	 * @param string $input Domain name in Punycode
	 * @return string Unicode domain name
	 */
	public function decode($input)
	{
		$input = strtolower($input);
		$parts = explode('.', $input);

		// Assign by key rather than by reference so that no reference to the
		// last element is left behind once the loop has finished.
		foreach ($parts as $key => $part)
		{
			if (strpos($part, static::PREFIX) !== 0)
			{
				continue;
			}

			$parts[$key] = $this->decodePart(substr($part, strlen(static::PREFIX)));
		}

		return implode('.', $parts);
	}

	/**
	 * Decode a part of domain name, such as tld
	 *
	 * If the input is not well-formed Punycode (it ends in the middle of a
	 * number, or contains a character that is not in the decode table), it
	 * cannot be decoded; the original label, i.e. the prefix followed by
	 * $input, is returned in that case.
	 *
	 * @param string $input Part of a domain name, without the "xn--" prefix
	 * @return string Unicode domain part
	 */
	protected function decodePart($input)
	{
		$n = static::INITIAL_N;
		$i = 0;
		$bias = static::INITIAL_BIAS;
		$output = '';

		// Everything before the last delimiter is the literal (basic) portion.
		$pos = strrpos($input, static::DELIMITER);
		if ($pos !== false)
		{
			$output = substr($input, 0, $pos++);
		}
		else
		{
			$pos = 0;
		}

		$outputLength = strlen($output);
		$inputLength = strlen($input);
		while ($pos < $inputLength)
		{
			$oldi = $i;
			$w = 1;

			// Read one variable-length integer into $i.
			for ($k = static::BASE;; $k += static::BASE)
			{
				// Reading past the end of the input or hitting an unknown
				// character would only yield warnings and garbage, so give up.
				if ($pos >= $inputLength || !isset(static::$decodeTable[$input[$pos]]))
				{
					return static::PREFIX . $input;
				}

				$digit = static::$decodeTable[$input[$pos++]];
				$i = $i + ($digit * $w);
				$t = $this->calculateThreshold($k, $bias);

				if ($digit < $t)
				{
					break;
				}

				$w = $w * (static::BASE - $t);
			}

			$bias = $this->adapt($i - $oldi, ++$outputLength, ($oldi === 0));

			// $i now holds both the code point increment and the insert position.
			$n = $n + (int) ($i / $outputLength);
			$i = $i % ($outputLength);
			$output = mb_substr($output, 0, $i, $this->encoding) . $this->codePointToChar($n) . mb_substr($output, $i, $outputLength - 1, $this->encoding);

			$i++;
		}

		return $output;
	}

	/**
	 * Calculate the bias threshold to fall between TMIN and TMAX
	 *
	 * @param int $k
	 * @param int $bias
	 * @return int $k - $bias, clamped to the range TMIN..TMAX
	 */
	protected function calculateThreshold($k, $bias)
	{
		if ($k <= $bias + static::TMIN)
		{
			return static::TMIN;
		}
		elseif ($k >= $bias + static::TMAX)
		{
			return static::TMAX;
		}

		return $k - $bias;
	}

	/**
	 * Bias adaptation
	 *
	 * @param int $delta
	 * @param int $numPoints
	 * @param bool $firstTime Whether to scale $delta by DAMP instead of by 2
	 * @return int The new bias
	 */
	protected function adapt($delta, $numPoints, $firstTime)
	{
		$delta = (int) (
			($firstTime)
				? $delta / static::DAMP
				: $delta / 2
		);
		$delta += (int) ($delta / $numPoints);

		$k = 0;
		while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2)
		{
			$delta = (int) ($delta / (static::BASE - static::TMIN));
			$k = $k + static::BASE;
		}

		$k = $k + (int) (((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));

		return $k;
	}

	/**
	 * List code points for a given input
	 *
	 * @param string $input
	 * @return array Multi-dimension array with basic, non-basic and aggregated code points
	 *               (keys 'basic', 'nonBasic' and 'all'; code points below 128 count as basic)
	 */
	protected function listCodePoints($input)
	{
		$codePoints = array(
			'all' => array(),
			'basic' => array(),
			'nonBasic' => array(),
		);

		$length = mb_strlen($input, $this->encoding);
		for ($i = 0; $i < $length; $i++)
		{
			$char = mb_substr($input, $i, 1, $this->encoding);
			$code = $this->charToCodePoint($char);
			if ($code < 128)
			{
				$codePoints['all'][] = $codePoints['basic'][] = $code;
			}
			else
			{
				$codePoints['all'][] = $codePoints['nonBasic'][] = $code;
			}
		}

		return $codePoints;
	}

	/**
	 * Convert a single or multi-byte character to its code point
	 *
	 * The bytes are interpreted as a UTF-8 sequence of one to four bytes,
	 * whatever encoding was given to the constructor.
	 *
	 * @param string $char
	 * @return int
	 */
	protected function charToCodePoint($char)
	{
		$code = ord($char[0]);
		if ($code < 128)
		{
			return $code;
		}
		elseif ($code < 224)
		{
			return (($code - 192) * 64) + (ord($char[1]) - 128);
		}
		elseif ($code < 240)
		{
			return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
		}
		else
		{
			return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
		}
	}

	/**
	 * Convert a code point to its single or multi-byte character
	 *
	 * The character is always produced as a UTF-8 byte sequence, whatever
	 * encoding was given to the constructor.
	 *
	 * @param int $code
	 * @return string
	 */
	protected function codePointToChar($code)
	{
		if ($code <= 0x7F)
		{
			return chr($code);
		}
		elseif ($code <= 0x7FF)
		{
			return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
		}
		elseif ($code <= 0xFFFF)
		{
			return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
		}
		else
		{
			return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
		}
	}
}

?>