Punycode::codePointToChar()   A
last analyzed

Complexity

Conditions 4
Paths 4

Size

Total Lines 17
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 8
c 0
b 0
f 0
nop 1
dl 0
loc 17
rs 10
nc 4
1
<?php
2
/**
3
 * A class for encoding/decoding Punycode.
4
 *
5
 * Derived from this library: https://github.com/true/php-punycode
6
 *
7
 * @author TrueServer B.V. <[email protected]>
8
 * @package php-punycode
9
 * @license MIT
10
 *
11
 * Simple Machines Forum (SMF)
12
 *
13
 * @package SMF
14
 * @author Simple Machines https://www.simplemachines.org
15
 * @copyright 2022 Simple Machines and individual contributors
16
 * @license https://www.simplemachines.org/about/smf/license.php BSD
17
 *
18
 * @version 2.1.0
19
 */
20
21
// Refuse to run unless the SMF constant has been defined by the including script.
if (!defined('SMF'))
{
	die('No direct access...');
}
23
24
/**
25
 * Punycode implementation as described in RFC 3492
26
 *
27
 * @link http://tools.ietf.org/html/rfc3492
28
 */
29
class Punycode
{
	/**
	 * Bootstring parameter values for Punycode, as given in RFC 3492.
	 */
	const BASE = 36;
	const TMIN = 1;
	const TMAX = 26;
	const SKEW = 38;
	const DAMP = 700;
	const INITIAL_BIAS = 72;
	const INITIAL_N = 128;
	const PREFIX = 'xn--';
	const DELIMITER = '-';

	/**
	 * Encode table: maps a digit value (0-35) to its character.
	 *
	 * @var array
	 */
	protected static $encodeTable = array(
		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
		'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
		'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	);

	/**
	 * Decode table: maps a character to its digit value (0-35).
	 *
	 * @var array
	 */
	protected static $decodeTable = array(
		'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5,
		'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11,
		'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
		's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
		'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
		'4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
	);

	/**
	 * Character encoding handed to the mb_* string functions.
	 *
	 * @var string
	 */
	protected $encoding;

	/**
	 * Constructor
	 *
	 * @param string $encoding Character encoding
	 */
	public function __construct($encoding = 'UTF-8')
	{
		$this->encoding = $encoding;
	}

	/**
	 * Encode a domain to its Punycode version
	 *
	 * The input is lower-cased, split on '.', and each label is encoded
	 * separately by encodePart().
	 *
	 * @param string $input Domain name in Unicode to be encoded
	 * @return string Punycode representation in ASCII
	 */
	public function encode($input)
	{
		$input = mb_strtolower($input, $this->encoding);
		$parts = explode('.', $input);

		// Assign by key rather than by reference so no dangling reference
		// to the last element is left behind after the loop.
		foreach ($parts as $key => $part)
		{
			$parts[$key] = $this->encodePart($part);
		}

		return implode('.', $parts);
	}

	/**
	 * Encode a part of a domain name, such as tld, to its Punycode version
	 *
	 * A label made only of code points below 128 is returned unchanged;
	 * any other label is returned with the 'xn--' prefix.
	 *
	 * @param string $input Part of a domain name
	 * @return string Punycode representation of a domain part
	 */
	protected function encodePart($input)
	{
		$codePoints = $this->listCodePoints($input);

		$n = static::INITIAL_N;
		$bias = static::INITIAL_BIAS;
		$delta = 0;
		$h = $b = count($codePoints['basic']);

		// The basic code points are copied to the output verbatim, in order.
		$output = '';
		foreach ($codePoints['basic'] as $code)
		{
			$output .= $this->codePointToChar($code);
		}

		// Nothing but basic code points: there is nothing to encode.
		if ($input === $output)
		{
			return $output;
		}

		if ($b > 0)
		{
			$output .= static::DELIMITER;
		}

		// Non-basic code points are handled in ascending order, once each.
		// sort() also re-indexes the array after array_unique().
		$codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
		sort($codePoints['nonBasic']);

		$i = 0;
		$length = mb_strlen($input, $this->encoding);
		while ($h < $length)
		{
			$m = $codePoints['nonBasic'][$i++];
			$delta = $delta + ($m - $n) * ($h + 1);
			$n = $m;

			foreach ($codePoints['all'] as $c)
			{
				if ($c < $n || $c < static::INITIAL_N)
				{
					$delta++;
				}

				if ($c === $n)
				{
					// Emit $delta as a generalized variable-length integer.
					$q = $delta;
					for ($k = static::BASE;; $k += static::BASE)
					{
						$t = $this->calculateThreshold($k, $bias);
						if ($q < $t)
						{
							break;
						}

						$code = $t + (($q - $t) % (static::BASE - $t));
						$output .= static::$encodeTable[$code];

						// Truncate here so $q stays an integer for the next
						// comparison, modulo and table lookup.
						$q = (int) (($q - $t) / (static::BASE - $t));
					}

					$output .= static::$encodeTable[$q];
					$bias = $this->adapt($delta, $h + 1, ($h === $b));
					$delta = 0;
					$h++;
				}
			}

			$delta++;
			$n++;
		}

		return static::PREFIX . $output;
	}

	/**
	 * Decode a Punycode domain name to its Unicode counterpart
	 *
	 * The input is lower-cased and split on '.'; only labels that start
	 * with the 'xn--' prefix are decoded, all others are kept as they are.
	 *
	 * @param string $input Domain name in Punycode
	 * @return string Unicode domain name
	 */
	public function decode($input)
	{
		$input = strtolower($input);
		$parts = explode('.', $input);
		$prefixLength = strlen(static::PREFIX);

		// Assign by key rather than by reference so no dangling reference
		// to the last element is left behind after the loop.
		foreach ($parts as $key => $part)
		{
			if (strpos($part, static::PREFIX) !== 0)
			{
				continue;
			}

			$parts[$key] = $this->decodePart(substr($part, $prefixLength));
		}

		return implode('.', $parts);
	}

	/**
	 * Decode a part of domain name, such as tld
	 *
	 * Decoding stops early, returning whatever has been decoded so far,
	 * when the input contains a character that is not a Punycode digit,
	 * ends in the middle of a variable-length integer, or yields a value
	 * above U+10FFFF.
	 *
	 * @param string $input Part of a domain name, without the 'xn--' prefix
	 * @return string Unicode domain part
	 */
	protected function decodePart($input)
	{
		$n = static::INITIAL_N;
		$i = 0;
		$bias = static::INITIAL_BIAS;
		$output = '';

		// Everything before the last delimiter is literal basic code points;
		// the encoded digits start right after it.
		$pos = strrpos($input, static::DELIMITER);
		if ($pos !== false)
		{
			$output = substr($input, 0, $pos++);
		}
		else
		{
			$pos = 0;
		}

		$outputLength = strlen($output);
		$inputLength = strlen($input);
		while ($pos < $inputLength)
		{
			$oldi = $i;
			$w = 1;

			for ($k = static::BASE;; $k += static::BASE)
			{
				// Malformed input: ran out of digits or hit a character that
				// is not in the decode table. Stop instead of raising
				// undefined offset/index diagnostics and inserting garbage.
				if ($pos >= $inputLength || !isset(static::$decodeTable[$input[$pos]]))
				{
					return $output;
				}

				$digit = static::$decodeTable[$input[$pos++]];
				$i = $i + ($digit * $w);
				$t = $this->calculateThreshold($k, $bias);

				if ($digit < $t)
				{
					break;
				}

				$w = $w * (static::BASE - $t);
			}

			$bias = $this->adapt($i - $oldi, ++$outputLength, ($oldi === 0));
			$n = $n + (int) ($i / $outputLength);
			$i = $i % ($outputLength);

			// Values above U+10FFFF are not code points; codePointToChar()
			// could only turn them into invalid byte sequences.
			if ($n > 0x10FFFF)
			{
				return $output;
			}

			// Insert the decoded character at position $i of the output.
			$output = mb_substr($output, 0, $i, $this->encoding) . $this->codePointToChar($n) . mb_substr($output, $i, $outputLength - 1, $this->encoding);

			$i++;
		}

		return $output;
	}

	/**
	 * Calculate the bias threshold to fall between TMIN and TMAX
	 *
	 * @param integer $k
	 * @param integer $bias
	 * @return integer
	 */
	protected function calculateThreshold($k, $bias)
	{
		if ($k <= $bias + static::TMIN)
		{
			return static::TMIN;
		}

		if ($k >= $bias + static::TMAX)
		{
			return static::TMAX;
		}

		return $k - $bias;
	}

	/**
	 * Bias adaptation
	 *
	 * @param integer $delta
	 * @param integer $numPoints
	 * @param boolean $firstTime Whether to scale $delta by DAMP instead of by 2
	 * @return integer
	 */
	protected function adapt($delta, $numPoints, $firstTime)
	{
		$delta = (int) ($firstTime ? $delta / static::DAMP : $delta / 2);
		$delta += (int) ($delta / $numPoints);

		$k = 0;
		while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2)
		{
			$delta = (int) ($delta / (static::BASE - static::TMIN));
			$k = $k + static::BASE;
		}

		return $k + (int) (((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));
	}

	/**
	 * List code points for a given input
	 *
	 * @param string $input
	 * @return array Multi-dimension array with basic, non-basic and aggregated code points
	 */
	protected function listCodePoints($input)
	{
		$codePoints = array(
			'all' => array(),
			'basic' => array(),
			'nonBasic' => array(),
		);

		$length = mb_strlen($input, $this->encoding);
		for ($i = 0; $i < $length; $i++)
		{
			$char = mb_substr($input, $i, 1, $this->encoding);
			$code = $this->charToCodePoint($char);

			// Code points below 128 are "basic"; everything else is not.
			$group = ($code < 128) ? 'basic' : 'nonBasic';
			$codePoints[$group][] = $code;
			$codePoints['all'][] = $code;
		}

		return $codePoints;
	}

	/**
	 * Convert a single or multi-byte character to its code point
	 *
	 * The bytes are interpreted as a one- to four-byte UTF-8 sequence; the
	 * configured encoding is not consulted here.
	 *
	 * @param string $char
	 * @return integer
	 */
	protected function charToCodePoint($char)
	{
		$code = ord($char[0]);

		if ($code < 128)
		{
			return $code;
		}

		if ($code < 224)
		{
			return (($code - 192) * 64) + (ord($char[1]) - 128);
		}

		if ($code < 240)
		{
			return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
		}

		return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
	}

	/**
	 * Convert a code point to its single or multi-byte character
	 *
	 * The result is a one- to four-byte UTF-8 sequence.
	 *
	 * @param integer $code
	 * @return string
	 */
	protected function codePointToChar($code)
	{
		if ($code <= 0x7F)
		{
			return chr($code);
		}

		if ($code <= 0x7FF)
		{
			return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
		}

		if ($code <= 0xFFFF)
		{
			return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
		}

		return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
	}
}
388
389
?>