Issues (1065)

Sources/Class-Punycode.php (2 issues)

1
<?php
2
/**
3
 * A class for encoding/decoding Punycode.
4
 *
5
 * Derived from this library: https://github.com/true/php-punycode
6
 *
7
 * @author TrueServer B.V. <[email protected]>
8
 * @package php-punycode
9
 * @license MIT
10
 *
11
 * Simple Machines Forum (SMF)
12
 *
13
 * @package SMF
14
 * @author Simple Machines https://www.simplemachines.org
15
 * @copyright 2022 Simple Machines and individual contributors
16
 * @license https://www.simplemachines.org/about/smf/license.php BSD
17
 *
18
 * @version 2.1.3
19
 */
20
21
// Refuse to run unless the SMF constant has already been defined.
if (!defined('SMF'))
{
	die('No direct access...');
}
23
24
/**
 * Punycode implementation as described in RFC 3492
 *
 * Public entry points are encode() and decode(). Both return false on any
 * failure, for compatibility with the idn_to_* functions.
 *
 * @link http://tools.ietf.org/html/rfc3492
 */
class Punycode
{
	/**
	 * Bootstring parameter values
	 */
	const BASE = 36;
	const TMIN = 1;
	const TMAX = 26;
	const SKEW = 38;
	const DAMP = 700;
	const INITIAL_BIAS = 72;
	const INITIAL_N = 128;
	const PREFIX = 'xn--';
	const DELIMITER = '-';

	/**
	 * IDNA Error constants
	 */
	const IDNA_ERROR_EMPTY_LABEL = 1;
	const IDNA_ERROR_LABEL_TOO_LONG = 2;
	const IDNA_ERROR_DOMAIN_NAME_TOO_LONG = 4;
	const IDNA_ERROR_LEADING_HYPHEN = 8;
	const IDNA_ERROR_TRAILING_HYPHEN = 16;
	const IDNA_ERROR_HYPHEN_3_4 = 32;
	const IDNA_ERROR_LEADING_COMBINING_MARK = 64;
	const IDNA_ERROR_DISALLOWED = 128;
	const IDNA_ERROR_PUNYCODE = 256;
	const IDNA_ERROR_LABEL_HAS_DOT = 512;
	const IDNA_ERROR_INVALID_ACE_LABEL = 1024;
	const IDNA_ERROR_BIDI = 2048;
	const IDNA_ERROR_CONTEXTJ = 4096;

	/**
	 * Encode table: digit value (0-35) => Punycode character
	 *
	 * @var array
	 */
	protected static $encodeTable = array(
		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
		'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
		'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	);

	/**
	 * Decode table: Punycode character => digit value (0-35)
	 *
	 * @var array
	 */
	protected static $decodeTable = array(
		'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5,
		'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11,
		'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
		's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
		'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
		'4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
	);

	/**
	 * Character encoding passed to the mb_* functions
	 *
	 * @var string
	 */
	protected $encoding;

	/**
	 * Whether to use Non-Transitional Processing.
	 * Setting this to true breaks backward compatibility with IDNA2003.
	 *
	 * @var bool
	 */
	protected $nonTransitional = false;

	/**
	 * Whether to use STD3 ASCII rules.
	 *
	 * @var bool
	 */
	protected $std3 = false;

	/**
	 * Constructor
	 *
	 * @param string $encoding Character encoding
	 */
	public function __construct($encoding = 'UTF-8')
	{
		$this->encoding = $encoding;
	}

	/**
	 * Enable/disable Non-Transitional Processing
	 *
	 * @param bool $nonTransitional Whether to use Non-Transitional Processing
	 */
	public function useNonTransitional(bool $nonTransitional)
	{
		$this->nonTransitional = $nonTransitional;
	}

	/**
	 * Enable/disable STD3 ASCII rules
	 *
	 * @param bool $std3 Whether to use STD3 ASCII rules
	 */
	public function useStd3(bool $std3)
	{
		$this->std3 = $std3;
	}

	/**
	 * Encode a domain to its Punycode version
	 *
	 * @param string $input Domain name in Unicode to be encoded
	 * @return string|false Punycode representation in ASCII, or false if the
	 *    input fails preprocessing, label validation or the length limit
	 */
	public function encode($input)
	{
		// For compatibility with idn_to_* functions
		if ($this->decode($input) === false)
		{
			return false;
		}

		$errors = array();
		$preprocessed = $this->preprocess($input, $errors);

		if (!empty($errors))
		{
			return false;
		}

		$parts = explode('.', $preprocessed);
		$parts_count = count($parts);

		foreach ($parts as $p => $part)
		{
			$encoded = $this->encodePart($part);
			$parts[$p] = $encoded;

			switch ($this->validateLabel($encoded, true))
			{
				case self::IDNA_ERROR_LABEL_TOO_LONG:
				case self::IDNA_ERROR_LEADING_HYPHEN:
				case self::IDNA_ERROR_TRAILING_HYPHEN:
				case self::IDNA_ERROR_LEADING_COMBINING_MARK:
				case self::IDNA_ERROR_DISALLOWED:
				case self::IDNA_ERROR_PUNYCODE:
				case self::IDNA_ERROR_LABEL_HAS_DOT:
				case self::IDNA_ERROR_INVALID_ACE_LABEL:
				case self::IDNA_ERROR_BIDI:
				case self::IDNA_ERROR_CONTEXTJ:
					return false;

				// Every encoded label carries the "xn--" prefix, so hyphens in
				// positions 3 and 4 are tolerated and the label is kept as is.
				case self::IDNA_ERROR_HYPHEN_3_4:
					break;

				// An empty label is only acceptable as the trailing label of a
				// multi-label name (i.e. the input ended with a dot).
				case self::IDNA_ERROR_EMPTY_LABEL:
					if ($parts_count === 1 || $p !== $parts_count - 1)
					{
						return false;
					}
					break;

				default:
					break;
			}
		}

		$output = implode('.', $parts);

		// IDNA_ERROR_DOMAIN_NAME_TOO_LONG
		if (strlen(rtrim($output, '.')) > 253)
		{
			return false;
		}

		return $output;
	}

	/**
	 * Encode a part of a domain name, such as tld, to its Punycode version
	 *
	 * A label made only of basic (ASCII) code points is returned unchanged;
	 * anything else is returned with the "xn--" prefix.
	 *
	 * @param string $input Part of a domain name
	 * @return string Punycode representation of a domain part
	 */
	protected function encodePart($input)
	{
		$codePoints = $this->listCodePoints($input);

		$n = static::INITIAL_N;
		$bias = static::INITIAL_BIAS;
		$delta = 0;
		$h = $b = count($codePoints['basic']);

		$output = '';
		foreach ($codePoints['basic'] as $code)
		{
			$output .= $this->codePointToChar($code);
		}

		// Nothing but basic code points: there is nothing to encode.
		if ($input === $output)
		{
			return $output;
		}

		if ($b > 0)
		{
			$output .= static::DELIMITER;
		}

		// Non-basic code points are handled in ascending order, each once.
		$codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
		sort($codePoints['nonBasic']);

		$i = 0;
		$length = mb_strlen($input, $this->encoding);
		while ($h < $length)
		{
			$m = $codePoints['nonBasic'][$i++];
			$delta = $delta + ($m - $n) * ($h + 1);
			$n = $m;

			foreach ($codePoints['all'] as $c)
			{
				if ($c < $n || $c < static::INITIAL_N)
				{
					$delta++;
				}

				if ($c === $n)
				{
					// Emit $delta as a generalized variable-length integer.
					$q = $delta;
					for ($k = static::BASE;; $k += static::BASE)
					{
						$t = $this->calculateThreshold($k, $bias);
						if ($q < $t)
						{
							break;
						}

						$code = $t + (($q - $t) % (static::BASE - $t));
						$output .= static::$encodeTable[$code];

						// Integer division, as RFC 3492 specifies; keeps $q an int.
						$q = intdiv($q - $t, static::BASE - $t);
					}

					$output .= static::$encodeTable[$q];
					$bias = $this->adapt($delta, $h + 1, ($h === $b));
					$delta = 0;
					$h++;
				}
			}

			$delta++;
			$n++;
		}

		return static::PREFIX . $output;
	}

	/**
	 * Decode a Punycode domain name to its Unicode counterpart
	 *
	 * @param string $input Domain name in Punycode
	 * @return string|false Unicode domain name, or false if preprocessing,
	 *    decoding or label validation fails
	 */
	public function decode($input)
	{
		$errors = array();
		$preprocessed = $this->preprocess($input, $errors);

		if (!empty($errors))
		{
			return false;
		}

		$parts = explode('.', $preprocessed);
		$parts_count = count($parts);

		foreach ($parts as $p => $part)
		{
			if (strpos($part, static::PREFIX) === 0)
			{
				$part = $this->decodePart((string) substr($part, strlen(static::PREFIX)));

				if ($part === false)
				{
					return false;
				}

				$parts[$p] = $part;
			}

			if ($this->validateLabel($part, false) !== 0)
			{
				// The only tolerated failure is an empty trailing label of a
				// multi-label name (i.e. the input ended with a dot).
				if ($part !== '' || $parts_count === 1 || $p !== $parts_count - 1)
				{
					return false;
				}
			}
		}

		return implode('.', $parts);
	}

	/**
	 * Decode a part of domain name, such as tld
	 *
	 * @param string $input Part of a domain name, without the "xn--" prefix
	 * @return string|false Unicode domain part, or false if $input is not
	 *    valid Punycode (bad digit, truncated integer, non-basic character
	 *    before the delimiter, arithmetic overflow, or a decoded value that
	 *    is not a Unicode scalar value)
	 */
	protected function decodePart($input)
	{
		$n = static::INITIAL_N;
		$i = 0;
		$bias = static::INITIAL_BIAS;
		$output = '';

		// Everything before the last delimiter is copied literally.
		$pos = strrpos($input, static::DELIMITER);
		if ($pos !== false)
		{
			$output = (string) substr($input, 0, $pos++);
		}
		else
		{
			$pos = 0;
		}

		// The literal portion must be basic code points only; otherwise the
		// byte length below would disagree with the mb_substr() offsets.
		if (preg_match('/[^\x00-\x7F]/', $output))
		{
			return false;
		}

		$outputLength = strlen($output);
		$inputLength = strlen($input);
		while ($pos < $inputLength)
		{
			$oldi = $i;
			$w = 1;

			for ($k = static::BASE;; $k += static::BASE)
			{
				if (!isset($input[$pos]) || !isset(static::$decodeTable[$input[$pos]]))
				{
					return false;
				}

				$digit = static::$decodeTable[$input[$pos++]];

				// Overflow guard (RFC 3492 section 6.4): $i + $digit * $w must fit.
				if ($digit > intdiv(PHP_INT_MAX - $i, $w))
				{
					return false;
				}

				$i += $digit * $w;
				$t = $this->calculateThreshold($k, $bias);

				if ($digit < $t)
				{
					break;
				}

				// Overflow guard: $w * (BASE - $t) must fit.
				if ($w > intdiv(PHP_INT_MAX, static::BASE - $t))
				{
					return false;
				}

				$w *= static::BASE - $t;
			}

			$bias = $this->adapt($i - $oldi, ++$outputLength, ($oldi === 0));
			$n += intdiv($i, $outputLength);

			// Only Unicode scalar values can be turned into a character.
			if ($n > 0x10FFFF || ($n >= 0xD800 && $n <= 0xDFFF))
			{
				return false;
			}

			$i = $i % $outputLength;
			$output = mb_substr($output, 0, $i, $this->encoding) . $this->codePointToChar($n) . mb_substr($output, $i, $outputLength - 1, $this->encoding);

			$i++;
		}

		return $output;
	}

	/**
	 * Calculate the bias threshold to fall between TMIN and TMAX
	 *
	 * @param integer $k
	 * @param integer $bias
	 * @return integer
	 */
	protected function calculateThreshold($k, $bias)
	{
		if ($k <= $bias + static::TMIN)
		{
			return static::TMIN;
		}
		elseif ($k >= $bias + static::TMAX)
		{
			return static::TMAX;
		}

		return $k - $bias;
	}

	/**
	 * Bias adaptation
	 *
	 * @param integer $delta
	 * @param integer $numPoints
	 * @param boolean $firstTime
	 * @return integer
	 */
	protected function adapt($delta, $numPoints, $firstTime)
	{
		$delta = (int) (
			($firstTime)
			? $delta / static::DAMP
			: $delta / 2
		);
		$delta += (int) ($delta / $numPoints);

		$k = 0;
		while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2)
		{
			$delta = (int) ($delta / (static::BASE - static::TMIN));
			$k = $k + static::BASE;
		}

		return $k + (int) (((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));
	}

	/**
	 * List code points for a given input
	 *
	 * @param string $input
	 * @return array Multi-dimension array with basic, non-basic and aggregated code points
	 */
	protected function listCodePoints($input)
	{
		$codePoints = array(
			'all' => array(),
			'basic' => array(),
			'nonBasic' => array(),
		);

		$length = mb_strlen($input, $this->encoding);
		for ($i = 0; $i < $length; $i++)
		{
			$char = mb_substr($input, $i, 1, $this->encoding);
			$code = $this->charToCodePoint($char);

			// Code points below 128 are the "basic" ones.
			$group = $code < 128 ? 'basic' : 'nonBasic';

			$codePoints[$group][] = $code;
			$codePoints['all'][] = $code;
		}

		return $codePoints;
	}

	/**
	 * Convert a single or multi-byte character to its code point
	 *
	 * The byte arithmetic assumes a UTF-8 byte sequence.
	 *
	 * @param string $char
	 * @return integer
	 */
	protected function charToCodePoint($char)
	{
		$code = ord($char[0]);
		if ($code < 128)
		{
			return $code;
		}
		elseif ($code < 224)
		{
			return (($code - 192) * 64) + (ord($char[1]) - 128);
		}
		elseif ($code < 240)
		{
			return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
		}
		else
		{
			return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
		}
	}

	/**
	 * Convert a code point to its single or multi-byte character
	 *
	 * The result is a UTF-8 byte sequence of one to four bytes.
	 *
	 * @param integer $code
	 * @return string
	 */
	protected function codePointToChar($code)
	{
		if ($code <= 0x7F)
		{
			return chr($code);
		}
		elseif ($code <= 0x7FF)
		{
			return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
		}
		elseif ($code <= 0xFFFF)
		{
			return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
		}
		else
		{
			return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
		}
	}

	/**
	 * Prepare domain name string for Punycode processing.
	 * See https://www.unicode.org/reports/tr46/#Processing
	 *
	 * Each dot-separated label has ignored characters removed, is mapped
	 * through the IDNA mapping tables and normalized, and is then checked
	 * against the disallowed character set.
	 *
	 * @param string $domain A domain name
	 * @param array $errors Will record any errors encountered during preprocessing
	 * @return string The preprocessed domain name
	 */
	protected function preprocess(string $domain, array &$errors = array())
	{
		global $sourcedir;

		// idna_regex(), idna_maps() and utf8_normalize_c() are expected to
		// come from these project files.
		require_once($sourcedir . '/Unicode/Idna.php');
		require_once($sourcedir . '/Subs-Charset.php');

		$regexes = idna_regex();
		$maps = idna_maps();

		if (!$this->nonTransitional && function_exists('idna_maps_deviation'))
		{
			$maps = array_merge($maps, idna_maps_deviation());
		}

		if (!$this->std3 && function_exists('idna_maps_not_std3'))
		{
			$maps = array_merge($maps, idna_maps_not_std3());
		}

		// Both patterns are the same for every label, so build them once.
		$ignored_pattern = '/[' . $regexes['ignored'] . ']/u';
		$disallowed_pattern = $this->disallowedPattern($regexes);

		$labels = explode('.', $domain);

		foreach ($labels as $l => $label)
		{
			$label = preg_replace($ignored_pattern, '', $label);

			// preg_replace() returns null on failure, e.g. malformed UTF-8
			// with the /u modifier. Report it instead of letting the label
			// silently collapse to an empty string.
			if ($label === null)
			{
				$errors[] = 'disallowed';
				continue;
			}

			$label = utf8_normalize_c(strtr($label, $maps));

			if ($this->std3)
			{
				$label = strtolower($label);
			}

			if (preg_match($disallowed_pattern, $label))
			{
				$errors[] = 'disallowed';
			}

			$labels[$l] = $label;
		}

		$errors = array_unique($errors);

		return implode('.', $labels);
	}

	/**
	 * Validates an individual part of a domain name.
	 *
	 * @param string $label Individual part of a domain name.
	 * @param bool $toPunycode True for encoding to Punycode, false for decoding.
	 * @return int 0 if the label is valid, otherwise the IDNA_ERROR_* constant
	 *    for the first check that failed
	 */
	protected function validateLabel(string $label, bool $toPunycode = true)
	{
		global $sourcedir;

		$length = strlen($label);

		if ($length === 0)
		{
			return self::IDNA_ERROR_EMPTY_LABEL;
		}

		if ($toPunycode)
		{
			if ($length > 63)
			{
				return self::IDNA_ERROR_LABEL_TOO_LONG;
			}

			if ($this->std3 && $length !== strspn($label, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-'))
			{
				return self::IDNA_ERROR_PUNYCODE;
			}
		}

		// $length > 0 here, so both offsets exist.
		if ($label[0] === '-')
		{
			return self::IDNA_ERROR_LEADING_HYPHEN;
		}

		if ($label[$length - 1] === '-')
		{
			return self::IDNA_ERROR_TRAILING_HYPHEN;
		}

		if (substr($label, 2, 2) === '--')
		{
			return self::IDNA_ERROR_HYPHEN_3_4;
		}

		if (preg_match('/^\p{M}/u', $label))
		{
			return self::IDNA_ERROR_LEADING_COMBINING_MARK;
		}

		// idna_regex() and utf8_normalize_kc() are expected to come from
		// these project files.
		require_once($sourcedir . '/Unicode/Idna.php');
		require_once($sourcedir . '/Subs-Charset.php');

		if (preg_match($this->disallowedPattern(idna_regex()), $label))
		{
			return self::IDNA_ERROR_INVALID_ACE_LABEL;
		}

		if (!$toPunycode && $label !== utf8_normalize_kc($label))
		{
			return self::IDNA_ERROR_INVALID_ACE_LABEL;
		}

		return 0;
	}

	/**
	 * Builds the regular expression that matches any disallowed character.
	 *
	 * When STD3 rules are enabled, the 'disallowed_std3' character class is
	 * appended, falling back to a built-in ASCII class if that key is missing.
	 *
	 * @param array $regexes Character classes as returned by idna_regex()
	 * @return string A complete PCRE pattern, including delimiters and flags
	 */
	private function disallowedPattern(array $regexes)
	{
		$std3_class = '';

		if ($this->std3)
		{
			$std3_class = $regexes['disallowed_std3'] ?? '\x{0}-\x{2C}\x{2E}-\x{2F}\x{3A}-\x{60}\x{7B}-\x{7F}';
		}

		return '/[' . $regexes['disallowed'] . $std3_class . ']/u';
	}
}
620
621
?>