Passed
Push — release-2.1 ( 0c2197...207d2d )
by Jeremy
05:47
created

Punycode::decode()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 9
c 0
b 0
f 0
nop 1
dl 0
loc 15
rs 9.9666
nc 3
1
<?php
2
/**
3
 * A class for encoding/decoding Punycode.
4
 *
5
 * Derived from this library: https://github.com/true/php-punycode
6
 *
7
 * @author TrueServer B.V. <[email protected]>
8
 * @package php-punycode
9
 * @license MIT
10
 *
11
 * Simple Machines Forum (SMF)
12
 *
13
 * @package SMF
14
 * @author Simple Machines http://www.simplemachines.org
15
 * @copyright 2018 Simple Machines and individual contributors
16
 * @license http://www.simplemachines.org/about/smf/license.php BSD
17
 *
18
 * @version 2.1 Beta 4
19
 */
20
21
// Refuse to run when this file is requested directly rather than loaded
// by code that has already defined the SMF constant.
if (!defined('SMF')) {
    die('No direct access...');
}
23
24
/**
25
 * Punycode implementation as described in RFC 3492
26
 *
27
 * @link http://tools.ietf.org/html/rfc3492
28
 */
29
class Punycode
{
    /**
     * Bootstring parameter values (RFC 3492, section 5), plus the ACE
     * prefix and the delimiter separating basic from encoded characters.
     */
    const BASE         = 36;
    const TMIN         = 1;
    const TMAX         = 26;
    const SKEW         = 38;
    const DAMP         = 700;
    const INITIAL_BIAS = 72;
    const INITIAL_N    = 128;
    const PREFIX       = 'xn--';
    const DELIMITER    = '-';

    /**
     * Encode table: digit value (0-35) => character
     *
     * @var array
     */
    protected static $encodeTable = array(
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
        'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
        'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    );

    /**
     * Decode table: character => digit value (0-35)
     *
     * @var array
     */
    protected static $decodeTable = array(
        'a' =>  0, 'b' =>  1, 'c' =>  2, 'd' =>  3, 'e' =>  4, 'f' =>  5,
        'g' =>  6, 'h' =>  7, 'i' =>  8, 'j' =>  9, 'k' => 10, 'l' => 11,
        'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
        's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
        'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
        '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
    );

    /**
     * Character encoding passed to the mb_* string functions
     *
     * @var string
     */
    protected $encoding;

    /**
     * Constructor
     *
     * @param string $encoding Character encoding
     */
    public function __construct($encoding = 'UTF-8')
    {
        $this->encoding = $encoding;
    }

    /**
     * Encode a domain to its Punycode version
     *
     * The input is lower-cased, split on '.', and every label is encoded
     * separately before the labels are joined again.
     *
     * @param string $input Domain name in Unicode to be encoded
     * @return string Punycode representation in ASCII
     */
    public function encode($input)
    {
        $input = mb_strtolower($input, $this->encoding);
        $parts = explode('.', $input);

        // Assign by key instead of iterating by reference, so no dangling
        // reference to the last element survives the loop.
        foreach ($parts as $key => $part) {
            $parts[$key] = $this->encodePart($part);
        }

        return implode('.', $parts);
    }

    /**
     * Encode a part of a domain name, such as tld, to its Punycode version
     *
     * A part consisting only of basic (ASCII) code points is returned
     * unchanged; anything else is returned with the ACE prefix prepended.
     *
     * @param string $input Part of a domain name
     * @return string Punycode representation of a domain part
     */
    protected function encodePart($input)
    {
        $codePoints = $this->listCodePoints($input);

        $n = static::INITIAL_N;
        $bias = static::INITIAL_BIAS;
        $delta = 0;
        // $b = number of basic code points, $h = number of code points handled so far.
        $h = $b = count($codePoints['basic']);

        // The basic code points are copied to the output verbatim.
        $output = '';
        foreach ($codePoints['basic'] as $code) {
            $output .= $this->codePointToChar($code);
        }
        if ($input === $output) {
            // Nothing but basic code points: no encoding required.
            return $output;
        }
        if ($b > 0) {
            $output .= static::DELIMITER;
        }

        // Non-basic code points are processed in ascending order, each value once.
        $codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
        sort($codePoints['nonBasic']);

        $i = 0;
        $length = mb_strlen($input, $this->encoding);
        while ($h < $length) {
            $m = $codePoints['nonBasic'][$i++];
            $delta = $delta + ($m - $n) * ($h + 1);
            $n = $m;

            foreach ($codePoints['all'] as $c) {
                if ($c < $n || $c < static::INITIAL_N) {
                    $delta++;
                }
                if ($c === $n) {
                    // Emit $delta as a generalized variable-length integer.
                    $q = $delta;
                    for ($k = static::BASE;; $k += static::BASE) {
                        $t = $this->calculateThreshold($k, $bias);
                        if ($q < $t) {
                            break;
                        }

                        $code = $t + (($q - $t) % (static::BASE - $t));
                        $output .= static::$encodeTable[$code];

                        // Keep $q an integer: a float here would be implicitly
                        // truncated by "%" and by the array index below, which
                        // PHP 8.1+ reports as deprecated.
                        $q = (int) (($q - $t) / (static::BASE - $t));
                    }

                    $output .= static::$encodeTable[$q];
                    $bias = $this->adapt($delta, $h + 1, ($h === $b));
                    $delta = 0;
                    $h++;
                }
            }

            $delta++;
            $n++;
        }

        return static::PREFIX . $output;
    }

    /**
     * Decode a Punycode domain name to its Unicode counterpart
     *
     * The input is lower-cased and split on '.'; only labels starting with
     * the ACE prefix are decoded, all others are passed through as they are.
     *
     * @param string $input Domain name in Punycode
     * @return string Unicode domain name
     */
    public function decode($input)
    {
        $input = strtolower($input);
        $parts = explode('.', $input);
        $prefixLength = strlen(static::PREFIX);

        // Assign by key instead of iterating by reference, so no dangling
        // reference to the last element survives the loop.
        foreach ($parts as $key => $part) {
            if (strpos($part, static::PREFIX) !== 0) {
                continue;
            }

            // Cast because substr() returns false for a bare prefix on PHP < 8.
            $parts[$key] = $this->decodePart((string) substr($part, $prefixLength));
        }

        return implode('.', $parts);
    }

    /**
     * Decode a part of domain name, such as tld
     *
     * Malformed input does not raise an error: a character that is not a
     * valid digit, or a sequence that is cut short, is treated as digit 0.
     *
     * @param string $input Part of a domain name, without the ACE prefix
     * @return string Unicode domain part
     */
    protected function decodePart($input)
    {
        $n = static::INITIAL_N;
        $i = 0;
        $bias = static::INITIAL_BIAS;
        $output = '';

        // Everything before the last delimiter is the literal basic portion.
        $pos = strrpos($input, static::DELIMITER);
        if ($pos !== false) {
            $output = (string) substr($input, 0, $pos);
            $pos++;
        } else {
            $pos = 0;
        }

        $outputLength = strlen($output);
        $inputLength = strlen($input);
        while ($pos < $inputLength) {
            $oldi = $i;
            $w = 1;

            // Read one generalized variable-length integer into $i.
            for ($k = static::BASE;; $k += static::BASE) {
                // Guard both lookups so malformed labels do not trigger
                // "uninitialized string offset" / "undefined index" warnings.
                $char = ($pos < $inputLength) ? $input[$pos] : '';
                $pos++;
                $digit = isset(static::$decodeTable[$char]) ? static::$decodeTable[$char] : 0;

                $i = $i + ($digit * $w);
                $t = $this->calculateThreshold($k, $bias);

                if ($digit < $t) {
                    break;
                }

                $w = $w * (static::BASE - $t);
            }

            $bias = $this->adapt($i - $oldi, ++$outputLength, ($oldi === 0));
            $n = $n + (int) ($i / $outputLength);
            $i = $i % ($outputLength);

            // Insert the decoded character at position $i of the output.
            $head = mb_substr($output, 0, $i, $this->encoding);
            $tail = mb_substr($output, $i, $outputLength - 1, $this->encoding);
            $output = $head . $this->codePointToChar($n) . $tail;

            $i++;
        }

        return $output;
    }

    /**
     * Calculate the bias threshold to fall between TMIN and TMAX
     *
     * @param integer $k
     * @param integer $bias
     * @return integer $k - $bias, clamped to the range TMIN..TMAX
     */
    protected function calculateThreshold($k, $bias)
    {
        if ($k <= $bias + static::TMIN) {
            return static::TMIN;
        }
        if ($k >= $bias + static::TMAX) {
            return static::TMAX;
        }

        return $k - $bias;
    }

    /**
     * Bias adaptation
     *
     * @param integer $delta
     * @param integer $numPoints
     * @param boolean $firstTime Whether to scale $delta by DAMP instead of halving it
     * @return integer The new bias
     */
    protected function adapt($delta, $numPoints, $firstTime)
    {
        $delta = $firstTime
            ? (int) ($delta / static::DAMP)
            : (int) ($delta / 2);
        $delta += (int) ($delta / $numPoints);

        $k = 0;
        while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2) {
            $delta = (int) ($delta / (static::BASE - static::TMIN));
            $k = $k + static::BASE;
        }

        return $k + (int) (((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));
    }

    /**
     * List code points for a given input
     *
     * Code points below 128 are classed as basic, everything else as
     * non-basic; 'all' keeps every code point in input order.
     *
     * @param string $input
     * @return array Multi-dimension array with basic, non-basic and aggregated code points
     */
    protected function listCodePoints($input)
    {
        $codePoints = array(
            'all'      => array(),
            'basic'    => array(),
            'nonBasic' => array(),
        );

        $length = mb_strlen($input, $this->encoding);
        for ($i = 0; $i < $length; $i++) {
            $char = mb_substr($input, $i, 1, $this->encoding);
            $code = $this->charToCodePoint($char);

            $group = ($code < 128) ? 'basic' : 'nonBasic';
            $codePoints[$group][] = $code;
            $codePoints['all'][] = $code;
        }

        return $codePoints;
    }

    /**
     * Convert a single or multi-byte character to its code point
     *
     * The bytes are interpreted as a UTF-8 sequence of one to four bytes,
     * whose length is chosen from the value of the first byte.
     *
     * @param string $char
     * @return integer
     */
    protected function charToCodePoint($char)
    {
        $code = ord($char[0]);
        if ($code < 128) {
            return $code;
        }
        if ($code < 224) {
            return (($code - 192) * 64) + (ord($char[1]) - 128);
        }
        if ($code < 240) {
            return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
        }

        return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
    }

    /**
     * Convert a code point to its single or multi-byte character
     *
     * The result is a UTF-8 byte sequence of one to four bytes.
     *
     * @param integer $code
     * @return string
     */
    protected function codePointToChar($code)
    {
        if ($code <= 0x7F) {
            return chr($code);
        }
        if ($code <= 0x7FF) {
            return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
        }
        if ($code <= 0xFFFF) {
            return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
        }

        return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
    }
}
358
359
?>