Completed
Push — release-2.1 ( 594510...67de6a )
by Colin
07:41
created

Punycode::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
namespace TrueBV;
3
4
use TrueBV\Exception\DomainOutOfBoundsException;
5
use TrueBV\Exception\LabelOutOfBoundsException;
6
7
/**
 * Punycode implementation as described in RFC 3492
 *
 * Converts domain names between their Unicode form and their ASCII
 * ("xn--" prefixed) form, label by label. Local variable names in the
 * encoder/decoder deliberately mirror the pseudocode of RFC 3492
 * section 6 (n, delta, bias, h, b, q, k, t, w) so the two can be compared.
 *
 * @link http://tools.ietf.org/html/rfc3492
 */
class Punycode
{

    /**
     * Bootstring parameter values
     *
     */
    const BASE         = 36;
    const TMIN         = 1;
    const TMAX         = 26;
    const SKEW         = 38;
    const DAMP         = 700;
    const INITIAL_BIAS = 72;
    const INITIAL_N    = 128;
    const PREFIX       = 'xn--';
    const DELIMITER    = '-';

    /**
     * Encode table: maps a digit value (0-35) to its character
     *
     * @var array
     */
    protected static $encodeTable = array(
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
        'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
        'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    );

    /**
     * Decode table: maps a (lower-case) digit character to its value (0-35)
     *
     * @var array
     */
    protected static $decodeTable = array(
        'a' =>  0, 'b' =>  1, 'c' =>  2, 'd' =>  3, 'e' =>  4, 'f' =>  5,
        'g' =>  6, 'h' =>  7, 'i' =>  8, 'j' =>  9, 'k' => 10, 'l' => 11,
        'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
        's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
        'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
        '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35
    );

    /**
     * Character encoding passed to the mb_* string functions
     *
     * @var string
     */
    protected $encoding;

    /**
     * Constructor
     *
     * @param string $encoding Character encoding
     */
    public function __construct($encoding = 'UTF-8')
    {
        $this->encoding = $encoding;
    }

    /**
     * Encode a domain to its Punycode version
     *
     * The input is lower-cased, split on '.', and every label is passed
     * through encodePart(). Note that a label made only of basic (ASCII)
     * code points is returned unchanged by encodePart(), so for such labels
     * only the minimum length is checked here.
     *
     * @param string $input Domain name in Unicode to be encoded
     * @return string Punycode representation in ASCII
     * @throws LabelOutOfBoundsException  If a label is empty, or an encoded label exceeds 63 octets
     * @throws DomainOutOfBoundsException If the encoded domain exceeds 255 octets
     */
    public function encode($input)
    {
        $input = mb_strtolower($input, $this->encoding);
        $parts = explode('.', $input);
        foreach ($parts as &$part) {
            $length = strlen($part);
            if ($length < 1) {
                throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $length));
            }
            $part = $this->encodePart($part);
        }
        // Break the reference so later use of $part cannot alter $parts.
        unset($part);

        $output = implode('.', $parts);
        $length = strlen($output);
        if ($length > 255) {
            throw new DomainOutOfBoundsException(sprintf('A full domain name is limited to 255 octets (including the separators), %s given.', $length));
        }

        return $output;
    }

    /**
     * Encode a part of a domain name, such as tld, to its Punycode version
     *
     * Returns the input unchanged when it contains basic code points only;
     * otherwise returns the encoded label prefixed with PREFIX.
     *
     * @param string $input Part of a domain name
     * @return string Punycode representation of a domain part
     * @throws LabelOutOfBoundsException If the prefixed, encoded label is longer than 63 octets
     */
    protected function encodePart($input)
    {
        $codePoints = $this->listCodePoints($input);

        $n = static::INITIAL_N;
        $bias = static::INITIAL_BIAS;
        $delta = 0;
        // $b: number of basic code points; $h: number of code points handled so far.
        $h = $b = count($codePoints['basic']);

        // Basic code points are copied to the output verbatim, in order.
        $output = '';
        foreach ($codePoints['basic'] as $code) {
            $output .= $this->codePointToChar($code);
        }
        if ($input === $output) {
            // Nothing but basic code points: no encoding (and no prefix) needed.
            return $output;
        }
        if ($b > 0) {
            $output .= static::DELIMITER;
        }

        // Process the distinct non-basic code points in ascending order.
        $codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
        sort($codePoints['nonBasic']);

        $i = 0;
        $length = mb_strlen($input, $this->encoding);
        while ($h < $length) {
            $m = $codePoints['nonBasic'][$i++];
            $delta += ($m - $n) * ($h + 1);
            $n = $m;

            foreach ($codePoints['all'] as $c) {
                if ($c < $n || $c < static::INITIAL_N) {
                    $delta++;
                }
                if ($c === $n) {
                    // Emit $delta as a generalized variable-length integer.
                    $q = $delta;
                    for ($k = static::BASE;; $k += static::BASE) {
                        $t = $this->calculateThreshold($k, $bias);
                        if ($q < $t) {
                            break;
                        }

                        $code = $t + (($q - $t) % (static::BASE - $t));
                        $output .= static::$encodeTable[$code];

                        // Integer division: keep $q an int so that the modulo
                        // above and the table lookups never receive a
                        // fractional float (deprecated since PHP 8.1).
                        $q = (int) (($q - $t) / (static::BASE - $t));
                    }

                    $output .= static::$encodeTable[$q];
                    $bias = $this->adapt($delta, $h + 1, ($h === $b));
                    $delta = 0;
                    $h++;
                }
            }

            $delta++;
            $n++;
        }

        $out = static::PREFIX . $output;
        $length = strlen($out);
        if ($length > 63 || $length < 1) {
            throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $length));
        }

        return $out;
    }

    /**
     * Decode a Punycode domain name to its Unicode counterpart
     *
     * The input is lower-cased and split on '.'; labels that start with
     * PREFIX are decoded, all other labels are kept as they are.
     *
     * @param string $input Domain name in Punycode
     * @return string Unicode domain name
     * @throws LabelOutOfBoundsException  If an input label is empty or longer than 63 octets
     * @throws DomainOutOfBoundsException If the decoded domain exceeds 255 octets
     */
    public function decode($input)
    {
        $input = strtolower($input);
        $parts = explode('.', $input);
        foreach ($parts as &$part) {
            $length = strlen($part);
            if ($length > 63 || $length < 1) {
                throw new LabelOutOfBoundsException(sprintf('The length of any one label is limited to between 1 and 63 octets, but %s given.', $length));
            }
            if (strpos($part, static::PREFIX) !== 0) {
                continue;
            }

            $part = substr($part, strlen(static::PREFIX));
            $part = $this->decodePart($part);
        }
        // Break the reference so later use of $part cannot alter $parts.
        unset($part);

        $output = implode('.', $parts);
        $length = strlen($output);
        if ($length > 255) {
            throw new DomainOutOfBoundsException(sprintf('A full domain name is limited to 255 octets (including the separators), %s given.', $length));
        }

        return $output;
    }

    /**
     * Decode a part of domain name, such as tld
     *
     * Expects the label without its PREFIX. Digits are looked up in the
     * decode table without validation, so the input is expected to contain
     * only characters present in that table after the last delimiter.
     *
     * @param string $input Part of a domain name
     * @return string Unicode domain part
     */
    protected function decodePart($input)
    {
        $n = static::INITIAL_N;
        $i = 0;
        $bias = static::INITIAL_BIAS;
        $output = '';

        // Everything before the last delimiter is the literal basic portion.
        $pos = strrpos($input, static::DELIMITER);
        if ($pos !== false) {
            // Post-increment: cut at the delimiter, then step past it.
            $output = substr($input, 0, $pos++);
        } else {
            $pos = 0;
        }

        $outputLength = strlen($output);
        $inputLength = strlen($input);
        while ($pos < $inputLength) {
            $oldi = $i;
            $w = 1;

            // Read one generalized variable-length integer into $i.
            for ($k = static::BASE;; $k += static::BASE) {
                $digit = static::$decodeTable[$input[$pos++]];
                $i = $i + ($digit * $w);
                $t = $this->calculateThreshold($k, $bias);

                if ($digit < $t) {
                    break;
                }

                $w = $w * (static::BASE - $t);
            }

            $bias = $this->adapt($i - $oldi, ++$outputLength, ($oldi === 0));
            $n = $n + (int) ($i / $outputLength);
            $i = $i % ($outputLength);
            // Insert the decoded character at (character) position $i.
            $output = mb_substr($output, 0, $i, $this->encoding) . $this->codePointToChar($n) . mb_substr($output, $i, $outputLength - 1, $this->encoding);

            $i++;
        }

        return $output;
    }

    /**
     * Calculate the bias threshold to fall between TMIN and TMAX
     *
     * Returns $k - $bias clamped to the range [TMIN, TMAX].
     *
     * @param integer $k
     * @param integer $bias
     * @return integer
     */
    protected function calculateThreshold($k, $bias)
    {
        if ($k <= $bias + static::TMIN) {
            return static::TMIN;
        } elseif ($k >= $bias + static::TMAX) {
            return static::TMAX;
        }
        return $k - $bias;
    }

    /**
     * Bias adaptation
     *
     * @param integer $delta
     * @param integer $numPoints
     * @param boolean $firstTime Whether this is the first adaptation; the delta is then divided by DAMP instead of 2
     * @return integer The new bias
     */
    protected function adapt($delta, $numPoints, $firstTime)
    {
        $delta = (int) (
            ($firstTime)
                ? $delta / static::DAMP
                : $delta / 2
            );
        $delta += (int) ($delta / $numPoints);

        $k = 0;
        while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2) {
            $delta = (int) ($delta / (static::BASE - static::TMIN));
            $k = $k + static::BASE;
        }
        $k = $k + (int) (((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));

        return $k;
    }

    /**
     * List code points for a given input
     *
     * Code points below 128 are classified as basic, all others as
     * non-basic; 'all' keeps every code point in input order.
     *
     * @param string $input
     * @return array Multi-dimension array with basic, non-basic and aggregated code points
     */
    protected function listCodePoints($input)
    {
        $codePoints = array(
            'all'      => array(),
            'basic'    => array(),
            'nonBasic' => array(),
        );

        $length = mb_strlen($input, $this->encoding);
        for ($i = 0; $i < $length; $i++) {
            $char = mb_substr($input, $i, 1, $this->encoding);
            $code = $this->charToCodePoint($char);
            if ($code < 128) {
                $codePoints['all'][] = $codePoints['basic'][] = $code;
            } else {
                $codePoints['all'][] = $codePoints['nonBasic'][] = $code;
            }
        }

        return $codePoints;
    }

    /**
     * Convert a single or multi-byte character to its code point
     *
     * The bytes are interpreted as a UTF-8 sequence of one to four bytes,
     * selected by the value of the first byte; the configured $encoding is
     * not consulted here.
     *
     * @param string $char
     * @return integer
     */
    protected function charToCodePoint($char)
    {
        $code = ord($char[0]);
        if ($code < 128) {
            return $code;
        } elseif ($code < 224) {
            return (($code - 192) * 64) + (ord($char[1]) - 128);
        } elseif ($code < 240) {
            return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
        } else {
            return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
        }
    }

    /**
     * Convert a code point to its single or multi-byte character
     *
     * The result is always a UTF-8 byte sequence of one to four bytes; the
     * configured $encoding is not consulted here.
     *
     * @param integer $code
     * @return string
     */
    protected function codePointToChar($code)
    {
        if ($code <= 0x7F) {
            return chr($code);
        } elseif ($code <= 0x7FF) {
            return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
        } elseif ($code <= 0xFFFF) {
            return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
        } else {
            return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
        }
    }
}
361