IDNAEncoder::is_ascii() - Code Metrics - Inspection of "Namespacify" - rmccue/Requests - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#271)

unknown

created 2017-03-24 18:22 UTC

IDNAEncoder::is_ascii() A

↳ Parent: IDNAEncoder

Complexity

Conditions	1
Paths	1

Size

Total Lines	3
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	1
eloc	2
nc	1
nop	1
dl	0
loc	3
rs	10
c	0
b	0
f	0

<?php
namespace Rmccue\Requests;

use Rmccue\Requests\Exception as Exception;
/**
 * IDNA URL encoder
 *
 * Note: Not fully compliant, as nameprep does nothing yet.
 *
 * @package Rmccue\Requests
 * @subpackage Utilities
 * @see https://tools.ietf.org/html/rfc3490 IDNA specification
 * @see https://tools.ietf.org/html/rfc3492 Punycode/Bootstrap specification
 */
class IDNAEncoder {
	/**
	 * ACE prefix used for IDNA
	 *
	 * Matched case-insensitively when checking input (see to_ascii()).
	 *
	 * @see https://tools.ietf.org/html/rfc3490#section-5
	 * @var string
	 */
	const ACE_PREFIX = 'xn--';

	/**#@+
	 * Bootstrap constant for Punycode
	 *
	 * @see https://tools.ietf.org/html/rfc3492#section-5
	 * @var int
	 */
	const BOOTSTRAP_BASE         = 36;
	const BOOTSTRAP_TMIN         = 1;
	const BOOTSTRAP_TMAX         = 26;
	const BOOTSTRAP_SKEW         = 38;
	const BOOTSTRAP_DAMP         = 700;
	const BOOTSTRAP_INITIAL_BIAS = 72;
	const BOOTSTRAP_INITIAL_N    = 128;
	/**#@-*/

	/**
	 * Encode a hostname using Punycode
	 *
	 * The hostname is split on "." and every label is passed through
	 * {@see IDNAEncoder::to_ascii()} independently; the converted labels are
	 * then joined with "." again.
	 *
	 * @throws Rmccue\Requests\Exception If any label cannot be converted (see to_ascii())
	 *
	 * @param string $string Hostname
	 * @return string Punycode-encoded hostname
	 */
	public static function encode($string) {
		$labels = explode('.', $string);

		// Assign by index rather than iterating by reference, so no dangling
		// reference to the last element is left behind.
		foreach ($labels as $index => $label) {
			$labels[$index] = self::to_ascii($label);
		}

		return implode('.', $labels);
	}

	/**
	 * Convert a UTF-8 string to an ASCII string using Punycode
	 *
	 * Follows the numbered steps of the ToASCII operation; labels that are
	 * already ASCII are returned untouched after a length check.
	 *
	 * @throws Rmccue\Requests\Exception Provided string longer than 63 ASCII characters (`idna.provided_too_long`)
	 * @throws Rmccue\Requests\Exception Prepared string longer than 63 ASCII characters (`idna.prepared_too_long`)
	 * @throws Rmccue\Requests\Exception Provided string already begins with xn-- in any capitalization (`idna.provided_is_prefixed`)
	 * @throws Rmccue\Requests\Exception Encoded string longer than 63 ASCII characters (`idna.encoded_too_long`)
	 *
	 * @param string $string ASCII or UTF-8 string (max length 63 bytes once converted)
	 * @return string ASCII string
	 */
	public static function to_ascii($string) {
		// Step 1: Check if the string is already ASCII
		if (self::is_ascii($string)) {

			// Skip to step 8 (size check): at most 63 bytes are allowed
			if (strlen($string) < 64) {
				return $string;
			}

			throw new Exception('Provided string is too long', 'idna.provided_too_long', $string);
		}

		// Step 2: nameprep
		$string = self::nameprep($string);

		// Step 3: UseSTD3ASCIIRules is false, continue

		// Step 4: Check if it's ASCII now
		if (self::is_ascii($string)) {

			// Skip to step 8 (size check): at most 63 bytes are allowed
			if (strlen($string) < 64) {
				return $string;
			}

			throw new Exception('Prepared string is too long', 'idna.prepared_too_long', $string);
		}

		// Step 5: Check ACE prefix
		// The prefix is "xn--" or any capitalization thereof, and nameprep()
		// does not case-fold yet, so the comparison must be case-insensitive.
		if (stripos($string, self::ACE_PREFIX) === 0) {
			throw new Exception('Provided string begins with ACE prefix', 'idna.provided_is_prefixed', $string);
		}

		// Step 6: Encode with Punycode
		$string = self::punycode_encode($string);

		// Step 7: Prepend ACE prefix
		$string = self::ACE_PREFIX . $string;

		// Step 8: Check size
		if (strlen($string) < 64) {
			return $string;
		}

		throw new Exception('Encoded string is too long', 'idna.encoded_too_long', $string);
	}

	/**
	 * Check whether a given string contains only ASCII characters
	 *
	 * Any byte outside 0x00-0x7F makes the string non-ASCII. The empty string
	 * counts as ASCII.
	 *
	 * @internal (Testing found regex was the fastest implementation)
	 *
	 * @param string $string
	 * @return bool Is the string ASCII-only?
	 */
	protected static function is_ascii($string) {
		return (preg_match('/(?:[^\x00-\x7F])/', $string) !== 1);
	}

	/**
	 * Prepare a string for use as an IDNA name
	 *
	 * Currently a no-op: the input is returned unchanged.
	 *
	 * @todo Implement this based on RFC 3491 and the newer 5891
	 * @param string $string
	 * @return string Prepared string
	 */
	protected static function nameprep($string) {
		return $string;
	}

	/**
	 * Convert a UTF-8 string to a UCS-4 codepoint array
	 *
	 * Based on Rmccue\Requests\IRI::replace_invalid_with_pct_encoding()
	 *
	 * Decoding is strict: malformed, truncated or overlong sequences, as well
	 * as code points outside the accepted ranges, raise an exception instead
	 * of being replaced or skipped.
	 *
	 * @throws Rmccue\Requests\Exception Invalid UTF-8 codepoint (`idna.invalidcodepoint`)
	 * @param string $input
	 * @return array Unicode code points
	 */
	protected static function utf8_to_codepoints($input) {
		$codepoints = array();

		// Get number of bytes
		$strlen = strlen($input);

		for ($position = 0; $position < $strlen; $position++) {
			$value = ord($input[$position]);

			// One byte sequence:
			if ((~$value & 0x80) === 0x80) {
				$character = $value;
				$length = 1;
				$remaining = 0;
			}
			// Two byte sequence:
			elseif (($value & 0xE0) === 0xC0) {
				$character = ($value & 0x1F) << 6;
				$length = 2;
				$remaining = 1;
			}
			// Three byte sequence:
			elseif (($value & 0xF0) === 0xE0) {
				$character = ($value & 0x0F) << 12;
				$length = 3;
				$remaining = 2;
			}
			// Four byte sequence:
			elseif (($value & 0xF8) === 0xF0) {
				$character = ($value & 0x07) << 18;
				$length = 4;
				$remaining = 3;
			}
			// Invalid byte:
			else {
				throw new Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $value);
			}

			if ($remaining > 0) {
				// The sequence must not run past the end of the input
				if ($position + $length > $strlen) {
					throw new Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
				}
				for ($position++; $remaining > 0; $position++) {
					$value = ord($input[$position]);

					// Every trailing byte must be a continuation byte (10xxxxxx);
					// anything else makes the whole sequence invalid.
					if (($value & 0xC0) !== 0x80) {
						throw new Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
					}

					$character |= ($value & 0x3F) << (--$remaining * 6);
				}
				// Step back onto the last consumed byte; the outer loop advances again
				$position--;
			}

			if (
				// Non-shortest form sequences are invalid
				   $length > 1 && $character <= 0x7F
				|| $length > 2 && $character <= 0x7FF
				|| $length > 3 && $character <= 0xFFFF
				// Outside of range of ucschar codepoints
				// Noncharacters
				|| ($character & 0xFFFE) === 0xFFFE
				|| $character >= 0xFDD0 && $character <= 0xFDEF
				|| (
					// Everything else not in ucschar
					   $character > 0xD7FF && $character < 0xF900
					|| $character < 0x20
					|| $character > 0x7E && $character < 0xA0
					|| $character > 0xEFFFD
				)
			) {
				throw new Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
			}

			$codepoints[] = $character;
		}

		return $codepoints;
	}

	/**
	 * RFC3492-compliant encoder
	 *
	 * Returns the bare Punycode form; the ACE prefix is NOT prepended here
	 * (to_ascii() does that).
	 *
	 * @internal Pseudo-code from Section 6.3 is commented with "#" next to relevant code
	 * @throws Rmccue\Requests\Exception On character outside of the domain (never happens with Punycode) (`idna.character_outside_domain`)
	 * @throws Rmccue\Requests\Exception Invalid UTF-8 codepoint (`idna.invalidcodepoint`)
	 *
	 * @param string $input UTF-8 encoded string to encode
	 * @return string Punycode-encoded string
	 */
	public static function punycode_encode($input) {
		$output = '';
#		let n = initial_n
		$n = self::BOOTSTRAP_INITIAL_N;

#		let delta = 0
		$delta = 0;
#		let bias = initial_bias
		$bias = self::BOOTSTRAP_INITIAL_BIAS;
#		let h = b = the number of basic code points in the input
		$h = $b = 0; // see loop

#		copy them to the output in order
		$codepoints = self::utf8_to_codepoints($input);
		// The input length never changes, so count it once
		$total = count($codepoints);
		// Distinct non-basic code points, collected as array keys to de-duplicate
		$extended = array();

		foreach ($codepoints as $char) {
			if ($char < 128) {
				// Character is valid ASCII
				// TODO: this should also check if it's valid for a URL
				$output .= chr($char);
				$h++;
			}
			// Check if the character is non-ASCII, but below initial n
			// This never occurs for Punycode, so ignore in coverage
			// @codeCoverageIgnoreStart
			elseif ($char < $n) {
				throw new Exception('Invalid character', 'idna.character_outside_domain', $char);
			}
			// @codeCoverageIgnoreEnd
			else {
				$extended[$char] = true;
			}
		}
		// Ascending order, so each array_shift() below yields the next minimum
		$extended = array_keys($extended);
		sort($extended);
		$b = $h;
#		[copy them] followed by a delimiter if b > 0
		if (strlen($output) > 0) {
			$output .= '-';
		}
#		{if the input contains a non-basic code point < n then fail}
#		while h < length(input) do begin
		while ($h < $total) {
#			let m = the minimum code point >= n in the input
			$m = array_shift($extended);

#			let delta = delta + (m - n) * (h + 1), fail on overflow
			$delta += ($m - $n) * ($h + 1);
#			let n = m
			$n = $m;
#			for each code point c in the input (in order) do begin
			foreach ($codepoints as $c) {
#				if c < n then increment delta, fail on overflow
				if ($c < $n) {
					$delta++;
				}
#				if c == n then begin
				elseif ($c === $n) {
#					let q = delta
					$q = $delta;

#					for k = base to infinity in steps of base do begin
					for ($k = self::BOOTSTRAP_BASE; ; $k += self::BOOTSTRAP_BASE) {
#						let t = tmin if k <= bias {+ tmin}, or
#								tmax if k >= bias + tmax, or k - bias otherwise
						if ($k <= ($bias + self::BOOTSTRAP_TMIN)) {
							$t = self::BOOTSTRAP_TMIN;
						}
						elseif ($k >= ($bias + self::BOOTSTRAP_TMAX)) {
							$t = self::BOOTSTRAP_TMAX;
						}
						else {
							$t = $k - $bias;
						}
#						if q < t then break
						if ($q < $t) {
							break;
						}
#						output the code point for digit t + ((q - t) mod (base - t))
						$digit = $t + (($q - $t) % (self::BOOTSTRAP_BASE - $t));
						$output .= self::digit_to_char($digit);
#						let q = (q - t) div (base - t)
						// Cast back to int: floor() returns a float, and q feeds
						// the modulo and digit lookup on the next iteration
						$q = (int) floor(($q - $t) / (self::BOOTSTRAP_BASE - $t));
#					end
					}
#					output the code point for digit q
					$output .= self::digit_to_char($q);
#					let bias = adapt(delta, h + 1, test h equals b?)
					$bias = self::adapt($delta, $h + 1, $h === $b);
#					let delta = 0
					$delta = 0;
#					increment h
					$h++;
#				end
				}
#			end
			}
#			increment delta and n
			$delta++;
			$n++;
#		end
		}

		return $output;
	}

	/**
	 * Convert a digit to its respective character
	 *
	 * Digits 0-25 map to "a"-"z" and digits 26-35 map to "0"-"9".
	 *
	 * @see https://tools.ietf.org/html/rfc3492#section-5
	 * @throws Rmccue\Requests\Exception On invalid digit (`idna.invalid_digit`)
	 *
	 * @param int $digit Digit in the range 0-35
	 * @return string Single character corresponding to digit
	 */
	protected static function digit_to_char($digit) {
		// @codeCoverageIgnoreStart
		// As far as I know, this never happens, but still good to be sure.
		if ($digit < 0 || $digit > 35) {
			throw new Exception(sprintf('Invalid digit %d', $digit), 'idna.invalid_digit', $digit);
		}
		// @codeCoverageIgnoreEnd
		$digits = 'abcdefghijklmnopqrstuvwxyz0123456789';
		return substr($digits, $digit, 1);
	}

	/**
	 * Adapt the bias
	 *
	 * All divisions are integer divisions ("div" in the RFC pseudo-code), so
	 * every floor() result is cast back to int to keep the arithmetic, and the
	 * returned bias, in integers.
	 *
	 * @see https://tools.ietf.org/html/rfc3492#section-6.1
	 * @param int $delta
	 * @param int $numpoints
	 * @param bool $firsttime
	 * @return int New bias
	 */
	protected static function adapt($delta, $numpoints, $firsttime) {
#	function adapt(delta,numpoints,firsttime):
#		if firsttime then let delta = delta div damp
		if ($firsttime) {
			$delta = (int) floor($delta / self::BOOTSTRAP_DAMP);
		}
#		else let delta = delta div 2
		else {
			$delta = (int) floor($delta / 2);
		}
#		let delta = delta + (delta div numpoints)
		$delta += (int) floor($delta / $numpoints);
#		let k = 0
		$k = 0;

#		while delta > ((base - tmin) * tmax) div 2 do begin
		$max = (int) floor(((self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN) * self::BOOTSTRAP_TMAX) / 2);
		while ($delta > $max) {
#			let delta = delta div (base - tmin)
			$delta = (int) floor($delta / (self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN));
#			let k = k + base
			$k += self::BOOTSTRAP_BASE;
#		end
		}
#		return k + (((base - tmin + 1) * delta) div (delta + skew))
		return $k + (int) floor(((self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN + 1) * $delta) / ($delta + self::BOOTSTRAP_SKEW));
	}
}

1		<?php
2		namespace Rmccue\Requests;
3
4		use Rmccue\Requests\Exception as Exception;
5		/**
6		* IDNA URL encoder
7		*
8		* Note: Not fully compliant, as nameprep does nothing yet.
9		*
10		* @package Rmccue\Requests
11		* @subpackage Utilities
12		* @see https://tools.ietf.org/html/rfc3490 IDNA specification
13		* @see https://tools.ietf.org/html/rfc3492 Punycode/Bootstrap specification
14		*/
15		class IDNAEncoder {
16		/**
17		* ACE prefix used for IDNA
18		*
19		* @see https://tools.ietf.org/html/rfc3490#section-5
20		* @var string
21		*/
22		const ACE_PREFIX = 'xn--';
23
24		/**#@+
25		* Bootstrap constant for Punycode
26		*
27		* @see https://tools.ietf.org/html/rfc3492#section-5
28		* @var int
29		*/
30		const BOOTSTRAP_BASE = 36;
31		const BOOTSTRAP_TMIN = 1;
32		const BOOTSTRAP_TMAX = 26;
33		const BOOTSTRAP_SKEW = 38;
34		const BOOTSTRAP_DAMP = 700;
35		const BOOTSTRAP_INITIAL_BIAS = 72;
36		const BOOTSTRAP_INITIAL_N = 128;
37		/**#@-*/
38
39		/**
40		* Encode a hostname using Punycode
41		*
42		* @param string $string Hostname
43		* @return string Punycode-encoded hostname
44		*/
45		public static function encode($string) {
46		$parts = explode('.', $string);
47		foreach ($parts as &$part) {
48		$part = self::to_ascii($part);
49		}
50		return implode('.', $parts);
51		}
52
53		/**
54		* Convert a UTF-8 string to an ASCII string using Punycode
55		*
56		* @throws Rmccue\Requests\Exception Provided string longer than 64 ASCII characters (`idna.provided_too_long`)
57		* @throws Rmccue\Requests\Exception Prepared string longer than 64 ASCII characters (`idna.prepared_too_long`)
58		* @throws Rmccue\Requests\Exception Provided string already begins with xn-- (`idna.provided_is_prefixed`)
59		* @throws Rmccue\Requests\Exception Encoded string longer than 64 ASCII characters (`idna.encoded_too_long`)
60		*
61		* @param string $string ASCII or UTF-8 string (max length 64 characters)
62		* @return string ASCII string
63		*/
64		public static function to_ascii($string) {
65		// Step 1: Check if the string is already ASCII
66	View Code Duplication	if (self::is_ascii($string)) {
		0 ignored issues – show Duplication introduced 2017-03-24 18:26 UTC by Report Bug Copy Issue Report This code seems to be duplicated across your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
67		// Skip to step 7
68		if (strlen($string) < 64) {
69		return $string;
70		}
71
72		throw new Exception('Provided string is too long', 'idna.provided_too_long', $string);
73		}
74
75		// Step 2: nameprep
76		$string = self::nameprep($string);
77
78		// Step 3: UseSTD3ASCIIRules is false, continue
		0 ignored issues – show Unused Code Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report `36%` of this comment could be valid code. Did you maybe forget this after debugging? Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it. The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production. This check looks for comments that seem to be mostly valid code and reports them. Loading history...
79		// Step 4: Check if it's ASCII now
80	View Code Duplication	if (self::is_ascii($string)) {
		0 ignored issues – show Duplication introduced 2017-03-24 18:26 UTC by Report Bug Copy Issue Report This code seems to be duplicated across your project. Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation. You can also find more detailed suggestions in the “Code” section of your repository. Loading history...
81		// Skip to step 7
82		if (strlen($string) < 64) {
83		return $string;
84		}
85
86		throw new Exception('Prepared string is too long', 'idna.prepared_too_long', $string);
87		}
88
89		// Step 5: Check ACE prefix
90		if (strpos($string, self::ACE_PREFIX) === 0) {
91		throw new Exception('Provided string begins with ACE prefix', 'idna.provided_is_prefixed', $string);
92		}
93
94		// Step 6: Encode with Punycode
95		$string = self::punycode_encode($string);
96
97		// Step 7: Prepend ACE prefix
98		$string = self::ACE_PREFIX . $string;
99
100		// Step 8: Check size
101		if (strlen($string) < 64) {
102		return $string;
103		}
104
105		throw new Exception('Encoded string is too long', 'idna.encoded_too_long', $string);
106		}
107
108		/**
109		* Check whether a given string contains only ASCII characters
110		*
111		* @internal (Testing found regex was the fastest implementation)
112		*
113		* @param string $string
114		* @return bool Is the string ASCII-only?
115		*/
116		protected static function is_ascii($string) {
117		return (preg_match('/(?:[^\x00-\x7F])/', $string) !== 1);
118		}
119
120		/**
121		* Prepare a string for use as an IDNA name
122		*
123		* @todo Implement this based on RFC 3491 and the newer 5891
124		* @param string $string
125		* @return string Prepared string
126		*/
127		protected static function nameprep($string) {
128		return $string;
129		}
130
131		/**
132		* Convert a UTF-8 string to a UCS-4 codepoint array
133		*
134		* Based on Rmccue\Requests\IRI::replace_invalid_with_pct_encoding()
135		*
136		* @throws Rmccue\Requests\Exception Invalid UTF-8 codepoint (`idna.invalidcodepoint`)
137		* @param string $input
138		* @return array Unicode code points
139		*/
140		protected static function utf8_to_codepoints($input) {
141		$codepoints = array();
142
143		// Get number of bytes
144		$strlen = strlen($input);
145
146		for ($position = 0; $position < $strlen; $position++) {
147		$value = ord($input[$position]);
148
149		// One byte sequence:
150		if ((~$value & 0x80) === 0x80) {
151		$character = $value;
152		$length = 1;
153		$remaining = 0;
154		}
155		// Two byte sequence:
156	View Code Duplication	elseif (($value & 0xE0) === 0xC0) {
157		$character = ($value & 0x1F) << 6;
158		$length = 2;
159		$remaining = 1;
160		}
161		// Three byte sequence:
162	View Code Duplication	elseif (($value & 0xF0) === 0xE0) {
163		$character = ($value & 0x0F) << 12;
164		$length = 3;
165		$remaining = 2;
166		}
167		// Four byte sequence:
168	View Code Duplication	elseif (($value & 0xF8) === 0xF0) {
169		$character = ($value & 0x07) << 18;
170		$length = 4;
171		$remaining = 3;
172		}
173		// Invalid byte:
174		else {
175		throw new Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $value);
176		}
177
178		if ($remaining > 0) {
179		if ($position + $length > $strlen) {
180		throw new Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
181		}
182		for ($position++; $remaining > 0; $position++) {
183		$value = ord($input[$position]);
184
185		// If it is invalid, count the sequence as invalid and reprocess the current byte:
186		if (($value & 0xC0) !== 0x80) {
187		throw new Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
188		}
189
190		$character |= ($value & 0x3F) << (--$remaining * 6);
191		}
192		$position--;
193		}
194
195		if (
196		// Non-shortest form sequences are invalid
197		$length > 1 && $character <= 0x7F
198		|| $length > 2 && $character <= 0x7FF
199		|| $length > 3 && $character <= 0xFFFF
200		// Outside of range of ucschar codepoints
201		// Noncharacters
202		|| ($character & 0xFFFE) === 0xFFFE
203		|| $character >= 0xFDD0 && $character <= 0xFDEF
204		|| (
205		// Everything else not in ucschar
206		$character > 0xD7FF && $character < 0xF900
207		|| $character < 0x20
208		|| $character > 0x7E && $character < 0xA0
209		|| $character > 0xEFFFD
210		)
211		) {
212		throw new Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character);
213		}
214
215		$codepoints[] = $character;
216		}
217
218		return $codepoints;
219		}
220
221		/**
222		* RFC3492-compliant encoder
223		*
224		* @internal Pseudo-code from Section 6.3 is commented with "#" next to relevant code
225		* @throws Rmccue\Requests\Exception On character outside of the domain (never happens with Punycode) (`idna.character_outside_domain`)
		0 ignored issues – show Coding Style introduced 2017-03-24 18:26 UTC by Report Bug Copy Issue Report This line exceeds maximum limit of 120 characters; contains 136 characters Overly long lines are hard to read on any screen. Most code styles therefore impose a maximum limit on the number of characters in a line. Loading history...
226		*
227		* @param string $input UTF-8 encoded string to encode
228		* @return string Punycode-encoded string
229		*/
230		public static function punycode_encode($input) {
231		$output = '';
232		# let n = initial_n
233		$n = self::BOOTSTRAP_INITIAL_N;
		0 ignored issues – show Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report Avoid variables with short names like $n. Configured minimum length is 3. Short variable names may make your code harder to understand. Variable names should be self-descriptive. This check looks for variable names who are shorter than a configured minimum. Loading history...
234		# let delta = 0
235		$delta = 0;
236		# let bias = initial_bias
237		$bias = self::BOOTSTRAP_INITIAL_BIAS;
238		# let h = b = the number of basic code points in the input
239		$h = $b = 0; // see loop
		0 ignored issues – show Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report Avoid variables with short names like $h. Configured minimum length is 3. Short variable names may make your code harder to understand. Variable names should be self-descriptive. This check looks for variable names who are shorter than a configured minimum. Loading history... Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report Avoid variables with short names like $b. Configured minimum length is 3. Short variable names may make your code harder to understand. Variable names should be self-descriptive. This check looks for variable names who are shorter than a configured minimum. Loading history...
240		# copy them to the output in order
241		$codepoints = self::utf8_to_codepoints($input);
242		$extended = array();
243
244		foreach ($codepoints as $char) {
245		if ($char < 128) {
246		// Character is valid ASCII
247		// TODO: this should also check if it's valid for a URL
248		$output .= chr($char);
249		$h++;
250		}
251		// Check if the character is non-ASCII, but below initial n
252		// This never occurs for Punycode, so ignore in coverage
253		// @codeCoverageIgnoreStart
254		elseif ($char < $n) {
255		throw new Exception('Invalid character', 'idna.character_outside_domain', $char);
256		}
257		// @codeCoverageIgnoreEnd
258		else {
259		$extended[$char] = true;
260		}
261		}
262		$extended = array_keys($extended);
263		sort($extended);
264		$b = $h;
265		# [copy them] followed by a delimiter if b > 0
266		if (strlen($output) > 0) {
267		$output .= '-';
268		}
269		# {if the input contains a non-basic code point < n then fail}
270		# while h < length(input) do begin
271		while ($h < count($codepoints)) {
272		# let m = the minimum code point >= n in the input
273		$m = array_shift($extended);
		0 ignored issues – show Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report Avoid variables with short names like $m. Configured minimum length is 3. Short variable names may make your code harder to understand. Variable names should be self-descriptive. This check looks for variable names who are shorter than a configured minimum. Loading history...
274		//printf('next code point to insert is %s' . PHP_EOL, dechex($m));
275		# let delta = delta + (m - n) * (h + 1), fail on overflow
276		$delta += ($m - $n) * ($h + 1);
277		# let n = m
278		$n = $m;
279		# for each code point c in the input (in order) do begin
280		for ($num = 0; $num < count($codepoints); $num++) {
281		$c = $codepoints[$num];
		0 ignored issues – show Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report Avoid variables with short names like $c. Configured minimum length is 3. Short variable names may make your code harder to understand. Variable names should be self-descriptive. This check looks for variable names who are shorter than a configured minimum. Loading history...
282		# if c < n then increment delta, fail on overflow
283		if ($c < $n) {
284		$delta++;
285		}
286		# if c == n then begin
287		elseif ($c === $n) {
288		# let q = delta
289		$q = $delta;
		0 ignored issues – show Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report Avoid variables with short names like $q. Configured minimum length is 3. Short variable names may make your code harder to understand. Variable names should be self-descriptive. This check looks for variable names who are shorter than a configured minimum. Loading history...
290		# for k = base to infinity in steps of base do begin
291		for ($k = self::BOOTSTRAP_BASE; ; $k += self::BOOTSTRAP_BASE) {
292		# let t = tmin if k <= bias {+ tmin}, or
293		# tmax if k >= bias + tmax, or k - bias otherwise
294		if ($k <= ($bias + self::BOOTSTRAP_TMIN)) {
295		$t = self::BOOTSTRAP_TMIN;
		0 ignored issues – show Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report Avoid variables with short names like $t. Configured minimum length is 3. Short variable names may make your code harder to understand. Variable names should be self-descriptive. This check looks for variable names who are shorter than a configured minimum. Loading history...
296		}
297		elseif ($k >= ($bias + self::BOOTSTRAP_TMAX)) {
298		$t = self::BOOTSTRAP_TMAX;
299		}
300		else {
301		$t = $k - $bias;
302		}
303		# if q < t then break
304		if ($q < $t) {
305		break;
306		}
307		# output the code point for digit t + ((q - t) mod (base - t))
308		$digit = $t + (($q - $t) % (self::BOOTSTRAP_BASE - $t));
309		$output .= self::digit_to_char($digit);
310		# let q = (q - t) div (base - t)
311		$q = floor(($q - $t) / (self::BOOTSTRAP_BASE - $t));
312		# end
313		}
314		# output the code point for digit q
315		$output .= self::digit_to_char($q);
316		# let bias = adapt(delta, h + 1, test h equals b?)
317		$bias = self::adapt($delta, $h + 1, $h === $b);
318		# let delta = 0
319		$delta = 0;
320		# increment h
321		$h++;
322		# end
323		}
324		# end
325		}
326		# increment delta and n
327		$delta++;
328		$n++;
329		# end
330		}
331
332		return $output;
333		}
334
335		/**
336		* Convert a digit to its respective character
337		*
338		* @see https://tools.ietf.org/html/rfc3492#section-5
339		* @throws Rmccue\Requests\Exception On invalid digit (`idna.invalid_digit`)
340		*
341		* @param int $digit Digit in the range 0-35
342		* @return string Single character corresponding to digit
343		*/
344		protected static function digit_to_char($digit) {
345		// @codeCoverageIgnoreStart
346		// As far as I know, this never happens, but still good to be sure.
347		if ($digit < 0 || $digit > 35) {
348		throw new Exception(sprintf('Invalid digit %d', $digit), 'idna.invalid_digit', $digit);
349		}
350		// @codeCoverageIgnoreEnd
351		$digits = 'abcdefghijklmnopqrstuvwxyz0123456789';
352		return substr($digits, $digit, 1);
353		}
354
355		/**
356		* Adapt the bias
357		*
358		* @see https://tools.ietf.org/html/rfc3492#section-6.1
359		* @param int $delta
360		* @param int $numpoints
361		* @param bool $firsttime
362		* @return int New bias
363		*/
364		protected static function adapt($delta, $numpoints, $firsttime) {
365		# function adapt(delta,numpoints,firsttime):
366		# if firsttime then let delta = delta div damp
367		if ($firsttime) {
368		$delta = floor($delta / self::BOOTSTRAP_DAMP);
369		}
370		# else let delta = delta div 2
371		else {
372		$delta = floor($delta / 2);
373		}
374		# let delta = delta + (delta div numpoints)
375		$delta += floor($delta / $numpoints);
376		# let k = 0
377		$k = 0;
		0 ignored issues – show Comprehensibility introduced 2016-02-09 23:07 UTC by Report Bug Copy Issue Report Avoid variables with short names like $k. Configured minimum length is 3. Short variable names may make your code harder to understand. Variable names should be self-descriptive. This check looks for variable names who are shorter than a configured minimum. Loading history...
378		# while delta > ((base - tmin) * tmax) div 2 do begin
379		$max = floor(((self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN) * self::BOOTSTRAP_TMAX) / 2);
380		while ($delta > $max) {
381		# let delta = delta div (base - tmin)
382		$delta = floor($delta / (self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN));
383		# let k = k + base
384		$k += self::BOOTSTRAP_BASE;
385		# end
386		}
387		# return k + (((base - tmin + 1) * delta) div (delta + skew))
388		return $k + floor(((self::BOOTSTRAP_BASE - self::BOOTSTRAP_TMIN + 1) * $delta) / ($delta + self::BOOTSTRAP_SKEW));
389		}
390		}

rmccue / Requests

Pull Request — master (#271)

IDNAEncoder::is_ascii() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like