Utf8::charsToCodepointsWithSurrogates() - Code Metrics - Inspection of "Removed Utf8ToSurrogates input class, replaced wit..." - s9e/RegexpBuilder - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 94638b...c96444 )

by Josh

created 2016-10-21 04:54 UTC

Utf8::charsToCodepointsWithSurrogates() A

↳ Parent: Utf8

Complexity

Conditions	3
Paths	3

Size

Total Lines	19
Code Lines	10

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	9
CRAP Score	3

Importance

Changes

Metric	Value
dl	0
loc	19
ccs	9
cts	9
cp	1
rs	9.4285
c	0
b	0
f	0
cc	3
eloc	10
nc	3
nop	1
crap	3

<?php

/**
* @package   s9e\RegexpBuilder
* @copyright Copyright (c) 2016 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Input;

use InvalidArgumentException;

class Utf8 extends BaseImplementation
{
	/**
	* @var bool Whether codepoints of 0x10000 and above are emitted as a pair of surrogates
	*/
	protected $useSurrogates;

	/**
	* Constructor
	*
	* Only the "useSurrogates" option is read here; it is enabled by any non-empty value.
	*
	* @param array $options Input options
	*/
	public function __construct(array $options = [])
	{
		$this->useSurrogates = isset($options['useSurrogates']) && (bool) $options['useSurrogates'];
	}

	/**
	* Split given UTF-8 string into a list of codepoints
	*
	* @param  string    $string UTF-8 string
	* @return integer[]         One codepoint per character, or per surrogate if enabled
	*
	* @throws InvalidArgumentException if the string cannot be matched as UTF-8
	*/
	public function split($string)
	{
		// The "u" modifier makes PCRE reject malformed UTF-8, in which case false is returned
		$result = preg_match_all('(.)us', $string, $matches);
		if ($result === false)
		{
			throw new InvalidArgumentException('Invalid UTF-8 string');
		}

		if ($this->useSurrogates)
		{
			return $this->charsToCodepointsWithSurrogates($matches[0]);
		}

		return $this->charsToCodepoints($matches[0]);
	}

	/**
	* Convert a list of UTF-8 characters into a list of Unicode codepoints
	*
	* The keys of the input array are carried over to the returned array.
	*
	* @param  string[]  $chars
	* @return integer[]
	*/
	protected function charsToCodepoints(array $chars)
	{
		$codepoints = [];
		foreach ($chars as $key => $char)
		{
			$codepoints[$key] = $this->cp($char);
		}

		return $codepoints;
	}

	/**
	* Convert a list of UTF-8 characters into a list of Unicode codepoints with surrogates
	*
	* Codepoints below 0x10000 are kept as-is. Anything higher is replaced with a high
	* surrogate followed by a low surrogate.
	*
	* @param  string[]  $chars
	* @return integer[]
	*/
	protected function charsToCodepointsWithSurrogates(array $chars)
	{
		$codepoints = [];
		foreach ($chars as $char)
		{
			$cp = $this->cp($char);
			if ($cp < 0x10000)
			{
				$codepoints[] = $cp;

				continue;
			}

			// 0xD7C0 is 0xD800 - (0x10000 >> 10), which folds the 0x10000 offset into the base
			$codepoints[] = 0xD7C0 + ($cp >> 10);
			$codepoints[] = 0xDC00 + ($cp & 0x3FF);
		}

		return $codepoints;
	}

	/**
	* Compute and return the Unicode codepoint for given UTF-8 char
	*
	* The sequence length is derived from the first byte only. Each subtracted constant is the
	* sum of the shifted marker bits of the lead byte and of its continuation bytes.
	*
	* @param  string  $char UTF-8 char
	* @return integer
	*/
	protected function cp($char)
	{
		$lead = ord($char[0]);
		if ($lead < 0xC0)
		{
			// Single byte, the value is the codepoint
			return $lead;
		}

		if ($lead < 0xE0)
		{
			// Two bytes: (0xC0 << 6) + 0x80
			return ($lead << 6) + ord($char[1]) - 0x3080;
		}

		if ($lead < 0xF0)
		{
			// Three bytes: (0xE0 << 12) + (0x80 << 6) + 0x80
			return ($lead << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
		}

		// Four bytes: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
		return ($lead << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
	}
}

1		<?php
2
3		/**
4		* @package s9e\RegexpBuilder
5		* @copyright Copyright (c) 2016 The s9e Authors
6		* @license http://www.opensource.org/licenses/mit-license.php The MIT License
7		*/
8		namespace s9e\RegexpBuilder\Input;
9
10		use InvalidArgumentException;
11
12		class Utf8 extends BaseImplementation
13		{
14		/**
15		* @var bool Whether to use surrogates to represent higher codepoints
16		*/
17		protected $useSurrogates;
18
19		/**
20		* {@inheritdoc}
21		*/
22	9	public function __construct(array $options = [])
23		{
24	9	$this->useSurrogates = !empty($options['useSurrogates']);
25	9	}
26
27		/**
28		* {@inheritdoc}
29		*/
30	9	public function split($string)
31		{
32	9	if (preg_match_all('(.)us', $string, $matches) === false)
33		{
34	1	throw new InvalidArgumentException('Invalid UTF-8 string');
35		}
36
37	8	return ($this->useSurrogates) ? $this->charsToCodepointsWithSurrogates($matches[0]) : $this->charsToCodepoints($matches[0]);
38		}
39
40		/**
41		* Convert a list of UTF-8 characters into a list of Unicode codepoint
42		*
43		* @param string[] $chars
44		* @return integer[]
45		*/
46	6	protected function charsToCodepoints(array $chars)
47		{
48	6	return array_map([$this, 'cp'], $chars);
49		}
50
51		/**
52		* Convert a list of UTF-8 characters into a list of Unicode codepoint with surrogates
53		*
54		* @param string[] $chars
55		* @return integer[]
56		*/
57	2	protected function charsToCodepointsWithSurrogates(array $chars)
58		{
59	2	$codepoints = [];
60	2	foreach ($chars as $char)
61		{
62	2	$cp = $this->cp($char);
63	2	if ($cp < 0x10000)
64		{
65	2	$codepoints[] = $cp;
66		}
67		else
68		{
69	1	$codepoints[] = 0xD7C0 + ($cp >> 10);
70	2	$codepoints[] = 0xDC00 + ($cp & 0x3FF);
71		}
72		}
73
74	2	return $codepoints;
75		}
76
77		/**
78		* Compute and return the Unicode codepoint for given UTF-8 char
79		*
80		* @param string $char UTF-8 char
81		* @return integer
82		*/
83	7	protected function cp($char)
84		{
85	7	$cp = ord($char[0]);
86	7	if ($cp >= 0xF0)
87		{
88	3	$cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
89		}
90	7	elseif ($cp >= 0xE0)
91		{
92	1	$cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
93		}
94	6	elseif ($cp >= 0xC0)
95		{
96	2	$cp = ($cp << 6) + ord($char[1]) - 0x3080;
97		}
98
99	7	return $cp;
100		}
101		}

s9e / RegexpBuilder

Push — master ( 94638b...c96444 )

Utf8::charsToCodepointsWithSurrogates() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like