Utf8 - Code Metrics - s9e/RegexpBuilder - Measure and Improve Code Quality continuously with Scrutinizer

Utf8 A
last analyzed 2022-02-28 14:49 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	88
Duplicated Lines	0 %

Test Coverage

Coverage

100%

Importance

Changes	2
Bugs	0	Features	0

Metric	Value
wmc	12
eloc	24
c	2
b	0
f	0
dl	0
loc	88
ccs	27
cts	27
cp	1
rs	10

5 Methods

Rating	Name	Size	Complexity
A	__construct()	3	1
A	charsToCodepointsWithSurrogates()	18	3
A	cp()	17	4
A	charsToCodepoints()	3	1
A	split()	8	3

<?php declare(strict_types=1);

/**
* @package   s9e\RegexpBuilder
* @copyright Copyright (c) 2016-2022 The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
namespace s9e\RegexpBuilder\Input;

use InvalidArgumentException;

class Utf8 extends BaseImplementation
{
	/**
	* @var bool Whether codepoints outside the BMP are emitted as a pair of surrogates
	*/
	protected $useSurrogates;

	/**
	* Read the "useSurrogates" option; any truthy value enables surrogate output
	*
	* @param array $options Implementation options; only "useSurrogates" is read here
	*/
	public function __construct(array $options = [])
	{
		$this->useSurrogates = (bool) ($options['useSurrogates'] ?? false);
	}

	/**
	* Split a UTF-8 string into a list of numeric values, one per character
	*
	* When surrogates are enabled, each codepoint at or above 0x10000 is replaced by two values:
	* a high surrogate followed by a low surrogate.
	*
	* @param  string    $string UTF-8 string
	* @return integer[]
	*
	* @throws InvalidArgumentException if the string cannot be matched as UTF-8
	*/
	public function split(string $string): array
	{
		// The "u" flag makes PCRE validate the input, "s" lets the dot match newlines
		$matchCount = preg_match_all('(.)us', $string, $matches);
		if ($matchCount === false)
		{
			throw new InvalidArgumentException('Invalid UTF-8 string');
		}

		$chars = $matches[0];
		if ($this->useSurrogates)
		{
			return $this->charsToCodepointsWithSurrogates($chars);
		}

		return $this->charsToCodepoints($chars);
	}

	/**
	* Map every UTF-8 character of a list to its Unicode codepoint
	*
	* @param  string[]  $chars
	* @return integer[]
	*/
	protected function charsToCodepoints(array $chars): array
	{
		$toCodepoint = function (string $char): int
		{
			return $this->cp($char);
		};

		return array_map($toCodepoint, $chars);
	}

	/**
	* Map every UTF-8 character of a list to its Unicode codepoint, using surrogate pairs
	*
	* Codepoints below 0x10000 are kept as-is. Anything else is split into a high surrogate
	* (0xD800..0xDBFF) followed by a low surrogate (0xDC00..0xDFFF).
	*
	* @param  string[]  $chars
	* @return integer[]
	*/
	protected function charsToCodepointsWithSurrogates(array $chars): array
	{
		$values = [];
		foreach ($chars as $char)
		{
			$codepoint = $this->cp($char);
			if ($codepoint >= 0x10000)
			{
				// 0xD7C0 is 0xD800 - (0x10000 >> 10), which folds the 0x10000 offset into the base
				$highSurrogate = 0xD7C0 + ($codepoint >> 10);
				$lowSurrogate  = 0xDC00 + ($codepoint & 0x3FF);

				$values[] = $highSurrogate;
				$values[] = $lowSurrogate;

				continue;
			}

			$values[] = $codepoint;
		}

		return $values;
	}

	/**
	* Decode a single UTF-8 character into its Unicode codepoint
	*
	* The length of the sequence is derived from the first byte. Each subtracted constant is
	* the sum of the marker bits of every byte of the sequence, shifted into position.
	*
	* @param  string  $char UTF-8 char
	* @return integer
	*/
	protected function cp(string $char): int
	{
		$lead = ord($char[0]);
		if ($lead < 0xC0)
		{
			// Single byte: the byte value is the codepoint
			return $lead;
		}
		if ($lead < 0xE0)
		{
			// Two bytes: (0xC0 << 6) + 0x80
			return ($lead << 6) + ord($char[1]) - 0x3080;
		}
		if ($lead < 0xF0)
		{
			// Three bytes: (0xE0 << 12) + (0x80 << 6) + 0x80
			return ($lead << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
		}

		// Four bytes: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
		return ($lead << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
	}
}

1		<?php declare(strict_types=1);
2
3		/**
4		* @package s9e\RegexpBuilder
5		* @copyright Copyright (c) 2016-2022 The s9e authors
6		* @license http://www.opensource.org/licenses/mit-license.php The MIT License
7		*/
8		namespace s9e\RegexpBuilder\Input;
9
10		use InvalidArgumentException;
11
12		class Utf8 extends BaseImplementation
13		{
14		/**
15		* @var bool Whether to use surrogates to represent higher codepoints
16		*/
17		protected $useSurrogates;
18
19		/**
20		* {@inheritdoc}
21		*/
22	9	public function __construct(array $options = [])
23		{
24	9	$this->useSurrogates = !empty($options['useSurrogates']);
25	9	}
26
27		/**
28		* {@inheritdoc}
29		*/
30	9	public function split(string $string): array
31		{
32	9	if (preg_match_all('(.)us', $string, $matches) === false)
33		{
34	1	throw new InvalidArgumentException('Invalid UTF-8 string');
35		}
36
37	8	return ($this->useSurrogates) ? $this->charsToCodepointsWithSurrogates($matches[0]) : $this->charsToCodepoints($matches[0]);
38		}
39
40		/**
41		* Convert a list of UTF-8 characters into a list of Unicode codepoints
42		*
43		* @param string[] $chars
44		* @return integer[]
45		*/
46	6	protected function charsToCodepoints(array $chars): array
47		{
48	6	return array_map([$this, 'cp'], $chars);
49		}
50
51		/**
52		* Convert a list of UTF-8 characters into a list of Unicode codepoints with surrogates
53		*
54		* @param string[] $chars
55		* @return integer[]
56		*/
57	2	protected function charsToCodepointsWithSurrogates(array $chars): array
58		{
59	2	$codepoints = [];
60	2	foreach ($chars as $char)
61		{
62	2	$cp = $this->cp($char);
63	2	if ($cp < 0x10000)
64		{
65	2	$codepoints[] = $cp;
66		}
67		else
68		{
69	1	$codepoints[] = 0xD7C0 + ($cp >> 10);
70	1	$codepoints[] = 0xDC00 + ($cp & 0x3FF);
71		}
72		}
73
74	2	return $codepoints;
75		}
76
77		/**
78		* Compute and return the Unicode codepoint for given UTF-8 char
79		*
80		* @param string $char UTF-8 char
81		* @return integer
82		*/
83	7	protected function cp(string $char): int
84		{
85	7	$cp = ord($char[0]);
86	7	if ($cp >= 0xF0)
87		{
88	3	$cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
89		}
90	7	elseif ($cp >= 0xE0)
91		{
92	1	$cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
93		}
94	6	elseif ($cp >= 0xC0)
95		{
96	2	$cp = ($cp << 6) + ord($char[1]) - 0x3080;
97		}
98
99	7	return $cp;
100		}
101		}

s9e / RegexpBuilder

Utf8 A last analyzed 2022-02-28 14:49 UTC

Complexity

Size/Duplication

Test Coverage

Importance

5 Methods

Duplication Side-by-Side

Filter issues like

Utf8 A
last analyzed 2022-02-28 14:49 UTC