Completed
Push — master ( 50e3b5...fbe966 )
by Josh
02:25
created

Utf8::cp()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 18
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 9
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 18
ccs 9
cts 9
cp 1
rs 9.2
c 0
b 0
f 0
cc 4
eloc 9
nc 4
nop 1
crap 4
1
<?php
2
3
/**
4
* @package   s9e\RegexpBuilder
5
* @copyright Copyright (c) 2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\RegexpBuilder\Input;
9
10
use InvalidArgumentException;
11
12
class Utf8 implements InputInterface
{
	/**
	* {@inheritdoc}
	*
	* @throws InvalidArgumentException if preg_match_all() returns false for the given string
	*/
	public function split($string)
	{
		// The "u" flag makes "." match one whole UTF-8 character, the "s" flag lets it match newlines
		$result = preg_match_all('(.)us', $string, $matches);
		if ($result !== false)
		{
			return $this->charsToCodepoints($matches[0]);
		}

		throw new InvalidArgumentException('Invalid UTF-8 string');
	}

	/**
	* Convert a list of UTF-8 characters to a list of Unicode codepoints
	*
	* Each element is passed to cp() and the result is stored under the same key.
	*
	* @param  string[]  $chars
	* @return integer[]
	*/
	protected function charsToCodepoints(array $chars)
	{
		$codepoints = [];
		foreach ($chars as $key => $char)
		{
			$codepoints[$key] = $this->cp($char);
		}

		return $codepoints;
	}

	/**
	* Compute and return the Unicode codepoint for given UTF-8 char
	*
	* The value of the first byte selects how many of the following bytes are combined with it.
	* Each subtracted constant equals the marker bits of the sequence once shifted into place,
	* e.g. 0x3080 is (0xC0 << 6) + 0x80.
	*
	* @param  string  $char UTF-8 char
	* @return integer
	*/
	protected function cp($char)
	{
		$lead = ord($char[0]);
		if ($lead < 0xC0)
		{
			// Nothing to combine, the value of the first byte is returned unchanged
			return $lead;
		}

		if ($lead < 0xE0)
		{
			// First byte in 0xC0..0xDF: combined with one following byte
			return ($lead << 6) + ord($char[1]) - 0x3080;
		}

		if ($lead < 0xF0)
		{
			// First byte in 0xE0..0xEF: combined with two following bytes
			return ($lead << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
		}

		// First byte is 0xF0 or above: combined with three following bytes
		return ($lead << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
	}
}