Passed
Push — master ( cf678e...babb3f )
by Josh
02:55
created

Hasher   A

Complexity

Total Complexity 11

Size/Duplication

Total Lines 87
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
wmc 11
eloc 31
c 1
b 0
f 0
dl 0
loc 87
rs 10

3 Methods

Rating   Name   Duplication   Size   Complexity  
A cp() 0 17 4
A quickHash() 0 26 4
A charsToCodepointsWithSurrogates() 0 18 3
1
<?php declare(strict_types=1);
2
3
/**
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2022 The s9e authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
8
namespace s9e\TextFormatter\Configurator\JavaScript;
9
10
use ValueError;
11
12
class Hasher
{
	/**
	* Generate a hash that matches the hashing algorithm used in render.js
	*
	* See hash() in render.js
	*
	* The text is split into UTF-8 characters, converted to a list of 16-bit values (astral
	* characters become a surrogate pair) and folded from the last value to the first into two
	* running sums modulo 0xFFFF. The sums are packed into a 32-bit value that is returned as a
	* signed integer.
	*
	* @param  string $text UTF-8 text to hash
	* @return int          Hash in the signed 32-bit range
	*
	* @throws ValueError   If the text cannot be split by the UTF-8 regexp
	*/
	public static function quickHash(string $text): int
	{
		// In Unicode mode preg_match_all() returns false if the subject cannot be processed,
		// which is reported to the caller as an invalid string
		if (preg_match_all('(.)us', $text, $matches) === false)
		{
			throw new ValueError('Invalid UTF-8 string');
		}

		$codepoints = self::charsToCodepointsWithSurrogates($matches[0]);

		// Walk the values from last to first while maintaining both running sums
		$pos = count($codepoints);
		$s1  = 0;
		$s2  = 0;
		while (--$pos >= 0)
		{
			$s1 = ($s1 + $codepoints[$pos]) % 0xFFFF;
			$s2 = ($s1 + $s2) % 0xFFFF;
		}

		// $s2 occupies the high 16 bits, $s1 the low 16 bits
		$hash = ($s2 << 16) | $s1;

		// Convert to signed long: values with bit 31 set are mapped to their negative
		// two's complement counterpart
		if ($hash > 0x7FFFFFFF)
		{
			$hash -= 0x100000000;
		}

		return $hash;
	}

	/**
	* Convert a list of UTF-8 characters into a list of Unicode codepoints with surrogates
	*
	* Codepoints below 0x10000 are stored as-is. Any other codepoint is replaced by two values:
	* a high surrogate followed by a low surrogate.
	*
	* @param  string[] $chars List of UTF-8 characters, one character per element
	* @return int[]           List of 16-bit values, in the same order as the input
	*/
	protected static function charsToCodepointsWithSurrogates(array $chars): array
	{
		$codepoints = [];
		foreach ($chars as $char)
		{
			$cp = self::cp($char);
			if ($cp < 0x10000)
			{
				$codepoints[] = $cp;
			}
			else
			{
				// 0xD7C0 is 0xD800 - (0x10000 >> 10): the 0x10000 offset of the surrogate
				// encoding is folded into the base of the high surrogate
				$codepoints[] = 0xD7C0 + ($cp >> 10);
				$codepoints[] = 0xDC00 + ($cp & 0x3FF);
			}
		}

		return $codepoints;
	}

	/**
	* Compute and return the Unicode codepoint for given UTF-8 char
	*
	* The length of the sequence is inferred from the first byte. Each branch sums the shifted
	* raw bytes then subtracts a constant that cancels the marker bits of the lead byte and of
	* the continuation bytes in one operation.
	*
	* @param  string $char UTF-8 char
	* @return int          Unicode codepoint
	*/
	protected static function cp(string $char): int
	{
		$cp = ord($char[0]);
		if ($cp >= 0xF0)
		{
			// 4 bytes: 0x3C82080 is (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80
			$cp = ($cp << 18) + (ord($char[1]) << 12) + (ord($char[2]) << 6) + ord($char[3]) - 0x3C82080;
		}
		elseif ($cp >= 0xE0)
		{
			// 3 bytes: 0xE2080 is (0xE0 << 12) + (0x80 << 6) + 0x80
			$cp = ($cp << 12) + (ord($char[1]) << 6) + ord($char[2]) - 0xE2080;
		}
		elseif ($cp >= 0xC0)
		{
			// 2 bytes: 0x3080 is (0xC0 << 6) + 0x80
			$cp = ($cp << 6) + ord($char[1]) - 0x3080;
		}

		return $cp;
	}
}