MetaCharacters   A
last analyzed

Complexity

Total Complexity 23

Size/Duplication

Total Lines 208
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 4
Bugs 0 Features 0
Metric Value
wmc 23
eloc 47
c 4
b 0
f 0
dl 0
loc 208
ccs 47
cts 47
cp 1
rs 10

10 Methods

Rating   Name   Duplication   Size   Complexity  
A replaceMeta() 0 14 4
A exprIsChar() 0 14 1
A add() 0 17 3
A __construct() 0 3 1
A isQuantifiable() 0 3 2
A getExpression() 0 8 2
A matchesAny() 0 11 3
A isChar() 0 3 2
A exprIsQuantifiable() 0 11 2
A computeValue() 0 16 3
1
<?php declare(strict_types=1);
2
3
/**
4
* @package   s9e\RegexpBuilder
5
* @copyright Copyright (c) 2016-2022 The s9e authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\RegexpBuilder;
9
10
use InvalidArgumentException;
11
use s9e\RegexpBuilder\Input\InputInterface;
12
13
/**
* Registry of meta-characters
*
* Associates single input characters with arbitrary regular expressions. Each registered
* expression is identified by a unique negative integer (its "meta value") whose least
* significant bits describe the expression's properties, see IS_CHAR and IS_QUANTIFIABLE.
*/
class MetaCharacters
{
	/**
	* @const Bit value that indicates whether a meta-character represents a single character usable
	*        in a character class
	*/
	const IS_CHAR = 1;

	/**
	* @const Bit value that indicates whether a meta-character represents a quantifiable expression
	*/
	const IS_QUANTIFIABLE = 2;

	/**
	* @var array Map of meta values and the expression they represent
	*/
	protected $exprs = [];

	/**
	* @var InputInterface Input whose split() method is used to decode meta-characters
	*/
	protected $input;

	/**
	* @var array Map of meta-characters' input values (as returned by the input's split() method)
	*            and their meta value
	*/
	protected $meta = [];

	/**
	* @param InputInterface $input
	*/
	public function __construct(InputInterface $input)
	{
		$this->input = $input;
	}

	/**
	* Add a meta-character to the list
	*
	* Adding a character that is already registered points it at the new expression. The
	* previous expression stays registered under its own meta value.
	*
	* @param  string $char Meta-character
	* @param  string $expr Regular expression
	* @return void
	*
	* @throws InvalidArgumentException if $char does not split into exactly one input value, or
	*                                  if $expr is not a valid regular expression
	*/
	public function add(string $char, string $expr): void
	{
		$split = $this->input->split($char);
		if (count($split) !== 1)
		{
			throw new InvalidArgumentException('Meta-characters must be represented by exactly one character');
		}

		// Compile the expression against an empty subject purely to validate it. The warning
		// emitted by PCRE for an invalid pattern is silenced because it is reported as an
		// exception instead
		if (@preg_match('(' . $expr . ')u', '') === false)
		{
			throw new InvalidArgumentException("Invalid expression '" . $expr . "'");
		}

		$inputValue = $split[0];
		$metaValue  = $this->computeValue($expr);

		$this->exprs[$metaValue] = $expr;
		$this->meta[$inputValue] = $metaValue;
	}

	/**
	* Get the expression associated with a meta value
	*
	* @param  int    $metaValue
	* @return string
	*
	* @throws InvalidArgumentException if no expression is registered under $metaValue
	*/
	public function getExpression(int $metaValue): string
	{
		if (!isset($this->exprs[$metaValue]))
		{
			throw new InvalidArgumentException('Invalid meta value ' . $metaValue);
		}

		return $this->exprs[$metaValue];
	}

	/**
	* Return whether a given value represents a single character usable in a character class
	*
	* Non-negative values always qualify. Negative values qualify if their IS_CHAR bit is set.
	*
	* @param  int  $value
	* @return bool
	*/
	public static function isChar(int $value): bool
	{
		return ($value >= 0 || ($value & self::IS_CHAR) !== 0);
	}

	/**
	* Return whether a given value represents a quantifiable expression
	*
	* Non-negative values always qualify. Negative values qualify if their IS_QUANTIFIABLE bit
	* is set.
	*
	* @param  int  $value
	* @return bool
	*/
	public static function isQuantifiable(int $value): bool
	{
		return ($value >= 0 || ($value & self::IS_QUANTIFIABLE) !== 0);
	}

	/**
	* Replace values from meta-characters in a list of strings with their meta value
	*
	* @param  array[] $strings List of strings, each represented as a list of input values
	* @return array[]          Same list, with registered meta-characters replaced
	*/
	public function replaceMeta(array $strings): array
	{
		// Elements are written back by key rather than through foreach references so that no
		// reference is left behind in the returned array
		foreach ($strings as $i => $string)
		{
			foreach ($string as $j => $value)
			{
				if (isset($this->meta[$value]))
				{
					$strings[$i][$j] = $this->meta[$value];
				}
			}
		}

		return $strings;
	}

	/**
	* Compute and return a value for given expression
	*
	* Values are meant to be a unique negative integer. The least significant bits are used to
	* store the expression's properties, the remaining bits hold a sequence number.
	*
	* @param  string $expr Regular expression
	* @return int
	*/
	protected function computeValue(string $expr): int
	{
		$properties = [
			self::IS_CHAR         => 'exprIsChar',
			self::IS_QUANTIFIABLE => 'exprIsQuantifiable'
		];

		// The sequence number is based on the number of expressions registered so far, not on
		// the number of meta-characters: the latter does not grow when an existing character is
		// added again, which would let two different characters end up with the same value
		$value = (1 + count($this->exprs)) * -(2 ** count($properties));
		foreach ($properties as $bitValue => $methodName)
		{
			if ($this->$methodName($expr))
			{
				$value |= $bitValue;
			}
		}

		return $value;
	}

	/**
	* Test whether given expression represents a single character usable in a character class
	*
	* @param  string $expr
	* @return bool
	*/
	protected function exprIsChar(string $expr): bool
	{
		$regexps = [
			// Escaped literal or escape sequence such as \w but not \R
			'(^\\\\[adefhnrstvwDHNSVW\\W]$)D',

			// Unicode properties such as \pL or \p{Lu}
			'(^\\\\p(?:.|\\{[^}]+\\})$)Di',

			// An escape sequence such as \x1F or \x{2600}
			'(^\\\\x(?:[0-9a-f]{2}|\\{[^}]+\\})$)Di'
		];

		return $this->matchesAny($expr, $regexps);
	}

	/**
	* Test whether given expression is quantifiable
	*
	* An expression is quantifiable if it is a dot, \R, a character class, or anything that
	* exprIsChar() accepts.
	*
	* @param  string $expr
	* @return bool
	*/
	protected function exprIsQuantifiable(string $expr): bool
	{
		$regexps = [
			// A dot or \R
			'(^(?:\\.|\\\\R)$)D',

			// A character class
			'(^\\[\\^?(?:([^\\\\\\]]|\\\\.)(?:-(?-1))?)++\\]$)D'
		];

		return $this->matchesAny($expr, $regexps) || $this->exprIsChar($expr);
	}

	/**
	* Test whether given expression matches any of the given regexps
	*
	* @param  string   $expr
	* @param  string[] $regexps
	* @return bool
	*/
	protected function matchesAny(string $expr, array $regexps): bool
	{
		foreach ($regexps as $regexp)
		{
			if (preg_match($regexp, $expr))
			{
				return true;
			}
		}

		return false;
	}
}