Completed
Push — master ( c1acae...227219 )
by Josh
03:52
created

MetaCharacters::exprIsChar()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 14
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 14
ccs 3
cts 3
cp 1
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 5
nc 1
nop 1
crap 1
1
<?php
2
3
/**
4
* @package   s9e\RegexpBuilder
5
* @copyright Copyright (c) 2016-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\RegexpBuilder;
9
10
use InvalidArgumentException;
11
use s9e\RegexpBuilder\Input\InputInterface;
12
13
class MetaCharacters
{
	/**
	* @const Bit value that indicates whether a meta-character represents a single character
	*/
	const IS_CHAR = 1;

	/**
	* @const Bit value that indicates whether a meta-character represents a quantifiable expression
	*/
	const IS_QUANTIFIABLE = 2;

	/**
	* @var array Map of meta values and the expression they represent
	*/
	protected $exprs = [];

	/**
	* @var InputInterface
	*/
	protected $input;

	/**
	* @var array Map of meta-characters' input values (as returned by the input's split()) and
	*            their meta value
	*/
	protected $meta = [];

	/**
	* @param InputInterface $input Input whose split() method is used to decompose meta-characters
	*/
	public function __construct(InputInterface $input)
	{
		$this->input = $input;
	}

	/**
	* Add a meta-character to the list
	*
	* Adding a meta-character that was already added replaces the expression it maps to.
	*
	* @param  string $char Meta-character
	* @param  string $expr Regular expression
	* @return void
	*
	* @throws InvalidArgumentException if $char does not split into exactly one element or if
	*                                  $expr is not accepted by preg_match()
	*/
	public function add($char, $expr)
	{
		$split = $this->input->split($char);
		if (count($split) !== 1)
		{
			throw new InvalidArgumentException('Meta-characters must be represented by exactly one character');
		}

		// Probe the expression against an empty subject. preg_match() returns false and emits a
		// warning on a pattern it cannot compile; the warning is silenced because the failure is
		// reported through the exception below
		if (@preg_match('(' . $expr . ')u', '') === false)
		{
			throw new InvalidArgumentException("Invalid expression '" . $expr . "'");
		}

		$inputValue = $split[0];
		$metaValue  = $this->computeValue($expr);

		$this->exprs[$metaValue] = $expr;
		$this->meta[$inputValue] = $metaValue;
	}

	/**
	* Get the expression associated with a meta value
	*
	* @param  integer $metaValue
	* @return string
	*
	* @throws InvalidArgumentException if no expression is registered for given meta value
	*/
	public function getExpression($metaValue)
	{
		if (!isset($this->exprs[$metaValue]))
		{
			throw new InvalidArgumentException('Invalid meta value ' . $metaValue);
		}

		return $this->exprs[$metaValue];
	}

	/**
	* Return whether a given value represents a single character
	*
	* Non-negative values always qualify. Negative values are meta values and qualify only if
	* their IS_CHAR bit is set.
	*
	* @param  integer $value
	* @return bool
	*/
	public function isChar($value)
	{
		return ($value >= 0 || ($value & self::IS_CHAR));
	}

	/**
	* Return whether a given value represents a quantifiable expression
	*
	* Non-negative values always qualify. Negative values are meta values and qualify only if
	* their IS_QUANTIFIABLE bit is set.
	*
	* @param  integer $value
	* @return bool
	*/
	public function isQuantifiable($value)
	{
		return ($value >= 0 || ($value & self::IS_QUANTIFIABLE));
	}

	/**
	* Replace values from meta-characters in a list of strings with their meta value
	*
	* Works on a copy of the list. Values that are not registered meta-characters are left as-is.
	*
	* @param  array[] $strings
	* @return array[]
	*/
	public function replaceMeta(array $strings)
	{
		foreach ($strings as &$string)
		{
			foreach ($string as &$value)
			{
				if (isset($this->meta[$value]))
				{
					$value = $this->meta[$value];
				}
			}

			// Break the reference so that the last element is not left aliased to $value
			unset($value);
		}
		unset($string);

		return $strings;
	}

	/**
	* Compute and return a value for given expression
	*
	* Values are meant to be a unique negative integer. The last 2 bits indicate whether the
	* expression is quantifiable and/or represents a single character.
	*
	* @param  string  $expr Regular expression
	* @return integer
	*/
	protected function computeValue($expr)
	{
		// The base value is derived from the number of expressions registered so far rather than
		// the number of meta-characters: redefining a meta-character overwrites its entry in
		// $this->meta without growing it, which would give the next expression the same base
		// value as the previous one
		$value = (1 + count($this->exprs)) * -4;
		if ($this->exprIsChar($expr))
		{
			$value |= self::IS_CHAR;
		}
		if ($this->exprIsQuantifiable($expr))
		{
			$value |= self::IS_QUANTIFIABLE;
		}

		return $value;
	}

	/**
	* Test whether given expression represents a single character usable in a character class
	*
	* @param  string $expr
	* @return bool
	*/
	protected function exprIsChar($expr)
	{
		$regexps = [
			// Escaped literal or escape sequence such as \w but not \R
			'(^\\\\[adefhnrstvwDHNSVW\\W]$)D',

			// Unicode properties such as \pL or \p{Lu}
			'(^\\\\p(?:.|\\{[^}]+\\})$)Di',

			// An escape sequence such as \x1F or \x{2600}
			'(^\\\\x(?:[0-9a-f]{2}|\\{[^}]+\\})$)Di'
		];

		return $this->matchesAny($expr, $regexps);
	}

	/**
	* Test whether given expression is quantifiable
	*
	* Any expression that represents a single character is considered quantifiable too.
	*
	* @param  string $expr
	* @return bool
	*/
	protected function exprIsQuantifiable($expr)
	{
		$regexps = [
			// A dot or \R
			'(^(?:\\.|\\\\R)$)D',

			// A character class
			'(^\\[\\^?(?:([^\\\\\\]]|\\\\.)(?:-(?-1))?)++\\]$)D'
		];

		return $this->matchesAny($expr, $regexps) || $this->exprIsChar($expr);
	}

	/**
	* Test whether given expression matches any of the given regexps
	*
	* @param  string   $expr
	* @param  string[] $regexps
	* @return bool     TRUE as soon as one regexp matches, FALSE if none does
	*/
	protected function matchesAny($expr, array $regexps)
	{
		foreach ($regexps as $regexp)
		{
			if (preg_match($regexp, $expr))
			{
				return true;
			}
		}

		return false;
	}
}