Completed
Branch MetaCharacters (da2f1d)
by Josh
01:29
created

MetaCharacters::add()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 18
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 18
ccs 11
cts 11
cp 1
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 10
nc 3
nop 2
crap 3
1
<?php
2
3
/**
4
* @package   s9e\RegexpBuilder
5
* @copyright Copyright (c) 2016-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\RegexpBuilder;
9
10
use InvalidArgumentException;
11
use s9e\RegexpBuilder\Input\InputInterface;
12
13
/**
* Registry of meta-characters
*
* Maps a single input character to a regular expression. Each registered expression is assigned
* a unique negative integer (its "meta value") whose two lowest bits record whether the
* expression represents a single character and/or is quantifiable.
*/
class MetaCharacters
{
	/**
	* @const Bit value that indicates whether a meta-character represents a single character
	*/
	const IS_CHAR = 1;

	/**
	* @const Bit value that indicates whether a meta-character represents a quantifiable expression
	*/
	const IS_QUANTIFIABLE = 2;

	/**
	* @var array Map of meta values and the expression they represent
	*/
	protected $exprs = [];

	/**
	* @var InputInterface Used to split a meta-character into input values
	*/
	protected $input;

	/**
	* @var array Map of meta-characters' codepoints and their value
	*/
	protected $meta = [];

	/**
	* @param InputInterface $input
	*/
	public function __construct(InputInterface $input)
	{
		$this->input = $input;
	}

	/**
	* Add a meta-character to the list
	*
	* Adding a character that is already registered replaces its expression. The previous meta
	* value stays resolvable through getExpression().
	*
	* @param  string $char Meta-character
	* @param  string $expr Regular expression
	* @return void
	*
	* @throws InvalidArgumentException if $char is not exactly one input value or $expr is invalid
	*/
	public function add($char, $expr)
	{
		$split = $this->input->split($char);
		if (count($split) !== 1)
		{
			throw new InvalidArgumentException('Meta-characters must be represented by exactly one character');
		}

		// The compilation warning is silenced on purpose: the failure is reported through the
		// exception below instead
		if (@preg_match('(' . $expr . ')u', '') === false)
		{
			throw new InvalidArgumentException("Invalid expression '" . $expr . "'");
		}

		$inputValue = $split[0];
		$metaValue  = $this->computeValue($expr);

		$this->exprs[$metaValue] = $expr;
		$this->meta[$inputValue] = $metaValue;
	}

	/**
	* Get the expression associated with a meta value
	*
	* @param  integer $metaValue
	* @return string
	*
	* @throws InvalidArgumentException if no expression is registered for this value
	*/
	public function getExpression($metaValue)
	{
		if (!isset($this->exprs[$metaValue]))
		{
			throw new InvalidArgumentException('Invalid meta value ' . $metaValue);
		}

		return $this->exprs[$metaValue];
	}

	/**
	* Return whether a given value represents a single character
	*
	* Any non-negative value is reported as a single character. Negative values are tested
	* against the IS_CHAR bit.
	*
	* @param  integer $value
	* @return bool
	*/
	public static function isChar($value)
	{
		return ($value >= 0 || ($value & self::IS_CHAR));
	}

	/**
	* Return whether a given value represents a quantifiable expression
	*
	* Any non-negative value is reported as quantifiable. Negative values are tested against
	* the IS_QUANTIFIABLE bit.
	*
	* @param  integer $value
	* @return bool
	*/
	public static function isQuantifiable($value)
	{
		return ($value >= 0 || ($value & self::IS_QUANTIFIABLE));
	}

	/**
	* Replace values from meta-characters in a list of strings with their meta value
	*
	* Values that are not registered as meta-characters are left untouched.
	*
	* @param  array[] $strings
	* @return array[]
	*/
	public function replaceMeta(array $strings)
	{
		foreach ($strings as &$string)
		{
			foreach ($string as &$value)
			{
				if (isset($this->meta[$value]))
				{
					$value = $this->meta[$value];
				}
			}
			// Break the reference so that the next iteration cannot write through it
			unset($value);
		}
		unset($string);

		return $strings;
	}

	/**
	* Compute and return a value for given expression
	*
	* Values are meant to be a unique negative integer. The last 2 bits indicate whether the
	* expression is quantifiable and/or represents a single character.
	*
	* @param  string  $expr Regular expression
	* @return integer
	*/
	protected function computeValue($expr)
	{
		// Count the registered expressions rather than the meta-characters: redefining an
		// existing meta-character does not grow $this->meta, which would hand out the same
		// value twice. $this->exprs gains exactly one entry per call to add()
		$value = (1 + count($this->exprs)) * -4;
		if ($this->exprIsChar($expr))
		{
			$value |= self::IS_CHAR;
		}
		if ($this->exprIsQuantifiable($expr))
		{
			$value |= self::IS_QUANTIFIABLE;
		}

		return $value;
	}

	/**
	* Test whether given expression represents a single character usable in a character class
	*
	* @param  string $expr
	* @return bool
	*/
	protected function exprIsChar($expr)
	{
		$regexps = [
			// A dot
			'(^\\.$)D',

			// Escaped literal or escape sequence such as \w but not \R
			'(^\\\\[adefhnrstvwDHNSVW\\W]$)D',

			// Unicode properties such as \pL or \p{Lu}
			'(^\\\\p(?:.|\\{[^}]+\\})$)Di',

			// An escape sequence such as \x1F or \x{2600}
			'(^\\\\x(?:[0-9a-f]{2}|\\{[^}]+\\})$)Di',

			// A character class
			'(^\\[\\^?(?:([^\\\\\\]]|\\\\.)(?:-(?-1))?)++\\]$)D'
		];
		foreach ($regexps as $regexp)
		{
			if (preg_match($regexp, $expr))
			{
				return true;
			}
		}

		return false;
	}

	/**
	* Test whether given expression is quantifiable
	*
	* Currently an expression is considered quantifiable exactly when it represents a single
	* character.
	*
	* @param  string $expr
	* @return bool
	*/
	protected function exprIsQuantifiable($expr)
	{
		return $this->exprIsChar($expr);
	}
}