|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* @package s9e\RegexpBuilder |
|
5
|
|
|
* @copyright Copyright (c) 2016-2017 The s9e Authors |
|
6
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
|
7
|
|
|
*/ |
|
8
|
|
|
namespace s9e\RegexpBuilder; |
|
9
|
|
|
|
|
10
|
|
|
use InvalidArgumentException; |
|
11
|
|
|
use s9e\RegexpBuilder\Input\InputInterface; |
|
12
|
|
|
|
|
13
|
|
|
/**
* Registry of meta characters
*
* A meta character is a single input character that stands for a regular expression. Each
* registered expression is identified by a negative integer ("meta value") whose two lowest bits
* carry the IS_CHAR and IS_QUANTIFIABLE flags.
*/
class MetaCharacters
{
	/**
	* @const Bit value that indicates whether a meta character represents a single character
	*/
	const IS_CHAR = 1;

	/**
	* @const Bit value that indicates whether a meta character represents a quantifiable expression
	*/
	const IS_QUANTIFIABLE = 2;

	/**
	* @var array Map of meta values and the expression they represent
	*/
	protected $exprs = [];

	/**
	* @var InputInterface
	*/
	protected $input;

	/**
	* @var array Map of meta characters' input values (as returned by the input splitter) and
	*            their meta value
	*/
	protected $meta = [];

	/**
	* @param InputInterface $input Splitter used to convert a meta character into an input value
	*/
	public function __construct(InputInterface $input)
	{
		$this->input = $input;
	}

	/**
	* Add a meta character to the list
	*
	* Adding a character that is already registered replaces its expression.
	*
	* @throws InvalidArgumentException if $char is not exactly one character or $expr is invalid
	*
	* @param  string $char Meta character
	* @param  string $expr Regular expression
	* @return void
	*/
	public function add($char, $expr)
	{
		// Validate in the same order as before: the character first, then the expression
		$inputValue = $this->getInputValue($char);
		$this->validateExpr($expr);

		$metaValue = $this->computeValue($expr);

		$this->exprs[$metaValue] = $expr;
		$this->meta[$inputValue] = $metaValue;
	}

	/**
	* Get the expression associated with a meta value
	*
	* @throws InvalidArgumentException if the meta value is unknown
	*
	* @param  integer $metaValue
	* @return string
	*/
	public function getExpression($metaValue)
	{
		if (!isset($this->exprs[$metaValue]))
		{
			throw new InvalidArgumentException('Invalid meta value ' . $metaValue);
		}

		return $this->exprs[$metaValue];
	}

	/**
	* Get the value set for a meta character
	*
	* @throws InvalidArgumentException if $char is not a registered meta character
	*
	* @param  string  $char
	* @return integer
	*/
	public function getValue($char)
	{
		if (!$this->isMeta($char))
		{
			throw new InvalidArgumentException('Invalid meta character ' . $char);
		}

		// isMeta() guarantees that $char splits into exactly one registered input value
		return $this->meta[$this->getInputValue($char)];
	}

	/**
	* Return whether a given value represents a single character
	*
	* Non-negative values are always treated as single characters; negative (meta) values are
	* tested against the IS_CHAR bit.
	*
	* @param  integer $value
	* @return bool
	*/
	public static function isChar($value)
	{
		return ($value >= 0 || ($value & self::IS_CHAR));
	}

	/**
	* Return whether a given value represents a quantifiable expression
	*
	* Non-negative values are always treated as quantifiable; negative (meta) values are tested
	* against the IS_QUANTIFIABLE bit.
	*
	* @param  integer $value
	* @return bool
	*/
	public static function isQuantifiable($value)
	{
		return ($value >= 0 || ($value & self::IS_QUANTIFIABLE));
	}

	/**
	* Return whether a given character is a meta character
	*
	* The lookup goes through the input splitter because $meta is keyed by the value the
	* splitter produces for the character, not by the character itself.
	*
	* @param  string $char
	* @return bool
	*/
	public function isMeta($char)
	{
		$split = $this->input->split($char);

		return (count($split) === 1 && isset($this->meta[$split[0]]));
	}

	/**
	* Replace the input values that correspond to meta characters with their meta value
	*
	* @param  array[] $strings List of strings, each one a list of input values
	* @return array[]          Same list, with registered values replaced by their meta value
	*/
	public function replaceMeta(array $strings)
	{
		foreach ($strings as &$string)
		{
			foreach ($string as &$value)
			{
				if (isset($this->meta[$value]))
				{
					$value = $this->meta[$value];
				}
			}
			unset($value);
		}
		unset($string);

		return $strings;
	}

	/**
	* Compute and return a value for given expression
	*
	* Values are meant to be a unique negative integer. The last 2 bits indicate whether the
	* expression is quantifiable and/or represents a single character.
	*
	* @param  string  $expr Regular expression
	* @return integer
	*/
	protected function computeValue($expr)
	{
		// Count the expressions rather than the meta characters: $exprs gains one entry per
		// call to add() whereas $meta does not grow when a character is redefined, which
		// would otherwise let two different characters end up sharing the same value
		$value = (1 + count($this->exprs)) * -4;
		if ($this->exprIsChar($expr))
		{
			$value |= self::IS_CHAR;
		}
		if ($this->exprIsQuantifiable($expr))
		{
			$value |= self::IS_QUANTIFIABLE;
		}

		return $value;
	}

	/**
	* Test whether given expression represents a single character usable in a character class
	*
	* @param  string $expr
	* @return bool
	*/
	protected function exprIsChar($expr)
	{
		$regexps = [
			// A dot
			'(^\\.$)D',

			// Escaped literal or escape sequence such as \w but not \R
			'(^\\\\[adefhnrstvwDHNSVW\\W]$)D',

			// Unicode properties such as \pL or \p{Lu}
			'(^\\\\p(?:.|\\{[^}]+\\})$)Di',

			// An escape sequence such as \x1F or \x{2600}
			'(^\\\\x(?:[0-9a-f]{2}|\\{[^}]+\\})$)Di',

			// A character class
			'(^\\[\\^?(?:([^\\\\\\]]|\\\\.)(?:-(?-1))?)++\\]$)D'
		];
		foreach ($regexps as $regexp)
		{
			if (preg_match($regexp, $expr))
			{
				return true;
			}
		}

		return false;
	}

	/**
	* Test whether given expression is quantifiable
	*
	* Currently an expression is considered quantifiable if and only if it is a single character.
	*
	* @param  string $expr
	* @return bool
	*/
	protected function exprIsQuantifiable($expr)
	{
		return $this->exprIsChar($expr);
	}

	/**
	* Split given meta character and return its one input value
	*
	* @throws InvalidArgumentException if the splitter does not return exactly one value
	*
	* @param  string $char
	* @return mixed        First (and only) element returned by the input splitter
	*/
	protected function getInputValue($char)
	{
		$split = $this->input->split($char);
		if (count($split) !== 1)
		{
			throw new InvalidArgumentException('Meta characters must be represented by exactly one character');
		}

		return $split[0];
	}

	/**
	* Validate a meta character
	*
	* @throws InvalidArgumentException if $char is not exactly one character
	*
	* @param  string $char
	* @return void
	*/
	protected function validateChar($char)
	{
		$this->getInputValue($char);
	}

	/**
	* Validate a regular expression
	*
	* @throws InvalidArgumentException if the expression cannot be compiled
	*
	* @param  string $expr
	* @return void
	*/
	protected function validateExpr($expr)
	{
		// The error is silenced on purpose: an invalid pattern makes preg_match() emit a
		// warning and return false, and the failure is reported through the exception instead
		if (@preg_match('(' . $expr . ')u', '') === false)
		{
			throw new InvalidArgumentException("Invalid expression '" . $expr . "'");
		}
	}
}
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.