1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* @package s9e\RegexpBuilder |
5
|
|
|
* @copyright Copyright (c) 2016-2018 The s9e Authors |
6
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
7
|
|
|
*/ |
8
|
|
|
namespace s9e\RegexpBuilder; |
9
|
|
|
|
10
|
|
|
use InvalidArgumentException; |
11
|
|
|
use s9e\RegexpBuilder\Input\InputInterface; |
12
|
|
|
|
13
|
|
|
class MetaCharacters
{
    /**
    * @const Bit value that indicates whether a meta-character represents a single character
    */
    const IS_CHAR = 1;

    /**
    * @const Bit value that indicates whether a meta-character represents a quantifiable expression
    */
    const IS_QUANTIFIABLE = 2;

    /**
    * @var array Map of meta values (negative integers) and the expression they represent
    */
    protected $exprs = [];

    /**
    * @var InputInterface Input whose split() method is used to turn a meta-character into a value
    */
    protected $input;

    /**
    * @var array Map of meta-characters' input values and their meta value
    */
    protected $meta = [];

    /**
    * @param InputInterface $input
    */
    public function __construct(InputInterface $input)
    {
        $this->input = $input;
    }

    /**
    * Add a meta-character to the list
    *
    * Adding a character that is already registered replaces its mapping. The expression that
    * was previously associated with it remains retrievable through its old meta value.
    *
    * @param  string $char Meta-character, must split into exactly one input value
    * @param  string $expr Regular expression
    * @return void
    *
    * @throws InvalidArgumentException if $char is not exactly one character or $expr is invalid
    */
    public function add($char, $expr)
    {
        $split = $this->input->split($char);
        if (count($split) !== 1)
        {
            throw new InvalidArgumentException('Meta-characters must be represented by exactly one character');
        }

        // Compile the expression against an empty subject. The compilation warning is silenced
        // on purpose: a false return value is the signal being tested for
        if (@preg_match('(' . $expr . ')u', '') === false)
        {
            throw new InvalidArgumentException("Invalid expression '" . $expr . "'");
        }

        $inputValue = $split[0];
        $metaValue  = $this->computeValue($expr);

        $this->exprs[$metaValue] = $expr;
        $this->meta[$inputValue] = $metaValue;
    }

    /**
    * Get the expression associated with a meta value
    *
    * @param  integer $metaValue
    * @return string
    *
    * @throws InvalidArgumentException if no expression is stored for this meta value
    */
    public function getExpression($metaValue)
    {
        if (!isset($this->exprs[$metaValue]))
        {
            throw new InvalidArgumentException('Invalid meta value ' . $metaValue);
        }

        return $this->exprs[$metaValue];
    }

    /**
    * Return whether a given value represents a single character
    *
    * Non-negative values are always reported as single characters. Negative values are
    * meta values and are tested against the IS_CHAR bit.
    *
    * @param  integer $value
    * @return bool
    */
    public function isChar($value)
    {
        return ($value >= 0 || ($value & self::IS_CHAR));
    }

    /**
    * Return whether a given value represents a quantifiable expression
    *
    * Non-negative values are always reported as quantifiable. Negative values are meta values
    * and are tested against the IS_QUANTIFIABLE bit.
    *
    * @param  integer $value
    * @return bool
    */
    public function isQuantifiable($value)
    {
        return ($value >= 0 || ($value & self::IS_QUANTIFIABLE));
    }

    /**
    * Replace values from meta-characters in a list of strings with their meta value
    *
    * @param  array[] $strings List of strings, each string being a list of values
    * @return array[]          Same list, with registered values replaced by their meta value
    */
    public function replaceMeta(array $strings)
    {
        // Elements are assigned by key rather than iterated by reference so that no reference
        // outlives the loops
        foreach ($strings as $i => $string)
        {
            foreach ($string as $j => $value)
            {
                if (isset($this->meta[$value]))
                {
                    $strings[$i][$j] = $this->meta[$value];
                }
            }
        }

        return $strings;
    }

    /**
    * Compute and return a value for given expression
    *
    * Values are meant to be a unique negative integer. The last 2 bits indicate whether the
    * expression is quantifiable and/or represents a single character.
    *
    * @param  string  $expr Regular expression
    * @return integer
    */
    protected function computeValue($expr)
    {
        // The base is a negative multiple of 4, which leaves the two lowest bits available for
        // the flags. It is derived from the number of stored expressions rather than the number
        // of meta-characters because the former grows on every call to add() whereas the latter
        // does not when an existing meta-character is redefined, which would produce duplicates
        $value = (1 + count($this->exprs)) * -4;
        if ($this->exprIsChar($expr))
        {
            $value |= self::IS_CHAR;
        }
        if ($this->exprIsQuantifiable($expr))
        {
            $value |= self::IS_QUANTIFIABLE;
        }

        return $value;
    }

    /**
    * Test whether given expression represents a single character usable in a character class
    *
    * @param  string $expr
    * @return bool
    */
    protected function exprIsChar($expr)
    {
        $regexps = [
            // Escaped literal or escape sequence such as \w but not \R
            '(^\\\\[adefhnrstvwDHNSVW\\W]$)D',

            // Unicode properties such as \pL or \p{Lu}
            '(^\\\\p(?:.|\\{[^}]+\\})$)Di',

            // An escape sequence such as \x1F or \x{2600}
            '(^\\\\x(?:[0-9a-f]{2}|\\{[^}]+\\})$)Di'
        ];

        return $this->matchesAny($expr, $regexps);
    }

    /**
    * Test whether given expression is quantifiable
    *
    * Any expression that represents a single character is also considered quantifiable.
    *
    * @param  string $expr
    * @return bool
    */
    protected function exprIsQuantifiable($expr)
    {
        $regexps = [
            // A dot or \R
            '(^(?:\\.|\\\\R)$)D',

            // A character class
            '(^\\[\\^?(?:([^\\\\\\]]|\\\\.)(?:-(?-1))?)++\\]$)D'
        ];

        return $this->matchesAny($expr, $regexps) || $this->exprIsChar($expr);
    }

    /**
    * Test whether given expression matches any of the given regexps
    *
    * @param  string   $expr    Expression being tested
    * @param  string[] $regexps Regexps the expression is matched against, in order
    * @return bool              TRUE as soon as one regexp matches, FALSE otherwise
    */
    protected function matchesAny($expr, array $regexps)
    {
        foreach ($regexps as $regexp)
        {
            if (preg_match($regexp, $expr))
            {
                return true;
            }
        }

        return false;
    }
}
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.