<?php

/**
 * @package   s9e\RegexpBuilder
 * @copyright Copyright (c) 2016-2018 The s9e Authors
 * @license   http://www.opensource.org/licenses/mit-license.php The MIT License
 */
namespace s9e\RegexpBuilder;

use s9e\RegexpBuilder\MetaCharacters;
use s9e\RegexpBuilder\Output\OutputInterface;

class Serializer
{
    /**
     * @var Escaper Escapes output for use as a literal or inside a character class
     */
    protected $escaper;

    /**
     * @var MetaCharacters Resolves negative values to their expression
     */
    protected $meta;

    /**
     * @var OutputInterface Converts non-negative values to their textual form
     */
    protected $output;

    /**
     * @param OutputInterface $output
     * @param MetaCharacters  $meta
     * @param Escaper         $escaper
     */
    public function __construct(OutputInterface $output, MetaCharacters $meta, Escaper $escaper)
    {
        $this->escaper = $escaper;
        $this->meta    = $meta;
        $this->output  = $output;
    }

    /**
     * Serialize given strings into a regular expression
     *
     * Each string is a list of elements. An element is either an integer value or a nested
     * list of strings, which is serialized recursively as a sub-expression.
     *
     * @param  array[] $strings
     * @return string
     */
    public function serializeStrings(array $strings)
    {
        $info         = $this->analyzeStrings($strings);
        $alternations = array_map([$this, 'serializeString'], $info['strings']);
        if (!empty($info['chars']))
        {
            // Prepend the character class to the list of alternations
            array_unshift($alternations, $this->serializeCharacterClass($info['chars']));
        }

        $expr = implode('|', $alternations);
        if ($this->needsParentheses($info))
        {
            $expr = '(?:' . $expr . ')';
        }

        return $expr . $info['quantifier'];
    }

    /**
     * Analyze given strings to determine how to serialize them
     *
     * The returned array may contain any of the following elements:
     *
     *  - (integer) alternationsCount Number of alternations in the final expression, with the
     *                                character class (if any) counting as one
     *  - (string)  quantifier        Either '' or '?'
     *  - (array)   chars             List of values from single-char strings, only set if
     *                                there is more than one of them
     *  - (array)   strings           List of the remaining strings
     *
     * @param  array[] $strings
     * @return array
     */
    protected function analyzeStrings(array $strings)
    {
        $info = ['alternationsCount' => 0, 'quantifier' => ''];

        // An empty string in first position makes the whole expression optional. Only the
        // first position is inspected. The isset() guard lets an empty list through without
        // triggering an undefined offset error
        if (isset($strings[0]) && $strings[0] === [])
        {
            $info['quantifier'] = '?';
            unset($strings[0]);
        }

        // Single-char strings are only merged into a character class if there are at least
        // two of them, otherwise the lone char stays in the list of strings
        $chars = $this->getChars($strings);
        if (count($chars) > 1)
        {
            ++$info['alternationsCount'];
            $info['chars'] = array_values($chars);
            $strings       = array_diff_key($strings, $chars);
        }

        $info['strings']            = array_values($strings);
        $info['alternationsCount'] += count($strings);

        return $info;
    }

    /**
     * Return the portion of strings that are composed of a single character
     *
     * @param  array[] $strings
     * @return array           Original key from $strings => the string's only value
     */
    protected function getChars(array $strings)
    {
        $chars = [];
        foreach ($strings as $k => $string)
        {
            if ($this->isChar($string))
            {
                $chars[$k] = $string[0];
            }
        }

        return $chars;
    }

    /**
     * Get the list of ranges that cover all given values
     *
     * @param  integer[] $values Ordered, 0-indexed list of values
     * @return array[]           List of ranges in the form [start, end]
     */
    protected function getRanges(array $values)
    {
        // No values means no ranges. Without this guard $values[0] would be undefined and a
        // bogus [null, null] range would be returned
        if (empty($values))
        {
            return [];
        }

        $i      = 0;
        $cnt    = count($values);
        $start  = $values[0];
        $end    = $start;
        $ranges = [];
        while (++$i < $cnt)
        {
            if ($values[$i] === $end + 1)
            {
                // Contiguous value, extend the current range
                ++$end;
            }
            else
            {
                // Gap found, close the current range and start a new one
                $ranges[] = [$start, $end];
                $start = $end = $values[$i];
            }
        }
        $ranges[] = [$start, $end];

        return $ranges;
    }

    /**
     * Test whether given string represents a single character
     *
     * @param  array $string
     * @return bool
     */
    protected function isChar(array $string)
    {
        return count($string) === 1 && !is_array($string[0]) && $this->meta->isChar($string[0]);
    }

    /**
     * Test whether an expression is quantifiable based on the strings info
     *
     * @param  array $info
     * @return bool
     */
    protected function isQuantifiable(array $info)
    {
        $strings = $info['strings'];

        return empty($strings) || $this->isSingleQuantifiableString($strings);
    }

    /**
     * Test whether a list of strings contains only one single quantifiable string
     *
     * @param  array[] $strings
     * @return bool
     */
    protected function isSingleQuantifiableString(array $strings)
    {
        return count($strings) === 1 && count($strings[0]) === 1 && $this->meta->isQuantifiable($strings[0][0]);
    }

    /**
     * Test whether an expression needs parentheses based on the strings info
     *
     * Parentheses are needed if there is more than one alternation, or if a quantifier has to
     * be applied to an expression that is not quantifiable as is.
     *
     * @param  array $info
     * @return bool
     */
    protected function needsParentheses(array $info)
    {
        return ($info['alternationsCount'] > 1 || ($info['quantifier'] && !$this->isQuantifiable($info)));
    }

    /**
     * Serialize a given list of values into a character class
     *
     * @param  integer[] $values
     * @return string
     */
    protected function serializeCharacterClass(array $values)
    {
        $expr = '[';
        foreach ($this->getRanges($values) as list($start, $end))
        {
            $expr .= $this->serializeCharacterClassUnit($start);
            if ($end > $start)
            {
                // A range of exactly two values is written as both values side by side,
                // longer ranges use a hyphen
                if ($end > $start + 1)
                {
                    $expr .= '-';
                }
                $expr .= $this->serializeCharacterClassUnit($end);
            }
        }
        $expr .= ']';

        return $expr;
    }

    /**
     * Serialize a given value to be used in a character class
     *
     * @param  integer $value
     * @return string
     */
    protected function serializeCharacterClassUnit($value)
    {
        return $this->serializeValue($value, 'escapeCharacterClass');
    }

    /**
     * Serialize an element from a string
     *
     * @param  array|integer $element Nested list of strings, or a single value
     * @return string
     */
    protected function serializeElement($element)
    {
        return (is_array($element)) ? $this->serializeStrings($element) : $this->serializeLiteral($element);
    }

    /**
     * Serialize a given value to be used as a literal
     *
     * @param  integer $value
     * @return string
     */
    protected function serializeLiteral($value)
    {
        return $this->serializeValue($value, 'escapeLiteral');
    }

    /**
     * Serialize a given string into a regular expression
     *
     * @param  array  $string
     * @return string
     */
    protected function serializeString(array $string)
    {
        return implode('', array_map([$this, 'serializeElement'], $string));
    }

    /**
     * Serialize a given value
     *
     * Negative values are resolved to an expression through the MetaCharacters instance and
     * returned as is. Any other value is converted by the output and then escaped using the
     * given method of the escaper.
     *
     * @param  integer $value
     * @param  string  $escapeMethod Name of the Escaper method to call
     * @return string
     */
    protected function serializeValue($value, $escapeMethod)
    {
        return ($value < 0) ? $this->meta->getExpression($value) : $this->escaper->$escapeMethod($this->output->output($value));
    }
}