|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* @package s9e\RegexpBuilder |
|
5
|
|
|
* @copyright Copyright (c) 2016-2018 The s9e Authors |
|
6
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
|
7
|
|
|
*/ |
|
8
|
|
|
namespace s9e\RegexpBuilder; |
|
9
|
|
|
|
|
10
|
|
|
use s9e\RegexpBuilder\MetaCharacters; |
|
11
|
|
|
use s9e\RegexpBuilder\Output\OutputInterface; |
|
12
|
|
|
|
|
13
|
|
|
class Serializer
{
	/**
	* @var Escaper Object whose escapeCharacterClass() and escapeLiteral() methods are applied to output
	*/
	protected $escaper;

	/**
	* @var MetaCharacters Queried for isChar(), isQuantifiable() and getExpression()
	*/
	protected $meta;

	/**
	* @var OutputInterface Converts non-negative values to their output form via output()
	*/
	protected $output;

	/**
	* @param OutputInterface $output
	* @param MetaCharacters  $meta
	* @param Escaper         $escaper
	*/
	public function __construct(OutputInterface $output, MetaCharacters $meta, Escaper $escaper)
	{
		$this->escaper = $escaper;
		$this->meta    = $meta;
		$this->output  = $output;
	}

	/**
	* Serialize given strings into a regular expression
	*
	* Single-element strings are merged into one character class (when there is more than one of
	* them), which is placed before the remaining alternations. The result is wrapped in a
	* non-capturing group when needsParentheses() says so, and the quantifier is appended last.
	*
	* @param  array[] $strings
	* @return string
	*/
	public function serializeStrings(array $strings)
	{
		$info         = $this->analyzeStrings($strings);
		$alternations = array_map([$this, 'serializeString'], $info['strings']);
		if (!empty($info['chars']))
		{
			// Prepend the character class to the list of alternations
			array_unshift($alternations, $this->serializeCharacterClass($info['chars']));
		}

		$expr = implode('|', $alternations);
		if ($this->needsParentheses($info))
		{
			$expr = '(?:' . $expr . ')';
		}

		return $expr . $info['quantifier'];
	}

	/**
	* Analyze given strings to determine how to serialize them
	*
	* The returned array contains the following elements:
	*
	*  - (int)    alternationsCount Number of alternations: remaining strings, plus one if a
	*                               character class is used
	*  - (string) quantifier        Either '' or '?'
	*  - (array)  chars             List of values from single-char strings. Only present if
	*                               there is more than one of them
	*  - (array)  strings           List of strings that were not moved into "chars"
	*
	* @param  array[] $strings
	* @return array
	*/
	protected function analyzeStrings(array $strings)
	{
		$info = ['alternationsCount' => 0, 'quantifier' => ''];

		// A leading empty string makes the whole expression optional. isset() guards against an
		// empty list, which would otherwise trigger an undefined offset/array key error
		if (isset($strings[0]) && $strings[0] === [])
		{
			$info['quantifier'] = '?';
			unset($strings[0]);
		}

		$chars = $this->getChars($strings);
		if (count($chars) > 1)
		{
			// getChars() preserves keys, so the strings that became chars can be removed by key
			++$info['alternationsCount'];
			$info['chars'] = array_values($chars);
			$strings       = array_diff_key($strings, $chars);
		}

		$info['strings']            = array_values($strings);
		$info['alternationsCount'] += count($strings);

		return $info;
	}

	/**
	* Return the portion of strings that are composed of a single character
	*
	* @param  array[] $strings
	* @return array            Original key from $strings => the string's only value
	*/
	protected function getChars(array $strings)
	{
		$chars = [];
		foreach ($strings as $k => $string)
		{
			if ($this->isChar($string))
			{
				$chars[$k] = $string[0];
			}
		}

		return $chars;
	}

	/**
	* Get the list of ranges that cover all given values
	*
	* A value extends the current range only if it is exactly one more than the range's end,
	* otherwise it starts a new range. An empty list of values produces an empty list of ranges.
	*
	* @param  integer[] $values Ordered list of values
	* @return array[]           List of ranges in the form [start, end]
	*/
	protected function getRanges(array $values)
	{
		$ranges = [];
		$last   = -1;
		foreach ($values as $value)
		{
			if ($last >= 0 && $value === $ranges[$last][1] + 1)
			{
				// Contiguous with the current range, extend it
				$ranges[$last][1] = $value;
			}
			else
			{
				$ranges[] = [$value, $value];
				++$last;
			}
		}

		return $ranges;
	}

	/**
	* Test whether given string represents a single character
	*
	* The string must have exactly one element, that element must not be a nested list of strings
	* and the MetaCharacters instance must report it as a char.
	*
	* @param  array $string
	* @return bool
	*/
	protected function isChar(array $string)
	{
		return count($string) === 1 && !is_array($string[0]) && $this->meta->isChar($string[0]);
	}

	/**
	* Test whether an expression is quantifiable based on the strings info
	*
	* It is quantifiable if no strings remain outside of the character class, or if the only
	* remaining string is a single quantifiable element.
	*
	* @param  array $info
	* @return bool
	*/
	protected function isQuantifiable(array $info)
	{
		$strings = $info['strings'];

		return empty($strings) || $this->isSingleQuantifiableString($strings);
	}

	/**
	* Test whether a list of strings contains only one single quantifiable string
	*
	* @param  array[] $strings 0-indexed list of strings
	* @return bool
	*/
	protected function isSingleQuantifiableString(array $strings)
	{
		return count($strings) === 1 && count($strings[0]) === 1 && $this->meta->isQuantifiable($strings[0][0]);
	}

	/**
	* Test whether an expression needs parentheses based on the strings info
	*
	* Parentheses are needed around multiple alternations, and around a quantified expression
	* that is not quantifiable on its own.
	*
	* @param  array $info
	* @return bool
	*/
	protected function needsParentheses(array $info)
	{
		if ($info['alternationsCount'] > 1)
		{
			return true;
		}

		return $info['quantifier'] !== '' && !$this->isQuantifiable($info);
	}

	/**
	* Serialize a given list of values into a character class
	*
	* @param  integer[] $values
	* @return string
	*/
	protected function serializeCharacterClass(array $values)
	{
		$expr = '[';
		foreach ($this->getRanges($values) as list($start, $end))
		{
			$expr .= $this->serializeCharacterClassUnit($start);
			if ($end > $start)
			{
				// Two adjacent values are written side by side, the dash is only used for
				// ranges that span three values or more
				if ($end > $start + 1)
				{
					$expr .= '-';
				}
				$expr .= $this->serializeCharacterClassUnit($end);
			}
		}
		$expr .= ']';

		return $expr;
	}

	/**
	* Serialize a given value to be used in a character class
	*
	* @param  integer $value
	* @return string
	*/
	protected function serializeCharacterClassUnit($value)
	{
		return $this->serializeValue($value, 'escapeCharacterClass');
	}

	/**
	* Serialize an element from a string
	*
	* An array element is a nested list of strings and is serialized recursively, anything else
	* is serialized as a literal.
	*
	* @param  array|integer $element
	* @return string
	*/
	protected function serializeElement($element)
	{
		return (is_array($element)) ? $this->serializeStrings($element) : $this->serializeLiteral($element);
	}

	/**
	* Serialize a given value to be used as a literal
	*
	* @param  integer $value
	* @return string
	*/
	protected function serializeLiteral($value)
	{
		return $this->serializeValue($value, 'escapeLiteral');
	}

	/**
	* Serialize a given string into a regular expression
	*
	* @param  array  $string List of elements, serialized in order and concatenated
	* @return string
	*/
	protected function serializeString(array $string)
	{
		return implode('', array_map([$this, 'serializeElement'], $string));
	}

	/**
	* Serialize a given value
	*
	* Negative values are resolved through MetaCharacters::getExpression() and returned as-is.
	* Other values are converted by the output object then escaped with the given Escaper method.
	*
	* @param  integer $value
	* @param  string  $escapeMethod Name of the Escaper method to call on the output
	* @return string
	*/
	protected function serializeValue($value, $escapeMethod)
	{
		if ($value < 0)
		{
			return $this->meta->getExpression($value);
		}

		return $this->escaper->$escapeMethod($this->output->output($value));
	}
}