1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* @package s9e\RegexpBuilder |
5
|
|
|
* @copyright Copyright (c) 2016 The s9e Authors |
6
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
7
|
|
|
*/ |
8
|
|
|
namespace s9e\RegexpBuilder; |
9
|
|
|
|
10
|
|
|
use s9e\RegexpBuilder\Output\OutputInterface; |
11
|
|
|
|
12
|
|
|
/**
 * Serializes lists of strings into a regular expression.
 *
 * A "string" is an array of elements. Each element is either a scalar value that is
 * passed to the output object, or a nested list of strings (an array) that is serialized
 * recursively as a sub-expression.
 */
class Serializer
{
	/**
	 * @var Escaper Used to escape output for literals and character classes
	 */
	protected $escaper;

	/**
	 * @var OutputInterface Used to convert each value to its output form
	 */
	protected $output;

	/**
	 * @param OutputInterface $output
	 * @param Escaper         $escaper
	 */
	public function __construct(OutputInterface $output, Escaper $escaper)
	{
		$this->escaper = $escaper;
		$this->output  = $output;
	}

	/**
	 * Serialize given strings into a regular expression
	 *
	 * Alternations are joined with "|". The expression is wrapped in a non-capturing
	 * group if there is more than one alternation, or if a quantifier must apply to a
	 * string made of several elements.
	 *
	 * @param  array[] $strings
	 * @return string
	 */
	public function serializeStrings(array $strings)
	{
		$info         = $this->analyzeStrings($strings);
		$alternations = $this->buildAlternations($info);
		$expr         = implode('|', $alternations);

		if (count($alternations) > 1 || $this->isOneOptionalString($info))
		{
			$expr = '(?:' . $expr . ')';
		}

		return $expr . $info['quantifier'];
	}

	/**
	 * Analyze given strings to determine how to serialize them
	 *
	 * The returned array may contain any of the following elements:
	 *
	 *  - (string) quantifier Either '' or '?'
	 *  - (array)  chars      List of values from single-char strings
	 *  - (array)  strings    List of multi-char strings
	 *
	 * @param  array[] $strings
	 * @return array
	 */
	protected function analyzeStrings(array $strings)
	{
		$info = ['quantifier' => ''];

		// An empty string in first position makes the whole expression optional. The
		// isset() guard avoids an undefined offset notice when the list itself is empty
		if (isset($strings[0]) && $strings[0] === [])
		{
			$info['quantifier'] = '?';
			unset($strings[0]);
		}

		// Single chars are only grouped into a character class if there's more than one
		$chars = $this->getChars($strings);
		if (count($chars) > 1)
		{
			$info['chars'] = array_values($chars);

			// getChars() preserves the original keys, which lets us remove those strings
			$strings = array_diff_key($strings, $chars);
		}

		$info['strings'] = array_values($strings);

		return $info;
	}

	/**
	 * Build the list of alternations based on given info
	 *
	 * The character class, if any, comes first and is followed by each remaining string.
	 *
	 * @param  array    $info Array as returned by analyzeStrings()
	 * @return string[]
	 */
	protected function buildAlternations(array $info)
	{
		$alternations = [];
		if (!empty($info['chars']))
		{
			$alternations[] = $this->serializeCharacterClass($info['chars']);
		}
		foreach ($info['strings'] as $string)
		{
			$alternations[] = $this->serializeString($string);
		}

		return $alternations;
	}

	/**
	 * Return the portion of strings that are composed of a single character
	 *
	 * @param  array[] $strings
	 * @return array            String key => value of the string's only element
	 */
	protected function getChars(array $strings)
	{
		$chars = [];
		foreach ($strings as $k => $string)
		{
			if ($this->isChar($string))
			{
				$chars[$k] = $string[0];
			}
		}

		return $chars;
	}

	/**
	 * Get the list of ranges that cover all given values
	 *
	 * @param  integer[] $values Ordered list of values
	 * @return array[]           List of ranges in the form [start, end]
	 */
	protected function getRanges(array $values)
	{
		if (empty($values))
		{
			return [];
		}

		$i      = 0;
		$cnt    = count($values);
		$start  = $values[0];
		$end    = $start;
		$ranges = [];
		while (++$i < $cnt)
		{
			if ($values[$i] === $end + 1)
			{
				// Contiguous value, extend the current range
				++$end;
			}
			else
			{
				// Gap found, close the current range and start a new one
				$ranges[] = [$start, $end];
				$start = $end = $values[$i];
			}
		}
		$ranges[] = [$start, $end];

		return $ranges;
	}

	/**
	 * Test whether given string is composed of a single, non-nested element
	 *
	 * A string whose only element is an array holds a nested alternation and must be
	 * serialized recursively rather than placed in a character class.
	 *
	 * @param  array $string
	 * @return bool
	 */
	protected function isChar(array $string)
	{
		return (count($string) === 1 && !is_array($string[0]));
	}

	/**
	 * Test whether a string is optional and has more than one character
	 *
	 * @param  array $info Array as returned by analyzeStrings()
	 * @return bool
	 */
	protected function isOneOptionalString(array $info)
	{
		// Test whether the first string has a quantifier and more than one element
		return (!empty($info['quantifier']) && isset($info['strings'][0][1]));
	}

	/**
	 * Serialize a given list of values into a character class
	 *
	 * @param  integer[] $values
	 * @return string
	 */
	protected function serializeCharacterClass(array $values)
	{
		$expr = '[';
		foreach ($this->getRanges($values) as list($start, $end))
		{
			$expr .= $this->escaper->escapeCharacterClass($this->output->output($start));
			if ($end > $start)
			{
				// A dash is only used for ranges of three values or more. Two adjacent
				// values are output side by side
				if ($end > $start + 1)
				{
					$expr .= '-';
				}
				$expr .= $this->escaper->escapeCharacterClass($this->output->output($end));
			}
		}
		$expr .= ']';

		return $expr;
	}

	/**
	 * Serialize a given string into a regular expression
	 *
	 * @param  array  $string List of elements. Arrays are serialized recursively as
	 *                        nested strings, other values are output as escaped literals
	 * @return string
	 */
	protected function serializeString(array $string)
	{
		$expr = '';
		foreach ($string as $element)
		{
			$expr .= (is_array($element))
			       ? $this->serializeStrings($element)
			       : $this->escaper->escapeLiteral($this->output->output($element));
		}

		return $expr;
	}
}
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.