Completed
Branch MetaCharacters (348a52)
by Josh
01:30
created

Serializer::serializeCharacterClass()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 19
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 10
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 19
ccs 10
cts 10
cp 1
rs 9.2
c 0
b 0
f 0
cc 4
eloc 10
nc 4
nop 1
crap 4
1
<?php
2
3
/**
4
* @package   s9e\RegexpBuilder
5
* @copyright Copyright (c) 2016-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\RegexpBuilder;
9
10
use s9e\RegexpBuilder\MetaCharacters;
11
use s9e\RegexpBuilder\Output\OutputInterface;
12
13
class Serializer
{
	/**
	* @var Escaper
	*/
	protected $escaper;

	/**
	* @var MetaCharacters
	*/
	protected $meta;

	/**
	* @var OutputInterface
	*/
	protected $output;

	/**
	* @param OutputInterface $output
	* @param MetaCharacters  $meta
	* @param Escaper         $escaper
	*/
	public function __construct(OutputInterface $output, MetaCharacters $meta, Escaper $escaper)
	{
		$this->escaper = $escaper;
		$this->meta    = $meta;
		$this->output  = $output;
	}

	/**
	* Serialize given strings into a regular expression
	*
	* The alternations are joined with "|", wrapped in a non-capturing group if needed, and
	* followed by the quantifier computed by analyzeStrings()
	*
	* @param  array[] $strings
	* @return string
	*/
	public function serializeStrings(array $strings)
	{
		$info = $this->analyzeStrings($strings);
		$expr = implode('|', $this->buildAlternations($info));
		if ($this->needsParentheses($info))
		{
			$expr = '(?:' . $expr . ')';
		}

		return $expr . $info['quantifier'];
	}

	/**
	* Analyze given strings to determine how to serialize them
	*
	* The returned array contains the following elements:
	*
	*  - (int)    alternationsCount Number of alternations the expression will contain
	*  - (string) quantifier        Either '' or '?'
	*  - (array)  chars             List of values from single-char strings, only set if there
	*                               is more than one of them
	*  - (array)  strings           List of strings that were not merged into chars
	*
	* @param  array[] $strings
	* @return array
	*/
	protected function analyzeStrings(array $strings)
	{
		$info = ['alternationsCount' => 0, 'quantifier' => ''];

		// A leading empty string makes the whole expression optional. The isset() check avoids
		// reading an undefined offset when the list of strings is empty
		if (isset($strings[0]) && $strings[0] === [])
		{
			$info['quantifier'] = '?';
			unset($strings[0]);
		}

		// Single characters are only merged into a character class if there's more than one
		$chars = $this->getChars($strings);
		if (count($chars) > 1)
		{
			++$info['alternationsCount'];
			$info['chars'] = array_values($chars);

			// $chars uses the same keys as $strings, remove the strings that have been merged
			$strings = array_diff_key($strings, $chars);
		}

		$info['strings'] = array_values($strings);
		$info['alternationsCount'] += count($strings);

		return $info;
	}

	/**
	* Build the list of alternations based on given info
	*
	* The character class, if any, comes first, followed by each remaining string
	*
	* @param  array    $info
	* @return string[]
	*/
	protected function buildAlternations(array $info)
	{
		$alternations = [];
		if (!empty($info['chars']))
		{
			$alternations[] = $this->serializeCharacterClass($info['chars']);
		}
		foreach ($info['strings'] as $string)
		{
			$alternations[] = $this->serializeString($string);
		}

		return $alternations;
	}

	/**
	* Return the portion of strings that are composed of a single character
	*
	* @param  array[] $strings
	* @return array            Key of the string in $strings => value of its only element
	*/
	protected function getChars(array $strings)
	{
		$chars = [];
		foreach ($strings as $k => $string)
		{
			if ($this->isChar($string))
			{
				$chars[$k] = $string[0];
			}
		}

		return $chars;
	}

	/**
	* Get the list of ranges that cover all given values
	*
	* Consecutive values are merged into one range. An empty list of values produces an empty
	* list of ranges
	*
	* @param  integer[] $values Ordered list of values, indexed from 0
	* @return array[]           List of ranges in the form [start, end]
	*/
	protected function getRanges(array $values)
	{
		$cnt = count($values);
		if ($cnt === 0)
		{
			// Nothing to cover, don't read $values[0] and create a bogus [null, null] range
			return [];
		}

		$i      = 0;
		$start  = $values[0];
		$end    = $start;
		$ranges = [];
		while (++$i < $cnt)
		{
			if ($values[$i] === $end + 1)
			{
				// Current value extends the current range
				++$end;
			}
			else
			{
				// Current value starts a new range, save the previous one
				$ranges[] = [$start, $end];
				$start = $end = $values[$i];
			}
		}
		$ranges[] = [$start, $end];

		return $ranges;
	}

	/**
	* Test whether given string is a single character
	*
	* A string qualifies if it has exactly one element, that element is not a nested array, and
	* the MetaCharacters instance reports it as a character
	*
	* @param  array $string
	* @return bool
	*/
	protected function isChar(array $string)
	{
		if (count($string) !== 1 || is_array($string[0]))
		{
			return false;
		}

		return $this->meta->isChar($string[0]);
	}

	/**
	* Test whether an expression is quantifiable based on the strings info
	*
	* @param  array $info
	* @return bool
	*/
	protected function isQuantifiable(array $info)
	{
		$strings = $info['strings'];

		return empty($strings) || $this->isSingleQuantifiableString($strings);
	}

	/**
	* Test whether a list of strings contains only one single quantifiable string
	*
	* @param  array[] $strings
	* @return bool
	*/
	protected function isSingleQuantifiableString(array $strings)
	{
		return count($strings) === 1 && count($strings[0]) === 1 && $this->meta->isQuantifiable($strings[0][0]);
	}

	/**
	* Test whether an expression needs parentheses based on the strings info
	*
	* Parentheses are needed if there is more than one alternation, or if a quantifier has to be
	* applied to an expression that is not quantifiable as-is
	*
	* @param  array $info
	* @return bool
	*/
	protected function needsParentheses(array $info)
	{
		return ($info['alternationsCount'] > 1 || ($info['quantifier'] && !$this->isQuantifiable($info)));
	}

	/**
	* Serialize a given list of values into a character class
	*
	* @param  integer[] $values
	* @return string
	*/
	protected function serializeCharacterClass(array $values)
	{
		$expr = '[';
		foreach ($this->getRanges($values) as list($start, $end))
		{
			$expr .= $this->serializeCharacterClassUnit($start);
			if ($end > $start)
			{
				// Two consecutive values are output side by side, a dash is only used for
				// ranges that span three values or more
				if ($end > $start + 1)
				{
					$expr .= '-';
				}
				$expr .= $this->serializeCharacterClassUnit($end);
			}
		}
		$expr .= ']';

		return $expr;
	}

	/**
	* Serialize a given value to be used in a character class
	*
	* @param  integer $value
	* @return string
	*/
	protected function serializeCharacterClassUnit($value)
	{
		return $this->serializeValue($value, 'escapeCharacterClass');
	}

	/**
	* Serialize an element from a string
	*
	* Arrays are serialized recursively as a list of strings, other values as literals
	*
	* @param  array|integer $element
	* @return string
	*/
	protected function serializeElement($element)
	{
		return (is_array($element)) ? $this->serializeStrings($element) : $this->serializeLiteral($element);
	}

	/**
	* Serialize a given value to be used as a literal
	*
	* @param  integer $value
	* @return string
	*/
	protected function serializeLiteral($value)
	{
		return $this->serializeValue($value, 'escapeLiteral');
	}

	/**
	* Serialize a given string into a regular expression
	*
	* @param  array  $string
	* @return string
	*/
	protected function serializeString(array $string)
	{
		return implode('', array_map([$this, 'serializeElement'], $string));
	}

	/**
	* Serialize a given value
	*
	* Negative values are replaced with the expression returned by the MetaCharacters instance.
	* Other values are passed through the output then escaped using given method
	*
	* @param  integer $value
	* @param  string  $escapeMethod Name of the Escaper method used to escape the output
	* @return string
	*/
	protected function serializeValue($value, $escapeMethod)
	{
		return ($value < 0) ? $this->meta->getExpression($value) : $this->escaper->$escapeMethod($this->output->output($value));
	}
}