Completed
Push — master ( 71f6c0...c1acae )
by Josh
09:22
created

Serializer::serializeValue()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
eloc 2
nc 2
nop 2
dl 0
loc 4
ccs 0
cts 0
cp 0
crap 6
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\RegexpBuilder
5
* @copyright Copyright (c) 2016-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\RegexpBuilder;
9
10
use s9e\RegexpBuilder\MetaCharacters;
11
use s9e\RegexpBuilder\Output\OutputInterface;
12
13
class Serializer
{
	/**
	* @var Escaper Object whose escapeLiteral()/escapeCharacterClass() methods are applied to output
	*/
	protected $escaper;

	/**
	* @var MetaCharacters Resolves negative values to expressions and answers isChar()/isQuantifiable()
	*/
	protected $meta;

	/**
	* @var OutputInterface Turns a non-negative value into its textual representation
	*/
	protected $output;

	/**
	* @param OutputInterface $output
	* @param MetaCharacters  $meta
	* @param Escaper         $escaper
	*/
	public function __construct(OutputInterface $output, MetaCharacters $meta, Escaper $escaper)
	{
		$this->escaper = $escaper;
		$this->meta    = $meta;
		$this->output  = $output;
	}

	/**
	* Serialize given strings into a regular expression
	*
	* Single-value strings are merged into one character class that is placed before the
	* remaining alternations. The whole expression is wrapped in a non-capturing group when
	* needsParentheses() says so, and the quantifier (if any) is appended last.
	*
	* @param  array[] $strings List of strings, each string being a list of values or nested lists
	* @return string
	*/
	public function serializeStrings(array $strings)
	{
		$info         = $this->analyzeStrings($strings);
		$alternations = array_map([$this, 'serializeString'], $info['strings']);
		if (!empty($info['chars']))
		{
			// Prepend the character class to the list of alternations
			array_unshift($alternations, $this->serializeCharacterClass($info['chars']));
		}

		$expr = implode('|', $alternations);
		if ($this->needsParentheses($info))
		{
			$expr = '(?:' . $expr . ')';
		}

		return $expr . $info['quantifier'];
	}

	/**
	* Analyze given strings to determine how to serialize them
	*
	* The returned array contains the following elements:
	*
	*  - (integer) alternationsCount Number of alternations in the final expression
	*  - (string)  quantifier        Either '' or '?'
	*  - (array)   chars             List of values from single-char strings (only set if
	*                                there is more than one of them)
	*  - (array)   strings           List of the remaining strings
	*
	* @param  array[] $strings
	* @return array
	*/
	protected function analyzeStrings(array $strings)
	{
		$info = ['alternationsCount' => 0, 'quantifier' => ''];

		// A leading empty string makes the whole expression optional. isset() keeps an empty
		// list of strings from triggering an undefined offset error
		if (isset($strings[0]) && $strings[0] === [])
		{
			$info['quantifier'] = '?';
			unset($strings[0]);
		}

		// A character class is only worth creating if it replaces at least two alternations
		$chars = $this->getChars($strings);
		if (count($chars) > 1)
		{
			++$info['alternationsCount'];
			$info['chars'] = array_values($chars);

			// getChars() preserves keys, which lets us remove the matching strings by key
			$strings = array_diff_key($strings, $chars);
		}

		$info['strings']            = array_values($strings);
		$info['alternationsCount'] += count($strings);

		return $info;
	}

	/**
	* Return the portion of strings that are composed of a single character
	*
	* @param  array[] $strings
	* @return array            Original key => the string's only value
	*/
	protected function getChars(array $strings)
	{
		$chars = [];
		foreach ($strings as $k => $string)
		{
			if ($this->isChar($string))
			{
				$chars[$k] = $string[0];
			}
		}

		return $chars;
	}

	/**
	* Get the list of ranges that cover all given values
	*
	* Consecutive values (each one equal to the previous value plus one) are merged into a
	* single range. An empty list of values produces an empty list of ranges.
	*
	* @param  integer[] $values Ordered, 0-indexed list of values
	* @return array[]           List of ranges in the form [start, end]
	*/
	protected function getRanges(array $values)
	{
		if (empty($values))
		{
			// Nothing to cover; also avoids reading the missing $values[0] below
			return [];
		}

		$i      = 0;
		$cnt    = count($values);
		$start  = $values[0];
		$end    = $start;
		$ranges = [];
		while (++$i < $cnt)
		{
			if ($values[$i] === $end + 1)
			{
				// Extend the current range
				++$end;
			}
			else
			{
				// Close the current range and start a new one at this value
				$ranges[] = [$start, $end];
				$start = $end = $values[$i];
			}
		}

		// Close the last open range
		$ranges[] = [$start, $end];

		return $ranges;
	}

	/**
	* Test whether given string represents a single character
	*
	* The string must contain exactly one element, that element must not be a nested list,
	* and MetaCharacters::isChar() must accept it.
	*
	* @param  array $string
	* @return bool
	*/
	protected function isChar(array $string)
	{
		return count($string) === 1 && !is_array($string[0]) && $this->meta->isChar($string[0]);
	}

	/**
	* Test whether an expression is quantifiable based on the strings info
	*
	* The expression is quantifiable if there are no strings left besides the character
	* class, or if the only string is a single quantifiable element.
	*
	* @param  array $info Strings info as returned by analyzeStrings()
	* @return bool
	*/
	protected function isQuantifiable(array $info)
	{
		$strings = $info['strings'];

		return empty($strings) || $this->isSingleQuantifiableString($strings);
	}

	/**
	* Test whether a list of strings contains only one single quantifiable string
	*
	* @param  array[] $strings 0-indexed list of strings
	* @return bool
	*/
	protected function isSingleQuantifiableString(array $strings)
	{
		return count($strings) === 1 && count($strings[0]) === 1 && $this->meta->isQuantifiable($strings[0][0]);
	}

	/**
	* Test whether an expression needs parentheses based on the strings info
	*
	* Parentheses are needed if there is more than one alternation, or if a quantifier has
	* to be applied to an expression that is not quantifiable as-is.
	*
	* @param  array $info Strings info as returned by analyzeStrings()
	* @return bool
	*/
	protected function needsParentheses(array $info)
	{
		return ($info['alternationsCount'] > 1 || ($info['quantifier'] && !$this->isQuantifiable($info)));
	}

	/**
	* Serialize a given list of values into a character class
	*
	* @param  integer[] $values Ordered list of values
	* @return string
	*/
	protected function serializeCharacterClass(array $values)
	{
		$expr = '[';
		foreach ($this->getRanges($values) as list($start, $end))
		{
			$expr .= $this->serializeCharacterClassUnit($start);
			if ($end > $start)
			{
				// A range of two consecutive values is written as two units, without a hyphen
				if ($end > $start + 1)
				{
					$expr .= '-';
				}
				$expr .= $this->serializeCharacterClassUnit($end);
			}
		}
		$expr .= ']';

		return $expr;
	}

	/**
	* Serialize a given value to be used in a character class
	*
	* @param  integer $value
	* @return string
	*/
	protected function serializeCharacterClassUnit($value)
	{
		return $this->serializeValue($value, 'escapeCharacterClass');
	}

	/**
	* Serialize an element from a string
	*
	* Nested lists are serialized recursively as a list of strings, anything else is
	* serialized as a literal.
	*
	* @param  array|integer $element
	* @return string
	*/
	protected function serializeElement($element)
	{
		return (is_array($element)) ? $this->serializeStrings($element) : $this->serializeLiteral($element);
	}

	/**
	* Serialize a given value to be used as a literal
	*
	* @param  integer $value
	* @return string
	*/
	protected function serializeLiteral($value)
	{
		return $this->serializeValue($value, 'escapeLiteral');
	}

	/**
	* Serialize a given string into a regular expression
	*
	* @param  array  $string List of elements, each one serialized via serializeElement()
	* @return string
	*/
	protected function serializeString(array $string)
	{
		return implode('', array_map([$this, 'serializeElement'], $string));
	}

	/**
	* Serialize a given value
	*
	* Negative values are resolved through MetaCharacters::getExpression(). Other values are
	* passed to the output, then escaped with the given method.
	*
	* @param  integer $value
	* @param  string  $escapeMethod Name of the Escaper method to call
	* @return string
	*/
	protected function serializeValue($value, $escapeMethod)
	{
		return ($value < 0) ? $this->meta->getExpression($value) : $this->escaper->$escapeMethod($this->output->output($value));
	}
}