Serializer::serializeElement()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 3
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 2
eloc 1
nc 2
nop 1
crap 2
1
<?php declare(strict_types=1);
2
3
/**
4
* @package   s9e\RegexpBuilder
5
* @copyright Copyright (c) 2016-2022 The s9e authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\RegexpBuilder;
9
10
use s9e\RegexpBuilder\MetaCharacters;
11
use s9e\RegexpBuilder\Output\OutputInterface;
12
13
/**
* Serializes lists of strings into a regular expression
*
* A string is an array of elements. Each element is either an integer value or a nested list
* of strings, which is serialized recursively (see serializeElement()).
*/
class Serializer
{
	/**
	* @var Escaper Escapes rendered values, either as a literal or for use in a character class
	*/
	protected $escaper;

	/**
	* @var MetaCharacters Provides the expression that corresponds to a negative value
	*/
	protected $meta;

	/**
	* @var OutputInterface Renders a non-negative value as a string
	*/
	protected $output;

	/**
	* @param OutputInterface $output
	* @param MetaCharacters  $meta
	* @param Escaper         $escaper
	*/
	public function __construct(OutputInterface $output, MetaCharacters $meta, Escaper $escaper)
	{
		$this->escaper = $escaper;
		$this->meta    = $meta;
		$this->output  = $output;
	}

	/**
	* Serialize given strings into a regular expression
	*
	* Single-char strings are grouped into a character class that is placed in front of the
	* other alternations. The result is wrapped in a non-capturing group if needed, then
	* followed by the quantifier, if any.
	*
	* @param  array[] $strings
	* @return string
	*/
	public function serializeStrings(array $strings): string
	{
		$info         = $this->analyzeStrings($strings);
		$alternations = array_map([$this, 'serializeString'], $info['strings']);
		if (!empty($info['chars']))
		{
			// Prepend the character class to the list of alternations
			array_unshift($alternations, $this->serializeCharacterClass($info['chars']));
		}

		$expr = implode('|', $alternations);
		if ($this->needsParentheses($info))
		{
			$expr = '(?:' . $expr . ')';
		}

		return $expr . $info['quantifier'];
	}

	/**
	* Analyze given strings to determine how to serialize them
	*
	* The returned array contains the following elements:
	*
	*  - (int)    alternationsCount Number of alternations: one for the character class if
	*                               there is one, plus one for each remaining string
	*  - (string) quantifier        Either '' or '?'. It is '?' if the first string is empty,
	*                               in which case that string is removed from the list
	*  - (array)  chars             List of values from single-char strings. Only present if
	*                               there is more than one single-char string
	*  - (array)  strings           List of strings that are not part of the character class
	*
	* @param  array[] $strings
	* @return array
	*/
	protected function analyzeStrings(array $strings): array
	{
		$info = ['alternationsCount' => 0, 'quantifier' => ''];

		// isset() prevents an undefined offset error if the list is empty
		if (isset($strings[0]) && $strings[0] === [])
		{
			$info['quantifier'] = '?';
			unset($strings[0]);
		}

		$chars = $this->getChars($strings);
		if (count($chars) > 1)
		{
			// A character class counts as one alternation regardless of how many chars it holds
			++$info['alternationsCount'];
			$info['chars'] = array_values($chars);

			// $chars uses the same keys as $strings, which lets us remove them by key
			$strings = array_diff_key($strings, $chars);
		}

		$info['strings']            = array_values($strings);
		$info['alternationsCount'] += count($strings);

		return $info;
	}

	/**
	* Return the portion of strings that are composed of a single character
	*
	* @param  array<int, array> $strings
	* @return array<int, int>            String key => value
	*/
	protected function getChars(array $strings): array
	{
		$chars = [];
		foreach ($strings as $k => $string)
		{
			if ($this->isChar($string))
			{
				$chars[$k] = $string[0];
			}
		}

		return $chars;
	}

	/**
	* Get the list of ranges that cover all given values
	*
	* Consecutive values are merged into the same range. The list must be 0-indexed and must
	* contain at least one value.
	*
	* @param  int[]   $values Ordered list of values
	* @return array[]         List of ranges in the form [start, end]
	*/
	protected function getRanges(array $values): array
	{
		$i      = 0;
		$cnt    = count($values);
		$start  = $values[0];
		$end    = $start;
		$ranges = [];
		while (++$i < $cnt)
		{
			if ($values[$i] === $end + 1)
			{
				// Extend the current range
				++$end;
			}
			else
			{
				// Close the current range and start a new one at the current value
				$ranges[] = [$start, $end];
				$start = $end = $values[$i];
			}
		}
		$ranges[] = [$start, $end];

		return $ranges;
	}

	/**
	* Test whether given string represents a single character
	*
	* @param  array $string
	* @return bool
	*/
	protected function isChar(array $string): bool
	{
		return count($string) === 1 && is_int($string[0]) && MetaCharacters::isChar($string[0]);
	}

	/**
	* Test whether an expression is quantifiable based on the strings info
	*
	* The expression is quantifiable if there are no strings left outside of the character
	* class, or if the only string left is a single quantifiable element.
	*
	* @param  array $info
	* @return bool
	*/
	protected function isQuantifiable(array $info): bool
	{
		$strings = $info['strings'];

		return empty($strings) || $this->isSingleQuantifiableString($strings);
	}

	/**
	* Test whether a list of strings contains only one single quantifiable string
	*
	* @param  array[] $strings
	* @return bool
	*/
	protected function isSingleQuantifiableString(array $strings): bool
	{
		return count($strings) === 1 && count($strings[0]) === 1 && MetaCharacters::isQuantifiable($strings[0][0]);
	}

	/**
	* Test whether an expression needs parentheses based on the strings info
	*
	* Parentheses are needed if there is more than one alternation, or if there is a
	* quantifier that cannot be applied to the expression directly.
	*
	* @param  array $info
	* @return bool
	*/
	protected function needsParentheses(array $info): bool
	{
		return ($info['alternationsCount'] > 1 || ($info['quantifier'] && !$this->isQuantifiable($info)));
	}

	/**
	* Serialize a given list of values into a character class
	*
	* @param  int[]  $values
	* @return string
	*/
	protected function serializeCharacterClass(array $values): string
	{
		$expr = '[';
		foreach ($this->getRanges($values) as list($start, $end))
		{
			$expr .= $this->serializeCharacterClassUnit($start);
			if ($end > $start)
			{
				// Two adjacent values are output side by side, a hyphen is only used for
				// ranges that span three values or more
				if ($end > $start + 1)
				{
					$expr .= '-';
				}
				$expr .= $this->serializeCharacterClassUnit($end);
			}
		}
		$expr .= ']';

		return $expr;
	}

	/**
	* Serialize a given value to be used in a character class
	*
	* @param  int    $value
	* @return string
	*/
	protected function serializeCharacterClassUnit(int $value): string
	{
		return $this->serializeValue($value, 'escapeCharacterClass');
	}

	/**
	* Serialize an element from a string
	*
	* An array is serialized as a list of strings, anything else is serialized as a literal.
	*
	* @param  array|int $element
	* @return string
	*/
	protected function serializeElement($element): string
	{
		return (is_array($element)) ? $this->serializeStrings($element) : $this->serializeLiteral($element);
	}

	/**
	* Serialize a given value to be used as a literal
	*
	* @param  int    $value
	* @return string
	*/
	protected function serializeLiteral(int $value): string
	{
		return $this->serializeValue($value, 'escapeLiteral');
	}

	/**
	* Serialize a given string into a regular expression
	*
	* @param  array  $string
	* @return string
	*/
	protected function serializeString(array $string): string
	{
		return implode('', array_map([$this, 'serializeElement'], $string));
	}

	/**
	* Serialize a given value
	*
	* Negative values are replaced with the expression returned by the MetaCharacters
	* instance. Other values are rendered by the output then escaped using given method.
	*
	* @param  int    $value
	* @param  string $escapeMethod Name of the Escaper method used to escape the output
	* @return string
	*/
	protected function serializeValue(int $value, string $escapeMethod): string
	{
		return ($value < 0) ? $this->meta->getExpression($value) : $this->escaper->$escapeMethod($this->output->output($value));
	}
}