Completed
Push — master ( 95e794...553f1a )
by Josh
04:13
created

Serializer::analyzeStrings()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 20
Code Lines 11

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 3

Importance

Changes 0
Metric Value
dl 0
loc 20
ccs 11
cts 11
cp 1
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 11
nc 4
nop 1
crap 3
1
<?php
2
3
/**
4
* @package   s9e\RegexpBuilder
5
* @copyright Copyright (c) 2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\RegexpBuilder;
9
10
use s9e\RegexpBuilder\Output\OutputInterface;
11
12
class Serializer
{
	/**
	* @var Escaper Escapes output for use as a literal or inside a character class
	*/
	protected $escaper;

	/**
	* @var OutputInterface Converts each value into its textual representation
	*/
	protected $output;

	/**
	* @param OutputInterface $output
	* @param Escaper         $escaper
	*/
	public function __construct(OutputInterface $output, Escaper $escaper)
	{
		$this->escaper = $escaper;
		$this->output  = $output;
	}

	/**
	* Serialize given strings into a regular expression
	*
	* Each string is a list of elements. An element is either a single value or a nested
	* list of strings, which is serialized recursively as a sub-expression.
	*
	* @param  array[] $strings
	* @return string
	*/
	public function serializeStrings(array $strings)
	{
		$info         = $this->analyzeStrings($strings);
		$alternations = $this->buildAlternations($info);
		$expr         = implode('|', $alternations);

		// A group is needed to scope the alternation, or to make the quantifier apply to a
		// whole multi-element string rather than to its last element only
		if (count($alternations) > 1 || $this->isOneOptionalString($info))
		{
			$expr = '(?:' . $expr . ')';
		}

		return $expr . $info['quantifier'];
	}

	/**
	* Analyze given strings to determine how to serialize them
	*
	* The returned array may contain any of the following elements:
	*
	*  - (string) quantifier Either '' or '?'
	*  - (array)  chars      List of values from single-char strings
	*  - (array)  strings    List of multi-char strings
	*
	* "chars" is only set if there is more than one single-char string. A lone single-char
	* string remains in "strings".
	*
	* @param  array[] $strings
	* @return array
	*/
	protected function analyzeStrings(array $strings)
	{
		$info = ['quantifier' => ''];

		// A leading empty string makes the whole expression optional. isset() prevents an
		// undefined offset error if the list itself is empty
		if (isset($strings[0]) && $strings[0] === [])
		{
			$info['quantifier'] = '?';
			unset($strings[0]);
		}

		$chars = $this->getChars($strings);
		if (count($chars) > 1)
		{
			// getChars() preserves the original keys, which lets array_diff_key() remove the
			// strings that are going to be merged into a character class
			$info['chars'] = array_values($chars);
			$strings       = array_diff_key($strings, $chars);
		}

		$info['strings'] = array_values($strings);

		return $info;
	}

	/**
	* Build the list of alternations based on given info
	*
	* The character class, if any, comes first and is followed by each remaining string.
	*
	* @param  array    $info Info array as returned by analyzeStrings()
	* @return string[]
	*/
	protected function buildAlternations(array $info)
	{
		$alternations = [];
		if (!empty($info['chars']))
		{
			$alternations[] = $this->serializeCharacterClass($info['chars']);
		}
		foreach ($info['strings'] as $string)
		{
			$alternations[] = $this->serializeString($string);
		}

		return $alternations;
	}

	/**
	* Return the portion of strings that are composed of a single character
	*
	* A string whose only element is a nested list of strings is not a character and is
	* ignored. It gets serialized as a sub-expression by serializeString() instead.
	*
	* @param  array[] $strings
	* @return array            String key => value of the string's only element
	*/
	protected function getChars(array $strings)
	{
		$chars = [];
		foreach ($strings as $k => $string)
		{
			if (count($string) === 1 && !is_array($string[0]))
			{
				$chars[$k] = $string[0];
			}
		}

		return $chars;
	}

	/**
	* Get the list of ranges that cover all given values
	*
	* Consecutive values are merged into one range. An empty list produces no ranges.
	*
	* @param  integer[] $values Ordered list of values
	* @return array[]           List of ranges in the form [start, end]
	*/
	protected function getRanges(array $values)
	{
		$cnt = count($values);
		if ($cnt === 0)
		{
			// Nothing to cover, and $values[0] would be an undefined offset
			return [];
		}

		$i      = 0;
		$start  = $values[0];
		$end    = $start;
		$ranges = [];
		while (++$i < $cnt)
		{
			if ($values[$i] === $end + 1)
			{
				// Contiguous value, extend the current range
				++$end;
			}
			else
			{
				// Gap found, close the current range and start a new one
				$ranges[] = [$start, $end];
				$start = $end = $values[$i];
			}
		}
		$ranges[] = [$start, $end];

		return $ranges;
	}

	/**
	* Test whether a string is optional and has more than one character
	*
	* @param  array $info Info array as returned by analyzeStrings()
	* @return bool
	*/
	protected function isOneOptionalString(array $info)
	{
		// Test whether the first string has a quantifier and more than one element
		return (!empty($info['quantifier']) && isset($info['strings'][0][1]));
	}

	/**
	* Serialize a given list of values into a character class
	*
	* @param  integer[] $values
	* @return string
	*/
	protected function serializeCharacterClass(array $values)
	{
		$expr = '[';
		foreach ($this->getRanges($values) as list($start, $end))
		{
			$expr .= $this->escaper->escapeCharacterClass($this->output->output($start));
			if ($end > $start)
			{
				// A range of exactly two values is written as two characters because "ab" is
				// no longer than "a-b". The dash is only used for three values or more
				if ($end > $start + 1)
				{
					$expr .= '-';
				}
				$expr .= $this->escaper->escapeCharacterClass($this->output->output($end));
			}
		}
		$expr .= ']';

		return $expr;
	}

	/**
	* Serialize a given string into a regular expression
	*
	* @param  array  $string List of elements, each either a value or a nested list of strings
	* @return string
	*/
	protected function serializeString(array $string)
	{
		$expr = '';
		foreach ($string as $element)
		{
			if (is_array($element))
			{
				// Nested alternation, serialized recursively
				$expr .= $this->serializeStrings($element);
			}
			else
			{
				$expr .= $this->escaper->escapeLiteral($this->output->output($element));
			}
		}

		return $expr;
	}
}