Completed
Push — master ( 84da34...cb7888 )
by Josh
16:13
created

CharacterClassBuilder::unescapeLiterals()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 20

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 0
cts 10
cp 0
rs 9.9332
c 0
b 0
f 0
cc 4
nc 3
nop 0
crap 20
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2019 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
/**
* Builds a regexp character class (e.g. "[a-d_]") from a list of characters
*
* Consecutive single-byte characters are collapsed into ranges, and the characters that have a
* positional meaning inside a class ("-" and "^") are reordered or escaped so that they are
* always matched literally.
*/
class CharacterClassBuilder
{
	/**
	* @var string[] List of characters in the class
	*/
	protected $chars;

	/**
	* @var string Delimiter used in regexps
	*/
	public $delimiter = '/';

	/**
	* @var array[] Array of [start, end] pairs where start and end are keys from $this->chars
	*/
	protected $ranges;

	/**
	* Create a character class that matches the given list of characters
	*
	* Entries made of a backslash followed by one non-letter byte are treated as escaped literals
	* and unescaped first. Any entry that is not exactly one byte long is output as-is and is never
	* merged into a range. Duplicate entries are only output once.
	*
	* @param  string[] $chars
	* @return string
	*/
	public function fromList(array $chars)
	{
		$this->chars = $chars;

		$this->unescapeLiterals();

		// Duplicates must be removed because reorderDash() and fixCaret() only relocate the first
		// occurrence of a character; a second "-" or "^" would keep its special meaning
		$this->chars = array_unique($this->chars);
		sort($this->chars);
		$this->storeRanges();
		$this->reorderDash();
		$this->fixCaret();
		$this->escapeSpecialChars();

		return $this->buildCharacterClass();
	}

	/**
	* Build the character class from the stored chars and ranges
	*
	* Ranges that span four characters or more are output as "x-y", shorter ones are output as
	* the literal sequence of their characters.
	*
	* @return string
	*/
	protected function buildCharacterClass()
	{
		$str = '[';
		foreach ($this->ranges as list($start, $end))
		{
			if ($end > $start + 2)
			{
				$str .= $this->chars[$start] . '-' . $this->chars[$end];
			}
			else
			{
				$str .= implode('', array_slice($this->chars, $start, $end + 1 - $start));
			}
		}
		$str .= ']';

		return $str;
	}

	/**
	* Escape special characters in stored chars
	*
	* Prepends a backslash to the backslash, the closing bracket and the current delimiter.
	*
	* @return void
	*/
	protected function escapeSpecialChars()
	{
		$specialChars = ['\\', ']', $this->delimiter];

		// array_intersect() preserves keys, which lets us update the stored chars in place
		foreach (array_intersect($this->chars, $specialChars) as $k => $v)
		{
			$this->chars[$k] = '\\' . $v;
		}
	}

	/**
	* Reorder or escape the caret character so that the regexp doesn't start with it
	*
	* @return void
	*/
	protected function fixCaret()
	{
		// Nothing to fix if there are no ranges at all (empty list of characters)
		if (empty($this->ranges))
		{
			return;
		}

		// Test whether the character class starts with a caret
		$k = array_search('^', $this->chars, true);
		if ($this->ranges[0][0] !== $k)
		{
			return;
		}

		// We swap the first two ranges if applicable, otherwise we escape the caret
		if (isset($this->ranges[1]))
		{
			$range           = $this->ranges[0];
			$this->ranges[0] = $this->ranges[1];
			$this->ranges[1] = $range;
		}
		else
		{
			$this->chars[$k] = '\\^';
		}
	}

	/**
	* Reorder the characters so that a literal dash isn't mistaken for a range
	*
	* After this method runs, the dash is either strictly inside or at the end of a range that is
	* output as "x-y", or it is the very first thing output in the character class.
	*
	* @return void
	*/
	protected function reorderDash()
	{
		$dashIndex = array_search('-', $this->chars, true);
		if ($dashIndex === false)
		{
			return;
		}

		foreach ($this->ranges as $k => $range)
		{
			list($start, $end) = $range;
			if ($dashIndex < $start || $dashIndex > $end)
			{
				continue;
			}

			// Same threshold as buildCharacterClass(): this range is output as "x-y"
			if ($end > $start + 2)
			{
				// A range that starts with a dash must come first, otherwise the character that
				// precedes it and the dash would be read as a range of their own, as in "!--0"
				if ($start === $dashIndex && $k > 0)
				{
					unset($this->ranges[$k]);
					array_unshift($this->ranges, $range);
				}

				return;
			}

			// The dash is part of a sequence that is output literally. We take it out of the
			// sequence, keep what's left on either side and prepend a single dash
			$remainder = [];
			if ($start < $dashIndex)
			{
				$remainder[] = [$start, $dashIndex - 1];
			}
			if ($dashIndex < $end)
			{
				$remainder[] = [$dashIndex + 1, $end];
			}
			array_splice($this->ranges, $k, 1, $remainder);
			array_unshift($this->ranges, [$dashIndex, $dashIndex]);

			return;
		}
	}

	/**
	* Store the character ranges from the list of characters
	*
	* Expects $this->chars to be sorted. Only single-byte entries can be part of a range that
	* covers more than one entry; every other entry gets a range of its own.
	*
	* @return void
	*/
	protected function storeRanges()
	{
		// Byte value of each single-byte entry, or false if the entry cannot be part of a range
		$values = [];
		foreach ($this->chars as $char)
		{
			$values[] = (strlen($char) === 1) ? ord($char) : false;
		}

		// Walk the list backwards, extending each range for as long as the previous entry is
		// exactly one byte value below the current start of the range
		$i      = count($values) - 1;
		$ranges = [];
		while ($i >= 0)
		{
			$start = $i;
			$end   = $i;
			while ($start > 0
				&& $values[$end] !== false
				&& $values[$start - 1] === $values[$end] - ($end + 1 - $start))
			{
				--$start;
			}
			$ranges[] = [$start, $end];
			$i = $start - 1;
		}

		$this->ranges = array_reverse($ranges);
	}

	/**
	* Unescape literals in stored chars
	*
	* Removes the backslash from entries that consist of a backslash followed by exactly one byte
	* that is not an ASCII letter. Other entries, such as "\n" or "\d", are left untouched.
	*
	* @return void
	*/
	protected function unescapeLiterals()
	{
		foreach ($this->chars as $k => $char)
		{
			// isset() guards against empty strings, which have no first byte to inspect
			if (isset($char[0]) && $char[0] === '\\' && preg_match('(^\\\\[^a-z]$)Di', $char))
			{
				$this->chars[$k] = substr($char, 1);
			}
		}
	}
}