Passed
Push — master ( b79f84...894306 )
by Josh
03:29
created

XPathHelper   A

Complexity

Total Complexity 27

Size/Duplication

Total Lines 280
Duplicated Lines 0 %

Test Coverage

Coverage 100%

Importance

Changes 3
Bugs 0 Features 0
Metric Value
wmc 27
eloc 98
c 3
b 0
f 0
dl 0
loc 280
ccs 88
cts 88
cp 1
rs 10

10 Methods

Rating   Name   Duplication   Size   Complexity  
A getVariables() 0 13 1
A isExpressionNumeric() 0 19 3
A decodeStrings() 0 9 1
A encodeStrings() 0 9 1
A parseEqualityExpr() 0 36 4
A getXPathParser() 0 18 2
B removeRedundantParentheses() 0 30 7
A evaluateConcat() 0 11 2
A minify() 0 48 3
A evaluateLiteral() 0 8 3
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2020 The s9e authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use RuntimeException;
11
use s9e\TextFormatter\Configurator\RecursiveParser;
12
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\BooleanFunctions;
13
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\BooleanOperators;
14
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\Comparisons;
15
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\Core;
16
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\Math;
17
use s9e\TextFormatter\Configurator\RendererGenerators\PHP\XPathConvertor\Convertors\SingleByteStringFunctions;
18
use s9e\TextFormatter\Utils\XPath;
19
20
abstract class XPathHelper
{
	/**
	* Decode strings inside of an XPath expression
	*
	* Reverses encodeStrings(): the content of every quoted literal is passed through hex2bin()
	* while the surrounding quotes are kept as-is. The content is therefore expected to be
	* hexadecimal.
	*
	* @param  string $expr XPath expression whose string literals are hex-encoded
	* @return string       XPath expression with decoded string literals
	*/
	public static function decodeStrings($expr)
	{
		return preg_replace_callback(
			'(\'[^\']*+\'|"[^"]*+")',
			static function ($m)
			{
				// $m[0][0] is the quote character used by this literal
				return $m[0][0] . hex2bin(substr($m[0], 1, -1)) . $m[0][0];
			},
			$expr
		);
	}

	/**
	* Encode strings inside of an XPath expression
	*
	* The content of every quoted literal is replaced with its hexadecimal representation so that
	* it only contains [0-9a-f] and cannot be mistaken for XPath syntax. The surrounding quotes
	* are kept as-is.
	*
	* @param  string $expr XPath expression
	* @return string       XPath expression with hex-encoded string literals
	*/
	public static function encodeStrings($expr)
	{
		return preg_replace_callback(
			'(\'[^\']*+\'|"[^"]*+")',
			static function ($m)
			{
				// $m[0][0] is the quote character used by this literal
				return $m[0][0] . bin2hex(substr($m[0], 1, -1)) . $m[0][0];
			},
			$expr
		);
	}

	/**
	* Return the list of variables used in a given XPath expression
	*
	* @param  string $expr XPath expression
	* @return array        Alphabetically sorted list of unique variable names
	*/
	public static function getVariables($expr)
	{
		// First, remove strings' contents to prevent false-positives
		$expr = preg_replace('/(["\']).*?\\1/s', '$1$1', $expr);

		// Capture all the variable names, without their leading $
		preg_match_all('/\\$(\\w+)/', $expr, $matches);

		// Dedupe and sort names. sort() also reindexes the list
		$varNames = array_unique($matches[1]);
		sort($varNames);

		return $varNames;
	}

	/**
	* Determine whether given XPath expression definitely evaluates to a number
	*
	* @param  string $expr XPath expression
	* @return bool         Whether given XPath expression definitely evaluates to a number
	*/
	public static function isExpressionNumeric($expr)
	{
		// Detect simple arithmetic operations between attributes, variables and numbers
		if (preg_match('(^([$@][-\\w]++|-?[.\\d]++)(?: *(?:[-*+]|div) *(?1))+$)', $expr))
		{
			return true;
		}

		// Try parsing the expression as a math expression
		try
		{
			return (bool) self::getXPathParser()->parse($expr, 'Math');
		}
		catch (RuntimeException $e)
		{
			// The expression could not be parsed as math, it is not known to be numeric
		}

		return false;
	}

	/**
	* Remove extraneous space in a given XPath expression
	*
	* @param  string $expr Original XPath expression
	* @return string       Minified XPath expression
	*
	* @throws RuntimeException if the expression contains an unpaired quote or unbalanced
	*                          parentheses
	*/
	public static function minify($expr)
	{
		$expr = trim($expr);

		// Test whether there's any characters that can be removed
		if (!preg_match('([\\s\\)])', $expr))
		{
			return $expr;
		}

		// A quote that does not belong to a complete literal can only be matched by the last
		// alternative, which sets the mark X
		preg_match_all('("[^"]*+"|\'[^\']*+\'|[\'"](*:X))', $expr, $m);
		if (!empty($m['MARK']))
		{
			throw new RuntimeException("Cannot parse XPath expression '" . $expr . "'");
		}

		// Temporarily encode the content of literal strings so that the operations below cannot
		// alter them. The expression has already been trimmed at this point
		$expr = self::encodeStrings($expr);

		// Normalize whitespace to a single space
		$expr = preg_replace('(\\s+)', ' ', $expr);

		// The regexps are applied in order, each one on the result of the previous one
		$regexps = [
			// Remove the space between a word character and a non-word character, either way
			'([-a-z_0-9]\\K (?=[^-a-z_0-9]))i',
			'([^-a-z_0-9]\\K (?=[-a-z_0-9]))i',

			// Remove the space between two non-word characters as long as they're not two -
			'((?!- -)[^-a-z_0-9]\\K (?=[^-a-z_0-9]))i',

			// Remove the space between a - and a word character as long as there's a space before -
			'( -\\K (?=[a-z_0-9]))i',

			// Remove the space between an operator and the next token
			'([ \\)](?:and|div|or)\\K )',

			// Remove the space after a number
			'(\\b\\d+\\K )'
		];
		$expr = preg_replace($regexps, '', $expr);

		// Remove consecutive parentheses where redundant
		$expr = self::removeRedundantParentheses($expr);

		// Restore the literals
		$expr = self::decodeStrings($expr);

		return $expr;
	}

	/**
	* Remove consecutive parentheses where redundant
	*
	* The expression is expected to contain no parentheses inside of string literals, which is
	* the case after encodeStrings() has been applied.
	*
	* @param  string $expr XPath expression
	* @return string       XPath expression without its redundant parentheses
	*
	* @throws RuntimeException if the parentheses are not balanced
	*/
	protected static function removeRedundantParentheses(string $expr): string
	{
		// Add parentheses around the original expression and terminate the expression with a space
		preg_match_all('([\\(\\)]|[^\\(\\)]++)', '(' . $expr . ') ', $m);
		$tokens = $m[0];

		// Position of the closing parenthesis that was added around the expression. It is always
		// the second to last token because of the space that terminates the expression
		$lastClose = count($tokens) - 2;

		// $left[$depth] holds the position of the last ( opened at given depth
		$depth = 0;
		$left  = [-1 => null];
		foreach ($tokens as $k => $token)
		{
			if ($token === '(')
			{
				$left[$depth] = $k;
				++$depth;
			}
			elseif ($token === ')')
			{
				--$depth;

				// Only the parenthesis that was added at the end may close the outermost pair,
				// anything else means the original expression has an unmatched )
				if ($depth < 1 && $k !== $lastClose)
				{
					throw new RuntimeException("Cannot parse XPath expression '" . $expr . "'");
				}

				// This pair is redundant if it is directly enclosed in another pair: the next
				// token is a ) and the matching ( is directly preceded by the enclosing (
				if ($tokens[$k + 1] === ')' && $left[$depth - 1] === $left[$depth] - 1)
				{
					unset($tokens[$k], $tokens[$left[$depth]]);
				}
			}
		}

		// Any ( that is still open at this point has no matching )
		if ($depth !== 0)
		{
			throw new RuntimeException("Cannot parse XPath expression '" . $expr . "'");
		}

		// Remove the extra parentheses as well as the last token before serializing them
		return implode('', array_slice($tokens, 1, -2));
	}

	/**
	* Parse an XPath expression that is composed entirely of equality tests between a variable part
	* and a constant part
	*
	* The variable part can be an attribute, a variable or the current node. The constant part can
	* be a string literal, a non-negative integer or a concat() of string literals. Tests can be
	* written in either order and are separated with "or".
	*
	* @param  string      $expr
	* @return array|false        Map of [variable part => list of values], or FALSE if the
	*                            expression does not match
	*/
	public static function parseEqualityExpr($expr)
	{
		// Match an equality between a variable and a literal or the concatenation of strings
		$eq = '(?<equality>'
		    . '(?<key>@[-\\w]+|\\$\\w+|\\.)'
		    . '(?<operator>\\s*=\\s*)'
		    . '(?:'
		    . '(?<literal>(?<string>"[^"]*"|\'[^\']*\')|0|[1-9][0-9]*)'
		    . '|'
		    . '(?<concat>concat\\(\\s*(?&string)\\s*(?:,\\s*(?&string)\\s*)+\\))'
		    . ')'
		    . '|'
		    . '(?:(?<literal>(?&literal))|(?<concat>(?&concat)))(?&operator)(?<key>(?&key))'
		    . ')';

		// Match a string that is entirely composed of equality checks separated with "or"
		$regexp = '(^(?J)\\s*' . $eq . '\\s*(?:or\\s*(?&equality)\\s*)*$)';
		if (!preg_match($regexp, $expr))
		{
			return false;
		}

		// Capture each equality individually
		preg_match_all("((?J)$eq)", $expr, $matches, PREG_SET_ORDER);

		$map = [];
		foreach ($matches as $m)
		{
			$key   = $m['key'];
			$value = (!empty($m['concat']))
			       ? self::evaluateConcat($m['concat'])
			       : self::evaluateLiteral($m['literal']);

			$map[$key][] = $value;
		}

		return $map;
	}

	/**
	* Evaluate a concat() expression where all arguments are string literals
	*
	* @param  string $expr concat() expression
	* @return string       Expression's value
	*/
	protected static function evaluateConcat($expr)
	{
		preg_match_all('(\'[^\']*\'|"[^"]*")', $expr, $strings);

		// Append the content of each literal, minus its quotes
		$value = '';
		foreach ($strings[0] as $string)
		{
			$value .= substr($string, 1, -1);
		}

		return $value;
	}

	/**
	* Evaluate an XPath literal
	*
	* @param  string $expr XPath literal, either a quoted string or a number
	* @return string       Literal's string value
	*/
	protected static function evaluateLiteral($expr)
	{
		// Quoted strings lose their quotes, anything else is returned as-is
		if ($expr[0] === '"' || $expr[0] === "'")
		{
			$expr = substr($expr, 1, -1);
		}

		return $expr;
	}

	/**
	* Generate and return a cached XPath parser with a default set of matchers
	*
	* The parser is created on the first call and reused by every subsequent call.
	*
	* @return RecursiveParser
	*/
	protected static function getXPathParser()
	{
		static $parser;
		if (!isset($parser))
		{
			$parser     = new RecursiveParser;
			$matchers   = [];
			$matchers[] = new BooleanFunctions($parser);
			$matchers[] = new BooleanOperators($parser);
			$matchers[] = new Comparisons($parser);
			$matchers[] = new Core($parser);
			$matchers[] = new Math($parser);
			$matchers[] = new SingleByteStringFunctions($parser);

			$parser->setMatchers($matchers);
		}

		return $parser;
	}
}