Completed
Push — master ( d91fed...fd66aa )
by Josh
17:36
created

XPathConvertor   F

Complexity

Total Complexity 77

Size/Duplication

Total Lines 645
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 0

Importance

Changes 4
Bugs 0 Features 0
Metric Value
wmc 77
lcom 1
cbo 0
dl 0
loc 645
rs 2.195
c 4
b 0
f 0

30 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 4 1
B convertCondition() 0 59 8
B convertXPath() 0 46 10
A attr() 0 4 1
A dot() 0 4 1
A param() 0 4 1
A string() 0 4 1
A lname() 0 4 1
A name() 0 4 1
A number() 0 13 3
A strlen() 0 13 3
A contains() 0 4 1
A startswith() 0 4 1
A not() 0 4 1
A notcontains() 0 4 1
B substr() 0 58 8
A substringafter() 0 4 1
A substringbefore() 0 4 1
A cmp() 0 31 3
A bool() 0 9 1
A parens() 0 4 1
A translate() 0 37 4
A math() 0 19 4
A exportXPath() 0 11 2
A exportXPathCurrent() 0 4 1
A exportXPathFragment() 0 4 1
A exportXPathParam() 0 6 1
B generateXPathRegexp() 0 131 7
A matchXPathForExport() 0 22 3
A tokenizeXPathForExport() 0 18 4

How to fix   Complexity   

Complex Class

Complex classes like XPathConvertor often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes or suffixes. You can also have a look at the cohesion graph to spot any unconnected or weakly connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use XPathConvertor, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\RendererGenerators\PHP;
9
10
use LogicException;
11
use RuntimeException;
12
13
class XPathConvertor
{
	/**
	* @var string PCRE version
	*/
	public $pcreVersion;

	/**
	* @var string Regexp used to match XPath expressions
	*/
	protected $regexp;

	/**
	* @var bool Whether to use the mbstring functions as a replacement for XPath expressions
	*/
	public $useMultibyteStringFunctions = false;

	/**
	* Constructor
	*
	* Records the PCRE version PHP was built with. It is used by generateXPathRegexp() to decide
	* whether patterns that rely on subpattern calls can be used
	*/
	public function __construct()
	{
		$this->pcreVersion = PCRE_VERSION;
	}

	/**
	* Convert an XPath expression (used in a condition) into PHP code
	*
	* This method is similar to convertXPath() but it selectively replaces some simple conditions
	* with the corresponding DOM method for performance reasons
	*
	* @param  string $expr XPath expression
	* @return string       PHP code
	*/
	public function convertCondition($expr)
	{
		$expr = trim($expr);

		// XSL: <xsl:if test="@foo">
		// PHP: if ($node->hasAttribute('foo'))
		if (preg_match('#^@([-\\w]+)$#', $expr, $m))
		{
			return '$node->hasAttribute(' . var_export($m[1], true) . ')';
		}

		// XSL: <xsl:if test="@*">
		// PHP: if ($node->attributes->length)
		if ($expr === '@*')
		{
			return '$node->attributes->length';
		}

		// XSL: <xsl:if test="not(@foo)">
		// PHP: if (!$node->hasAttribute('foo'))
		if (preg_match('#^not\\(@([-\\w]+)\\)$#', $expr, $m))
		{
			return '!$node->hasAttribute(' . var_export($m[1], true) . ')';
		}

		// XSL: <xsl:if test="$foo">
		// PHP: if ($this->params['foo']!=='')
		if (preg_match('#^\\$(\\w+)$#', $expr, $m))
		{
			return '$this->params[' . var_export($m[1], true) . "]!==''";
		}

		// XSL: <xsl:if test="not($foo)">
		// PHP: if ($this->params['foo']==='')
		if (preg_match('#^not\\(\\$(\\w+)\\)$#', $expr, $m))
		{
			return '$this->params[' . var_export($m[1], true) . "]===''";
		}

		// XSL: <xsl:if test="@foo > 1">
		// PHP: if ($node->getAttribute('foo') > 1)
		if (preg_match('#^([$@][-\\w]+)\\s*([<>])\\s*(\\d+)$#', $expr, $m))
		{
			// The digits are normalized so that "010" is not emitted as a PHP octal literal
			return $this->convertXPath($m[1]) . $m[2] . $this->normalizeNumber($m[3]);
		}

		// If the condition does not seem to contain a relational expression, or start with a
		// function call, we wrap it inside of a boolean() call
		if (!preg_match('#[=<>]|\\bor\\b|\\band\\b|^[-\\w]+\\s*\\(#', $expr))
		{
			// XSL: <xsl:if test="parent::foo">
			// PHP: if ($this->xpath->evaluate("boolean(parent::foo)",$node))
			$expr = 'boolean(' . $expr . ')';
		}

		// XSL: <xsl:if test="@foo='bar'">
		// PHP: if ($this->xpath->evaluate("@foo='bar'",$node))
		return $this->convertXPath($expr);
	}

	/**
	* Convert an XPath expression (used as value) into PHP code
	*
	* The expression is matched against the regexp built by generateXPathRegexp(). If a named
	* capture corresponds to a method of this class, that method produces the PHP code. Otherwise
	* the expression is exported as a call to $this->xpath->evaluate()
	*
	* @param  string $expr XPath expression
	* @return string       PHP code
	*/
	public function convertXPath($expr)
	{
		$expr = trim($expr);

		$this->generateXPathRegexp();
		if (preg_match($this->regexp, $expr, $m))
		{
			// Find the first non-empty named capture that has a method of the same name
			$methodName = null;
			foreach ($m as $k => $v)
			{
				if (is_numeric($k) || $v === '' || $v === null || !method_exists($this, $k))
				{
					continue;
				}

				$methodName = $k;
				break;
			}

			if (isset($methodName))
			{
				// Default argument is the whole matched string
				$args = [$m[$methodName]];

				// Overwrite the default arguments with the named captures
				// (e.g. "attr0", "attr1", ... for the method attr())
				$i = 0;
				while (isset($m[$methodName . $i]))
				{
					$args[$i] = $m[$methodName . $i];
					++$i;
				}

				return call_user_func_array([$this, $methodName], $args);
			}
		}

		// If the condition does not seem to contain a relational expression, or start with a
		// function call, we wrap it inside of a string() call
		if (!preg_match('#[=<>]|\\bor\\b|\\band\\b|^[-\\w]+\\s*\\(#', $expr))
		{
			$expr = 'string(' . $expr . ')';
		}

		// Replace parameters in the expression
		return '$this->xpath->evaluate(' . $this->exportXPath($expr) . ',$node)';
	}

	/**
	* Normalize an integer literal so that it can be safely emitted as PHP source
	*
	* Removes leading zeros (while keeping at least one digit and any leading minus sign) so that
	* the literal is not interpreted by PHP as an octal number, e.g. "010" or "08"
	*
	* @param  string $number Numeric literal, e.g. "007" or "-010"
	* @return string         Literal without leading zeros, e.g. "7" or "-10"
	*/
	protected function normalizeNumber($number)
	{
		return preg_replace('(^(-?)0+(?=\\d))', '$1', $number);
	}

	/**
	* Convert an attribute reference such as @foo
	*
	* @param  string $attrName Attribute's name, without the leading @
	* @return string           PHP code
	*/
	protected function attr($attrName)
	{
		return '$node->getAttribute(' . var_export($attrName, true) . ')';
	}

	/**
	* Convert the context node expression "."
	*
	* @return string PHP code
	*/
	protected function dot()
	{
		return '$node->textContent';
	}

	/**
	* Convert a parameter reference such as $foo
	*
	* @param  string $paramName Parameter's name, without the leading $
	* @return string            PHP code
	*/
	protected function param($paramName)
	{
		return '$this->params[' . var_export($paramName, true) . ']';
	}

	/**
	* Convert a string literal
	*
	* @param  string $string String literal, including its surrounding quotes
	* @return string         PHP string literal
	*/
	protected function string($string)
	{
		return var_export(substr($string, 1, -1), true);
	}

	/**
	* Convert a local-name() call
	*
	* @return string PHP code
	*/
	protected function lname()
	{
		return '$node->localName';
	}

	/**
	* Convert a name() call
	*
	* @return string PHP code
	*/
	protected function name()
	{
		return '$node->nodeName';
	}

	/**
	* Convert a number to a quoted PHP string
	*
	* @param  string $sign   Either "-" or an empty string
	* @param  string $number Digits
	* @return string         Single-quoted PHP string containing the number
	*/
	protected function number($sign, $number)
	{
		// Remove leading zeros
		$number = ltrim($number, '0') ?: 0;

		// Disable negative zero
		if (!$number)
		{
			$sign = '';
		}

		return "'" . $sign . $number . "'";
	}

	/**
	* Convert a string-length() call
	*
	* @param  string $expr Argument of string-length(), or an empty string for the context node
	* @return string       PHP code
	*/
	protected function strlen($expr)
	{
		if ($expr === '')
		{
			$expr = '.';
		}

		$php = $this->convertXPath($expr);

		// Without mbstring, every character is replaced with a single byte before counting
		return ($this->useMultibyteStringFunctions)
			? 'mb_strlen(' . $php . ",'utf-8')"
			: "strlen(preg_replace('(.)us','.'," . $php . '))';
	}

	/**
	* Convert a contains() call
	*
	* @param  string $haystack XPath expression for the string being searched
	* @param  string $needle   XPath expression for the string being searched for
	* @return string           PHP code
	*/
	protected function contains($haystack, $needle)
	{
		return '(strpos(' . $this->convertXPath($haystack) . ',' . $this->convertXPath($needle) . ')!==false)';
	}

	/**
	* Convert a starts-with() call
	*
	* @param  string $string    XPath expression for the string being tested
	* @param  string $substring XPath expression for the expected prefix
	* @return string            PHP code
	*/
	protected function startswith($string, $substring)
	{
		return '(strpos(' . $this->convertXPath($string) . ',' . $this->convertXPath($substring) . ')===0)';
	}

	/**
	* Convert a not() call
	*
	* @param  string $expr XPath expression being negated
	* @return string       PHP code
	*/
	protected function not($expr)
	{
		return '!(' . $this->convertCondition($expr) . ')';
	}

	/**
	* Convert a not(contains()) call
	*
	* @param  string $haystack XPath expression for the string being searched
	* @param  string $needle   XPath expression for the string being searched for
	* @return string           PHP code
	*/
	protected function notcontains($haystack, $needle)
	{
		return '(strpos(' . $this->convertXPath($haystack) . ',' . $this->convertXPath($needle) . ')===false)';
	}

	/**
	* Convert a substring() call
	*
	* If the multibyte string functions are not enabled, the call is left to be evaluated as XPath
	* at runtime. Otherwise it is converted to a mb_substr() call
	*
	* @param  string      $exprString XPath expression for the source string
	* @param  string      $exprPos    XPath expression for the 1-based start position
	* @param  string|null $exprLen    XPath expression for the length, if any
	* @return string                  PHP code
	*/
	protected function substr($exprString, $exprPos, $exprLen = null)
	{
		if (!$this->useMultibyteStringFunctions)
		{
			$expr = 'substring(' . $exprString . ',' . $exprPos;
			if (isset($exprLen))
			{
				$expr .= ',' . $exprLen;
			}
			$expr .= ')';

			return '$this->xpath->evaluate(' . $this->exportXPath($expr) . ',$node)';
		}

		// NOTE: negative values for the second argument do not produce the same result as
		//       specified in XPath if the argument is not a literal number
		$php = 'mb_substr(' . $this->convertXPath($exprString) . ',';

		// Hardcode the value if possible
		if (is_numeric($exprPos))
		{
			$php .= max(0, $exprPos - 1);
		}
		else
		{
			$php .= 'max(0,' . $this->convertXPath($exprPos) . '-1)';
		}

		$php .= ',';

		if (isset($exprLen))
		{
			if (is_numeric($exprLen))
			{
				// Handles substring(0,2) as per XPath
				if (is_numeric($exprPos) && $exprPos < 1)
				{
					$php .= max(0, $exprPos + $exprLen - 1);
				}
				else
				{
					$php .= max(0, $exprLen);
				}
			}
			else
			{
				$php .= 'max(0,' . $this->convertXPath($exprLen) . ')';
			}
		}
		else
		{
			$php .= 'null';
		}

		$php .= ",'utf-8')";

		return $php;
	}

	/**
	* Convert a substring-after() call
	*
	* @param  string $expr XPath expression for the source string
	* @param  string $str  String literal, including its surrounding quotes
	* @return string       PHP code
	*/
	protected function substringafter($expr, $str)
	{
		// The offset is the byte length of the literal without its two surrounding quotes
		return 'substr(strstr(' . $this->convertXPath($expr) . ',' . $this->convertXPath($str) . '),' . (strlen($str) - 2) . ')';
	}

	/**
	* Convert a substring-before() call
	*
	* @param  string $expr1 XPath expression for the source string
	* @param  string $expr2 XPath expression for the delimiter
	* @return string        PHP code
	*/
	protected function substringbefore($expr1, $expr2)
	{
		return 'strstr(' . $this->convertXPath($expr1) . ',' . $this->convertXPath($expr2) . ',true)';
	}

	/**
	* Convert a comparison
	*
	* @param  string $expr1    Left operand
	* @param  string $operator XPath comparison operator
	* @param  string $expr2    Right operand
	* @return string           PHP code
	*/
	protected function cmp($expr1, $operator, $expr2)
	{
		$operands  = [];
		$operators = [
			'='  => '===',
			'!=' => '!==',
			'>'  => '>',
			'>=' => '>=',
			'<'  => '<',
			'<=' => '<='
		];

		// If either operand is a number, represent it as a PHP number and replace the identity
		// operators with loose comparison operators
		foreach ([$expr1, $expr2] as $expr)
		{
			if (is_numeric($expr))
			{
				$operators['=']  = '==';
				$operators['!='] = '!=';

				// Strip every leading zero so the literal cannot be read as octal by PHP
				$operands[] = $this->normalizeNumber($expr);
			}
			else
			{
				$operands[] = $this->convertXPath($expr);
			}
		}

		return implode($operators[$operator], $operands);
	}

	/**
	* Convert a boolean operation
	*
	* @param  string $expr1    Left operand
	* @param  string $operator Either "and" or "or"
	* @param  string $expr2    Right operand
	* @return string           PHP code
	*/
	protected function bool($expr1, $operator, $expr2)
	{
		$operators = [
			'and' => '&&',
			'or'  => '||'
		];

		return $this->convertCondition($expr1) . $operators[$operator] . $this->convertCondition($expr2);
	}

	/**
	* Convert a parenthesized expression
	*
	* @param  string $expr Expression found between the parentheses
	* @return string       PHP code
	*/
	protected function parens($expr)
	{
		return '(' . $this->convertXPath($expr) . ')';
	}

	/**
	* Convert a translate() call to a strtr() call
	*
	* @param  string $str  XPath expression for the source string
	* @param  string $from String literal (including quotes) of characters to be replaced
	* @param  string $to   String literal (including quotes) of replacement characters
	* @return string       PHP code
	*/
	protected function translate($str, $from, $to)
	{
		// Split both literals into lists of UTF-8 characters
		preg_match_all('(.)su', substr($from, 1, -1), $matches);
		$from = $matches[0];

		preg_match_all('(.)su', substr($to, 1, -1), $matches);
		$to = $matches[0];

		// Remove duplicates from $from, keep matching elements in $to then add missing elements
		$from = array_unique($from);
		$to   = array_intersect_key($to, $from);
		$to  += array_fill_keys(array_keys(array_diff_key($from, $to)), '');

		// Start building the strtr() call
		$php = 'strtr(' . $this->convertXPath($str) . ',';

		// Test whether all elements in $from and $to are exactly 1 byte long, meaning they
		// are ASCII and with no empty strings. If so, we can use the scalar version of
		// strtr(), otherwise we have to use the array version
		if ([1] === array_unique(array_map('strlen', $from))
		 && [1] === array_unique(array_map('strlen', $to)))
		{
			$php .= var_export(implode('', $from), true) . ',' . var_export(implode('', $to), true);
		}
		else
		{
			$elements = [];
			foreach ($from as $k => $char)
			{
				$elements[] = var_export($char, true) . '=>' . var_export($to[$k], true);
			}
			$php .= '[' . implode(',', $elements) . ']';
		}
		$php .= ')';

		return $php;
	}

	/**
	* Convert a math operation
	*
	* Numeric operands are emitted as PHP numbers, other operands are converted via convertXPath()
	*
	* @param  string $expr1    Left operand
	* @param  string $operator One of "+", "-", "*" or "div"
	* @param  string $expr2    Right operand
	* @return string           PHP code
	*/
	protected function math($expr1, $operator, $expr2)
	{
		$expr1 = (is_numeric($expr1))
		       ? $this->normalizeNumber($expr1)
		       : $this->convertXPath($expr1);

		if (is_numeric($expr2))
		{
			$expr2 = $this->normalizeNumber($expr2);

			// Prevent "1 - -1" from being emitted as "1--1", which PHP would parse as a
			// decrement operator
			if ($operator === '-' && $expr2[0] === '-')
			{
				$expr2 = '(' . $expr2 . ')';
			}
		}
		else
		{
			$expr2 = $this->convertXPath($expr2);
		}

		if ($operator === 'div')
		{
			$operator = '/';
		}

		return $expr1 . $operator . $expr2;
	}

	/**
	* Export an XPath expression as PHP with special consideration for XPath variables
	*
	* Will return PHP source representing the XPath expression, with special consideration for XPath
	* variables which are returned as a method call to XPath::export()
	*
	* @param  string $expr XPath expression
	* @return string       PHP representation of the expression
	*/
	protected function exportXPath($expr)
	{
		$phpTokens = [];
		foreach ($this->tokenizeXPathForExport($expr) as list($type, $content))
		{
			// Dispatch to exportXPathCurrent(), exportXPathFragment() or exportXPathParam()
			$methodName  = 'exportXPath' . ucfirst($type);
			$phpTokens[] = $this->$methodName($content);
		}

		return implode('.', $phpTokens);
	}

	/**
	* Convert a "current()" XPath expression to its PHP source representation
	*
	* @return string
	*/
	protected function exportXPathCurrent()
	{
		return '$node->getNodePath()';
	}

	/**
	* Convert a fragment of an XPath expression to its PHP source representation
	*
	* @param  string $fragment
	* @return string
	*/
	protected function exportXPathFragment($fragment)
	{
		return var_export($fragment, true);
	}

	/**
	* Convert an XSLT parameter to its PHP source representation
	*
	* @param  string $param Parameter, including the leading $
	* @return string
	*/
	protected function exportXPathParam($param)
	{
		$paramName = ltrim($param, '$');

		return '$this->getParamAsXPath(' . var_export($paramName, true) . ')';
	}

	/**
	* Generate a regexp used to parse XPath expressions
	*
	* The regexp is generated once and stored in $this->regexp. Each named subpattern corresponds
	* to a method of this class, and each numbered variant (e.g. "attr0") to one of its arguments
	*
	* @return void
	*/
	protected function generateXPathRegexp()
	{
		if (isset($this->regexp))
		{
			return;
		}

		// Patterns that reference other subpatterns via (?&name) are only used on PCRE >= 8.13
		$supportsSubpatternCalls = version_compare($this->pcreVersion, '8.13', '>=');

		// NOTE: patterns expressed as arrays are joined with a space, and every space in the final
		//       regexp is replaced with \s*
		$patterns = [
			'attr'      => ['@', '(?<attr0>[-\\w]+)'],
			'dot'       => '\\.',
			'name'      => 'name\\(\\)',
			'lname'     => 'local-name\\(\\)',
			'param'     => ['\\$', '(?<param0>\\w+)'],
			'string'    => '"[^"]*"|\'[^\']*\'',
			'number'    => ['(?<number0>-?)', '(?<number1>\\d++)'],
			'strlen'    => ['string-length', '\\(', '(?<strlen0>(?&value)?)', '\\)'],
			'contains'  => [
				'contains',
				'\\(',
				'(?<contains0>(?&value))',
				',',
				'(?<contains1>(?&value))',
				'\\)'
			],
			'translate' => [
				'translate',
				'\\(',
				'(?<translate0>(?&value))',
				',',
				'(?<translate1>(?&string))',
				',',
				'(?<translate2>(?&string))',
				'\\)'
			],
			'substr' => [
				'substring',
				'\\(',
				'(?<substr0>(?&value))',
				',',
				'(?<substr1>(?&value))',
				'(?:, (?<substr2>(?&value)))?',
				'\\)'
			],
			'substringafter' => [
				'substring-after',
				'\\(',
				'(?<substringafter0>(?&value))',
				',',
				'(?<substringafter1>(?&string))',
				'\\)'
			],
			'substringbefore' => [
				'substring-before',
				'\\(',
				'(?<substringbefore0>(?&value))',
				',',
				'(?<substringbefore1>(?&value))',
				'\\)'
			],
			'startswith' => [
				'starts-with',
				'\\(',
				'(?<startswith0>(?&value))',
				',',
				'(?<startswith1>(?&value))',
				'\\)'
			],
			'math' => [
				'(?<math0>(?&attr)|(?&number)|(?&param))',
				'(?<math1>[-+*]|div)',
				'(?<math2>(?&math)|(?&math0))'
			],
			'notcontains' => [
				'not',
				'\\(',
				'contains',
				'\\(',
				'(?<notcontains0>(?&value))',
				',',
				'(?<notcontains1>(?&value))',
				'\\)',
				'\\)'
			]
		];

		$exprs = [];
		if ($supportsSubpatternCalls)
		{
			// Create a regexp that matches a comparison such as "@foo = 1"
			// NOTE: cannot support < or > because of NaN -- (@foo<5) returns false if @foo=''
			$exprs[] = '(?<cmp>(?<cmp0>(?&value)) (?<cmp1>!?=) (?<cmp2>(?&value)))';

			// Create a regexp that matches a parenthesized expression
			// NOTE: could be expanded to support any expression
			$exprs[] = '(?<parens>\\( (?<parens0>(?&bool)|(?&cmp)|(?&math)) \\))';

			// Create a regexp that matches boolean operations
			$exprs[] = '(?<bool>(?<bool0>(?&cmp)|(?&not)|(?&value)|(?&parens)) (?<bool1>and|or) (?<bool2>(?&bool)|(?&cmp)|(?&not)|(?&value)|(?&parens)))';

			// Create a regexp that matches not() expressions
			$exprs[] = '(?<not>not \\( (?<not0>(?&bool)|(?&value)) \\))';

			// Modify the math pattern to accept parenthesized expressions. Only the first operand
			// needs to be modified, the second operand reuses its definition via (?&math0)
			$patterns['math'][0] = str_replace('))', ')|(?&parens))', $patterns['math'][0]);
		}

		// Create a regexp that matches values, such as "@foo" or "42"
		$valueExprs = [];
		foreach ($patterns as $name => $pattern)
		{
			if (is_array($pattern))
			{
				$pattern = implode(' ', $pattern);
			}

			if ($supportsSubpatternCalls || strpos($pattern, '?&') === false)
			{
				$valueExprs[] = '(?<' . $name . '>' . $pattern . ')';
			}
		}
		array_unshift($exprs, '(?<value>' . implode('|', $valueExprs) . ')');

		// Assemble the final regexp
		$regexp = '#^(?:' . implode('|', $exprs) . ')$#S';

		// Replace spaces with any amount of whitespace
		$regexp = str_replace(' ', '\\s*', $regexp);

		$this->regexp = $regexp;
	}

	/**
	* Match the relevant components of an XPath expression
	*
	* Splits the expression into "current", "param" and "fragment" matches, then merges adjacent
	* fragments together
	*
	* @param  string $expr XPath expression
	* @return array        List of matches as returned by preg_match_all() in PREG_SET_ORDER
	*/
	protected function matchXPathForExport($expr)
	{
		$tokenExprs = [
			'(?<current>\\bcurrent\\(\\))',
			'(?<param>\\$\\w+)',
			'(?<fragment>"[^"]*"|\'[^\']*\'|.)'
		];
		preg_match_all('(' . implode('|', $tokenExprs) . ')s', $expr, $matches, PREG_SET_ORDER);

		// Merge fragment tokens, starting from the end so that each fragment is appended to
		// the fragment that precedes it
		$i = count($matches);
		while (--$i > 0)
		{
			if (isset($matches[$i]['fragment'], $matches[$i - 1]['fragment']))
			{
				$matches[$i - 1]['fragment'] .= $matches[$i]['fragment'];
				unset($matches[$i]);
			}
		}

		return array_values($matches);
	}

	/**
	* Tokenize an XPath expression for use in PHP
	*
	* @param  string $expr XPath expression
	* @return array        List of [type, content] pairs
	*/
	protected function tokenizeXPathForExport($expr)
	{
		$tokens = [];
		foreach ($this->matchXPathForExport($expr) as $match)
		{
			foreach (array_reverse($match) as $k => $v)
			{
				// Use the last non-numeric match
				if (!is_numeric($k))
				{
					$tokens[] = [$k, $v];
					break;
				}
			}
		}

		return $tokens;
	}
}