Completed
Push — master ( ff6731...8cfcbe )
by Josh
28:58
created

XPathConvertor::exportXPathParam()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 6
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 1
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\RendererGenerators\PHP;
9
10
use LogicException;
11
use RuntimeException;
12
13
class XPathConvertor
{
	/**
	* @var string PCRE version
	*/
	public $pcreVersion;

	/**
	* @var string Regexp used to match XPath expressions
	*/
	protected $regexp;

	/**
	* @var bool Whether to use the mbstring functions as a replacement for XPath expressions
	*/
	public $useMultibyteStringFunctions = false;

	/**
	* Constructor
	*
	* Records the PCRE version PHP was built with. generateXPathRegexp() uses it to decide
	* whether patterns relying on named subroutine calls can be used
	*/
	public function __construct()
	{
		$this->pcreVersion = PCRE_VERSION;
	}

	/**
	* Convert an XPath expression (used in a condition) into PHP code
	*
	* This method is similar to convertXPath() but it selectively replaces some simple conditions
	* with the corresponding DOM method for performance reasons
	*
	* @param  string $expr XPath expression
	* @return string       PHP code
	*/
	public function convertCondition($expr)
	{
		$expr = trim($expr);

		// XSL: <xsl:if test="@foo">
		// PHP: if ($node->hasAttribute('foo'))
		if (preg_match('#^@([-\\w]+)$#', $expr, $m))
		{
			return '$node->hasAttribute(' . var_export($m[1], true) . ')';
		}

		// XSL: <xsl:if test="@*">
		// PHP: if ($node->attributes->length)
		if ($expr === '@*')
		{
			return '$node->attributes->length';
		}

		// XSL: <xsl:if test="not(@foo)">
		// PHP: if (!$node->hasAttribute('foo'))
		if (preg_match('#^not\\(@([-\\w]+)\\)$#', $expr, $m))
		{
			return '!$node->hasAttribute(' . var_export($m[1], true) . ')';
		}

		// XSL: <xsl:if test="$foo">
		// PHP: if (!empty($this->params['foo']))
		if (preg_match('#^\\$(\\w+)$#', $expr, $m))
		{
			return '!empty($this->params[' . var_export($m[1], true) . '])';
		}

		// XSL: <xsl:if test="not($foo)">
		// PHP: if (empty($this->params['foo']))
		if (preg_match('#^not\\(\\$(\\w+)\\)$#', $expr, $m))
		{
			return 'empty($this->params[' . var_export($m[1], true) . '])';
		}

		// XSL: <xsl:if test="@foo > 1">
		// PHP: if ($node->getAttribute('foo') > 1)
		// NOTE(review): generateXPathRegexp() deliberately leaves < and > out of its comparison
		//               pattern because of NaN; this shortcut compares using PHP's rules
		//               instead -- confirm that this is acceptable for empty values
		if (preg_match('#^([$@][-\\w]+)\\s*([<>])\\s*(\\d+)$#', $expr, $m))
		{
			return $this->convertXPath($m[1]) . $m[2] . $m[3];
		}

		// If the condition does not seem to contain a relational expression, or start with a
		// function call, we wrap it inside of a boolean() call
		if (!preg_match('#[=<>]|\\bor\\b|\\band\\b|^[-\\w]+\\s*\\(#', $expr))
		{
			// XSL: <xsl:if test="parent::foo">
			// PHP: if ($this->xpath->evaluate("boolean(parent::foo)",$node))
			$expr = 'boolean(' . $expr . ')';
		}

		// XSL: <xsl:if test="@foo='bar'">
		// PHP: if ($this->xpath->evaluate("@foo='bar'",$node))
		return $this->convertXPath($expr);
	}

	/**
	* Convert an XPath expression (used as value) into PHP code
	*
	* The expression is matched against the regexp built by generateXPathRegexp(). The first
	* non-empty named capture whose name is also a method of this class selects the method used
	* to convert the expression. Expressions that are not recognized are evaluated at runtime
	* through $this->xpath->evaluate()
	*
	* @param  string $expr XPath expression
	* @return string       PHP code
	*/
	public function convertXPath($expr)
	{
		$expr = trim($expr);

		$this->generateXPathRegexp();
		if (preg_match($this->regexp, $expr, $m))
		{
			$methodName = null;
			foreach ($m as $k => $v)
			{
				if (is_numeric($k) || $v === '' || $v === null || !method_exists($this, $k))
				{
					continue;
				}

				$methodName = $k;
				break;
			}

			if (isset($methodName))
			{
				// Default argument is the whole matched string
				$args = [$m[$methodName]];

				// Overwrite the default arguments with the named captures
				$i = 0;
				while (isset($m[$methodName . $i]))
				{
					$args[$i] = $m[$methodName . $i];
					++$i;
				}

				return call_user_func_array([$this, $methodName], $args);
			}
		}

		// If the condition does not seem to contain a relational expression, or start with a
		// function call, we wrap it inside of a string() call
		if (!preg_match('#[=<>]|\\bor\\b|\\band\\b|^[-\\w]+\\s*\\(#', $expr))
		{
			$expr = 'string(' . $expr . ')';
		}

		// Replace parameters in the expression
		return '$this->xpath->evaluate(' . $this->exportXPath($expr) . ',$node)';
	}

	/**
	* Convert an attribute reference into a getAttribute() call on the current node
	*
	* @param  string $attrName Attribute's name, without the leading @
	* @return string           PHP code
	*/
	protected function attr($attrName)
	{
		return '$node->getAttribute(' . var_export($attrName, true) . ')';
	}

	/**
	* Convert the "." expression into a read of the current node's text content
	*
	* @return string PHP code
	*/
	protected function dot()
	{
		return '$node->textContent';
	}

	/**
	* Convert a parameter reference into a read of $this->params
	*
	* @param  string $paramName Parameter's name, without the leading $
	* @return string            PHP code
	*/
	protected function param($paramName)
	{
		return '$this->params[' . var_export($paramName, true) . ']';
	}

	/**
	* Convert a quoted XPath string literal into a PHP string literal
	*
	* @param  string $string String literal, including its enclosing quotes
	* @return string         PHP code
	*/
	protected function string($string)
	{
		// The first and last characters are the quotes
		return var_export(substr($string, 1, -1), true);
	}

	/**
	* Convert a local-name() call into a read of the current node's localName
	*
	* @return string PHP code
	*/
	protected function lname()
	{
		return '$node->localName';
	}

	/**
	* Convert a name() call into a read of the current node's nodeName
	*
	* @return string PHP code
	*/
	protected function name()
	{
		return '$node->nodeName';
	}

	/**
	* Convert a number into a single-quoted PHP string
	*
	* @param  string $number Number as it appears in the expression
	* @return string         PHP code
	*/
	protected function number($number)
	{
		return "'" . $number . "'";
	}

	/**
	* Convert a string-length() call into PHP code
	*
	* @param  string $expr Argument of the call, or an empty string for the current node
	* @return string       PHP code
	*/
	protected function strlen($expr)
	{
		if ($expr === '')
		{
			$expr = '.';
		}

		$php = $this->convertXPath($expr);

		// Without mbstring, each character is replaced with a single byte before counting them
		return ($this->useMultibyteStringFunctions)
			? 'mb_strlen(' . $php . ",'utf-8')"
			: "strlen(preg_replace('(.)us','.'," . $php . '))';
	}

	/**
	* Convert a contains() call into a strpos() test
	*
	* @param  string $haystack XPath expression for the string being searched
	* @param  string $needle   XPath expression for the string being searched for
	* @return string           PHP code
	*/
	protected function contains($haystack, $needle)
	{
		return '(strpos(' . $this->convertXPath($haystack) . ',' . $this->convertXPath($needle) . ')!==false)';
	}

	/**
	* Convert a starts-with() call into a strpos() test
	*
	* @param  string $string    XPath expression for the string being tested
	* @param  string $substring XPath expression for the expected prefix
	* @return string            PHP code
	*/
	protected function startswith($string, $substring)
	{
		return '(strpos(' . $this->convertXPath($string) . ',' . $this->convertXPath($substring) . ')===0)';
	}

	/**
	* Convert a not() call into a negated PHP condition
	*
	* @param  string $expr XPath expression being negated
	* @return string       PHP code
	*/
	protected function not($expr)
	{
		return '!(' . $this->convertCondition($expr) . ')';
	}

	/**
	* Convert a not(contains()) call into a strpos() test
	*
	* @param  string $haystack XPath expression for the string being searched
	* @param  string $needle   XPath expression for the string being searched for
	* @return string           PHP code
	*/
	protected function notcontains($haystack, $needle)
	{
		return '(strpos(' . $this->convertXPath($haystack) . ',' . $this->convertXPath($needle) . ')===false)';
	}

	/**
	* Convert a substring() call into PHP code
	*
	* Uses mb_substr() if useMultibyteStringFunctions is set, otherwise the call is evaluated at
	* runtime through $this->xpath->evaluate()
	*
	* @param  string      $exprString XPath expression for the source string
	* @param  string      $exprPos    XPath expression for the 1-based start position
	* @param  string|null $exprLen    XPath expression for the length, if any
	* @return string                  PHP code
	*/
	protected function substr($exprString, $exprPos, $exprLen = null)
	{
		if (!$this->useMultibyteStringFunctions)
		{
			$expr = 'substring(' . $exprString . ',' . $exprPos;
			if (isset($exprLen))
			{
				$expr .= ',' . $exprLen;
			}
			$expr .= ')';

			return '$this->xpath->evaluate(' . $this->exportXPath($expr) . ',$node)';
		}

		// NOTE: negative values for the second argument do not produce the same result as
		//       specified in XPath if the argument is not a literal number
		$php = 'mb_substr(' . $this->convertXPath($exprString) . ',';

		// Hardcode the value if possible
		if (is_numeric($exprPos))
		{
			$php .= max(0, $exprPos - 1);
		}
		else
		{
			$php .= 'max(0,' . $this->convertXPath($exprPos) . '-1)';
		}

		$php .= ',';

		if (isset($exprLen))
		{
			if (is_numeric($exprLen))
			{
				// Handles substring(0,2) as per XPath
				if (is_numeric($exprPos) && $exprPos < 1)
				{
					$php .= max(0, $exprPos + $exprLen - 1);
				}
				else
				{
					$php .= max(0, $exprLen);
				}
			}
			else
			{
				$php .= 'max(0,' . $this->convertXPath($exprLen) . ')';
			}
		}
		else
		{
			$php .= 'null';
		}

		$php .= ",'utf-8')";

		return $php;
	}

	/**
	* Convert a substring-after() call into PHP code
	*
	* @param  string $expr XPath expression for the source string
	* @param  string $str  String literal being searched for, including its enclosing quotes
	* @return string       PHP code
	*/
	protected function substringafter($expr, $str)
	{
		// $str includes its two quotes, which are not part of the length to skip
		return 'substr(strstr(' . $this->convertXPath($expr) . ',' . $this->convertXPath($str) . '),' . (strlen($str) - 2) . ')';
	}

	/**
	* Convert a substring-before() call into PHP code
	*
	* @param  string $expr1 XPath expression for the source string
	* @param  string $expr2 XPath expression for the string being searched for
	* @return string        PHP code
	*/
	protected function substringbefore($expr1, $expr2)
	{
		return 'strstr(' . $this->convertXPath($expr1) . ',' . $this->convertXPath($expr2) . ',true)';
	}

	/**
	* Convert a comparison into PHP code
	*
	* @param  string $expr1    XPath expression for the left operand
	* @param  string $operator One of the keys of $operators below
	* @param  string $expr2    XPath expression for the right operand
	* @return string           PHP code
	*/
	protected function cmp($expr1, $operator, $expr2)
	{
		$operands  = [];
		$operators = [
			'='  => '===',
			'!=' => '!==',
			'>'  => '>',
			'>=' => '>=',
			'<'  => '<',
			'<=' => '<='
		];

		// If either operand is a number, represent it as a PHP number and replace the
		// identity operators with their loose counterparts
		foreach ([$expr1, $expr2] as $expr)
		{
			if (is_numeric($expr))
			{
				$operators['=']  = '==';
				$operators['!='] = '!=';

				$operands[] = $this->normalizeNumericLiteral($expr);
			}
			else
			{
				$operands[] = $this->convertXPath($expr);
			}
		}

		return implode($operators[$operator], $operands);
	}

	/**
	* Convert a boolean operation into PHP code
	*
	* @param  string $expr1    XPath expression for the left operand
	* @param  string $operator Either "and" or "or"
	* @param  string $expr2    XPath expression for the right operand
	* @return string           PHP code
	*/
	protected function bool($expr1, $operator, $expr2)
	{
		$operators = [
			'and' => '&&',
			'or'  => '||'
		];

		return $this->convertCondition($expr1) . $operators[$operator] . $this->convertCondition($expr2);
	}

	/**
	* Convert a parenthesized expression into PHP code
	*
	* @param  string $expr XPath expression found between the parentheses
	* @return string       PHP code
	*/
	protected function parens($expr)
	{
		return '(' . $this->convertXPath($expr) . ')';
	}

	/**
	* Convert a translate() call into a strtr() call
	*
	* @param  string $str  XPath expression for the source string
	* @param  string $from String literal for the characters to replace, including its quotes
	* @param  string $to   String literal for the replacement characters, including its quotes
	* @return string       PHP code
	*/
	protected function translate($str, $from, $to)
	{
		preg_match_all('(.)su', substr($from, 1, -1), $matches);
		$from = $matches[0];

		preg_match_all('(.)su', substr($to, 1, -1), $matches);
		$to = $matches[0];

		// We adjust $to to match the number of elements in $from, either by truncating it
		// or by padding it with empty strings
		if (count($to) > count($from))
		{
			$to = array_slice($to, 0, count($from));
		}
		else
		{
			// NOTE: we don't use array_merge() because of potential side-effects when
			//       translating digits
			while (count($from) > count($to))
			{
				$to[] = '';
			}
		}

		// Remove duplicates in $from, as well as the corresponding elements in $to
		$from = array_unique($from);
		$to   = array_intersect_key($to, $from);

		// Start building the strtr() call
		$php = 'strtr(' . $this->convertXPath($str) . ',';

		// Test whether all elements in $from and $to are exactly 1 byte long, meaning they
		// are ASCII and with no empty strings. If so, we can use the scalar version of
		// strtr(), otherwise we have to use the array version
		if ([1] === array_unique(array_map('strlen', $from))
		 && [1] === array_unique(array_map('strlen', $to)))
		{
			$php .= var_export(implode('', $from), true) . ',' . var_export(implode('', $to), true);
		}
		else
		{
			// array_unique() preserves keys, which means that $from and $to may have gaps
			// in their keys. We iterate over the keys that are left rather than over a
			// range of integers
			$pairs = [];
			foreach ($from as $k => $char)
			{
				$pairs[] = var_export($char, true) . '=>' . var_export($to[$k], true);
			}

			$php .= '[' . implode(',', $pairs) . ']';
		}

		$php .= ')';

		return $php;
	}

	/**
	* Convert a math operation into PHP code
	*
	* @param  string $expr1    XPath expression for the left operand
	* @param  string $operator One of "+", "-", "*" or "div"
	* @param  string $expr2    XPath expression for the right operand
	* @return string           PHP code
	*/
	protected function math($expr1, $operator, $expr2)
	{
		$expr1 = (is_numeric($expr1))
		       ? $this->normalizeNumericLiteral($expr1)
		       : $this->convertXPath($expr1);
		$expr2 = (is_numeric($expr2))
		       ? $this->normalizeNumericLiteral($expr2)
		       : $this->convertXPath($expr2);

		if ($operator === 'div')
		{
			$operator = '/';
		}

		// Prevent a subtraction followed by a minus sign from being read as the decrement
		// operator, e.g. "1 - -2" must not become "1--2"
		if ($operator === '-' && isset($expr2[0]) && $expr2[0] === '-')
		{
			$expr2 = ' ' . $expr2;
		}

		return $expr1 . $operator . $expr2;
	}

	/**
	* Remove the leading zeros from a numeric literal
	*
	* PHP interprets integer literals that start with a 0 as octal numbers, so the zeros must
	* be removed before the number is used in PHP source. A zero that is not followed by another
	* digit is preserved
	*
	* @param  string $number Numeric literal, optionally preceded by a minus sign
	* @return string
	*/
	protected function normalizeNumericLiteral($number)
	{
		return preg_replace('(^(-?)0+(?=\\d))', '$1', $number);
	}

	/**
	* Export an XPath expression as PHP with special consideration for XPath variables
	*
	* Will return PHP source representing the XPath expression. Each token returned by
	* tokenizeXPathForExport() is converted by the exportXPath*() method that corresponds to its
	* type, and the results are concatenated with PHP's "." operator
	*
	* @param  string $expr XPath expression
	* @return string       PHP representation of the expression
	*/
	protected function exportXPath($expr)
	{
		$phpTokens = [];
		foreach ($this->tokenizeXPathForExport($expr) as list($type, $content))
		{
			$methodName  = 'exportXPath' . ucfirst($type);
			$phpTokens[] = $this->$methodName($content);
		}

		return implode('.', $phpTokens);
	}

	/**
	* Convert a "current()" XPath expression to its PHP source representation
	*
	* @return string
	*/
	protected function exportXPathCurrent()
	{
		return '$node->getNodePath()';
	}

	/**
	* Convert a fragment of an XPath expression to its PHP source representation
	*
	* @param  string $fragment
	* @return string
	*/
	protected function exportXPathFragment($fragment)
	{
		return var_export($fragment, true);
	}

	/**
	* Convert an XSLT parameter to its PHP source representation
	*
	* @param  string $param Parameter, including the leading $
	* @return string
	*/
	protected function exportXPathParam($param)
	{
		$paramName = ltrim($param, '$');

		return '$this->getParamAsXPath(' . var_export($paramName, true) . ')';
	}

	/**
	* Generate a regexp used to parse XPath expressions
	*
	* The regexp is generated once and stored in $this->regexp. Each named group corresponds to
	* a method of this class, and the groups whose name ends with a digit hold the arguments
	* passed to that method by convertXPath()
	*
	* @return void
	*/
	protected function generateXPathRegexp()
	{
		if (isset($this->regexp))
		{
			return;
		}

		// NOTE: spaces between elements stand for optional whitespace, see the end of this method
		$patterns = [
			'attr'      => ['@', '(?<attr0>[-\\w]+)'],
			'dot'       => '\\.',
			'name'      => 'name\\(\\)',
			'lname'     => 'local-name\\(\\)',
			'param'     => ['\\$', '(?<param0>\\w+)'],
			'string'    => '"[^"]*"|\'[^\']*\'',
			'number'    => ['-?', '\\d++'],
			'strlen'    => ['string-length', '\\(', '(?<strlen0>(?&value)?)', '\\)'],
			'contains'  => [
				'contains',
				'\\(',
				'(?<contains0>(?&value))',
				',',
				'(?<contains1>(?&value))',
				'\\)'
			],
			'translate' => [
				'translate',
				'\\(',
				'(?<translate0>(?&value))',
				',',
				'(?<translate1>(?&string))',
				',',
				'(?<translate2>(?&string))',
				'\\)'
			],
			'substr' => [
				'substring',
				'\\(',
				'(?<substr0>(?&value))',
				',',
				'(?<substr1>(?&value))',
				'(?:, (?<substr2>(?&value)))?',
				'\\)'
			],
			'substringafter' => [
				'substring-after',
				'\\(',
				'(?<substringafter0>(?&value))',
				',',
				'(?<substringafter1>(?&string))',
				'\\)'
			],
			'substringbefore' => [
				'substring-before',
				'\\(',
				'(?<substringbefore0>(?&value))',
				',',
				'(?<substringbefore1>(?&value))',
				'\\)'
			],
			'startswith' => [
				'starts-with',
				'\\(',
				'(?<startswith0>(?&value))',
				',',
				'(?<startswith1>(?&value))',
				'\\)'
			],
			'math' => [
				'(?<math0>(?&attr)|(?&number)|(?&param))',
				'(?<math1>[-+*]|div)',
				'(?<math2>(?&math)|(?&math0))'
			],
			'notcontains' => [
				'not',
				'\\(',
				'contains',
				'\\(',
				'(?<notcontains0>(?&value))',
				',',
				'(?<notcontains1>(?&value))',
				'\\)',
				'\\)'
			]
		];

		$exprs = [];
		if (version_compare($this->pcreVersion, '8.13', '>='))
		{
			// Create a regexp that matches a comparison such as "@foo = 1"
			// NOTE: cannot support < or > because of NaN -- (@foo<5) returns false if @foo=''
			$exprs[] = '(?<cmp>(?<cmp0>(?&value)) (?<cmp1>!?=) (?<cmp2>(?&value)))';

			// Create a regexp that matches a parenthesized expression
			// NOTE: could be expanded to support any expression
			$exprs[] = '(?<parens>\\( (?<parens0>(?&bool)|(?&cmp)|(?&math)) \\))';

			// Create a regexp that matches boolean operations
			$exprs[] = '(?<bool>(?<bool0>(?&cmp)|(?&not)|(?&value)|(?&parens)) (?<bool1>and|or) (?<bool2>(?&bool)|(?&cmp)|(?&not)|(?&value)|(?&parens)))';

			// Create a regexp that matches not() expressions
			$exprs[] = '(?<not>not \\( (?<not0>(?&bool)|(?&value)) \\))';

			// Modify the math pattern to accept parenthesized expressions. Only the left
			// operand needs to be modified because the right operand reuses it via (?&math0)
			$patterns['math'][0] = str_replace('))', ')|(?&parens))', $patterns['math'][0]);
		}

		// Create a regexp that matches values, such as "@foo" or "42"
		$valueExprs = [];
		foreach ($patterns as $name => $pattern)
		{
			if (is_array($pattern))
			{
				$pattern = implode(' ', $pattern);
			}

			// Patterns that use subroutine calls are skipped if PCRE is too old
			if (strpos($pattern, '?&') === false || version_compare($this->pcreVersion, '8.13', '>='))
			{
				$valueExprs[] = '(?<' . $name . '>' . $pattern . ')';
			}
		}
		array_unshift($exprs, '(?<value>' . implode('|', $valueExprs) . ')');

		// Assemble the final regexp
		$regexp = '#^(?:' . implode('|', $exprs) . ')$#S';

		// Replace spaces with any amount of whitespace
		$regexp = str_replace(' ', '\\s*', $regexp);

		$this->regexp = $regexp;
	}

	/**
	* Match the relevant components of an XPath expression
	*
	* Consecutive "fragment" matches are merged into the first one of the series
	*
	* @param  string $expr XPath expression
	* @return array        List of matches as returned by preg_match_all() with PREG_SET_ORDER
	*/
	protected function matchXPathForExport($expr)
	{
		$tokenExprs = [
			'(?<current>\\bcurrent\\(\\))',
			'(?<param>\\$\\w+)',
			'(?<fragment>"[^"]*"|\'[^\']*\'|.)'
		];
		preg_match_all('(' . implode('|', $tokenExprs) . ')s', $expr, $matches, PREG_SET_ORDER);

		// Merge fragment tokens
		$i   = 0;
		$max = count($matches) - 2;
		while ($i <= $max)
		{
			if (!isset($matches[$i]['fragment']))
			{
				++$i;
				continue;
			}

			$j = $i;
			while (isset($matches[++$j]['fragment']))
			{
				$matches[$i]['fragment'] .= $matches[$j]['fragment'];
				unset($matches[$j]);
			}
			$i = $j;
		}

		return array_values($matches);
	}

	/**
	* Tokenize an XPath expression for use in PHP
	*
	* @param  string $expr XPath expression
	* @return array        List of [type, content] pairs where type is the name of the capture
	*/
	protected function tokenizeXPathForExport($expr)
	{
		$tokens = [];
		foreach ($this->matchXPathForExport($expr) as $match)
		{
			foreach (array_reverse($match) as $k => $v)
			{
				// Use the last non-numeric match
				if (!is_numeric($k))
				{
					$tokens[] = [$k, $v];
					break;
				}
			}
		}

		return $tokens;
	}
}