1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/**
* @package   s9e\TextFormatter
* @copyright Copyright (c) 2010-2018 The s9e Authors
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
*/
8
|
|
|
namespace s9e\TextFormatter\Configurator\RendererGenerators\PHP; |
9
|
|
|
|
10
|
|
|
use LogicException; |
11
|
|
|
use RuntimeException; |
12
|
|
|
|
13
|
|
|
/**
* Converts XPath expressions into PHP source code
*
* Expressions that match one of the supported patterns are converted to native PHP (DOM calls,
* string functions, comparisons). Anything else is emitted as a call to
* $this->xpath->evaluate() in the generated code.
*/
class XPathConvertor
{
	/**
	* @var string PCRE version
	*/
	public $pcreVersion;

	/**
	* @var string Regexp used to match XPath expressions
	*/
	protected $regexp;

	/**
	* @var bool Whether to use the mbstring functions as a replacement for XPath expressions
	*/
	public $useMultibyteStringFunctions = false;

	/**
	* Constructor
	*/
	public function __construct()
	{
		$this->pcreVersion = PCRE_VERSION;
	}

	/**
	* Convert an XPath expression (used in a condition) into PHP code
	*
	* This method is similar to convertXPath() but it selectively replaces some simple conditions
	* with the corresponding DOM method for performance reasons
	*
	* @param  string $expr XPath expression
	* @return string       PHP code
	*/
	public function convertCondition($expr)
	{
		$expr = trim($expr);

		// XSL: <xsl:if test="@foo">
		// PHP: if ($node->hasAttribute('foo'))
		if (preg_match('#^@([-\\w]+)$#', $expr, $m))
		{
			return '$node->hasAttribute(' . var_export($m[1], true) . ')';
		}

		// XSL: <xsl:if test="@*">
		// PHP: if ($node->attributes->length)
		if ($expr === '@*')
		{
			return '$node->attributes->length';
		}

		// XSL: <xsl:if test="not(@foo)">
		// PHP: if (!$node->hasAttribute('foo'))
		if (preg_match('#^not\\(@([-\\w]+)\\)$#', $expr, $m))
		{
			return '!$node->hasAttribute(' . var_export($m[1], true) . ')';
		}

		// XSL: <xsl:if test="$foo">
		// PHP: if (!empty($this->params['foo']))
		if (preg_match('#^\\$(\\w+)$#', $expr, $m))
		{
			return '!empty($this->params[' . var_export($m[1], true) . '])';
		}

		// XSL: <xsl:if test="not($foo)">
		// PHP: if (empty($this->params['foo']))
		if (preg_match('#^not\\(\\$(\\w+)\\)$#', $expr, $m))
		{
			return 'empty($this->params[' . var_export($m[1], true) . '])';
		}

		// XSL: <xsl:if test="@foo > 1">
		// PHP: if ($node->getAttribute('foo') > 1)
		if (preg_match('#^([$@][-\\w]+)\\s*([<>])\\s*(\\d+)$#', $expr, $m))
		{
			// The number is normalized so that "08" is not emitted as an (invalid) octal literal
			return $this->convertXPath($m[1]) . $m[2] . $this->normalizeNumber($m[3]);
		}

		// If the condition does not seem to contain a relational expression, or start with a
		// function call, we wrap it inside of a boolean() call
		if (!preg_match('#[=<>]|\\bor\\b|\\band\\b|^[-\\w]+\\s*\\(#', $expr))
		{
			// XSL: <xsl:if test="parent::foo">
			// PHP: if ($this->xpath->evaluate("boolean(parent::foo)",$node))
			$expr = 'boolean(' . $expr . ')';
		}

		// XSL: <xsl:if test="@foo='bar'">
		// PHP: if ($this->xpath->evaluate("@foo='bar'",$node))
		return $this->convertXPath($expr);
	}

	/**
	* Convert an XPath expression (used as value) into PHP code
	*
	* The expression is matched against the regexp built by generateXPathRegexp(). The first
	* non-empty named capture whose name is also a method of this class selects the converter,
	* which receives the captures named "<method>0", "<method>1", etc... as arguments.
	*
	* @param  string $expr XPath expression
	* @return string       PHP code
	*/
	public function convertXPath($expr)
	{
		$expr = trim($expr);

		$this->generateXPathRegexp();
		if (preg_match($this->regexp, $expr, $m))
		{
			$methodName = null;
			foreach ($m as $k => $v)
			{
				if (is_numeric($k) || $v === '' || $v === null || !method_exists($this, $k))
				{
					continue;
				}

				$methodName = $k;
				break;
			}

			if (isset($methodName))
			{
				// Default argument is the whole matched string
				$args = [$m[$methodName]];

				// Overwrite the default arguments with the named captures
				$i = 0;
				while (isset($m[$methodName . $i]))
				{
					$args[$i] = $m[$methodName . $i];
					++$i;
				}

				return call_user_func_array([$this, $methodName], $args);
			}
		}

		// If the condition does not seem to contain a relational expression, or start with a
		// function call, we wrap it inside of a string() call
		if (!preg_match('#[=<>]|\\bor\\b|\\band\\b|^[-\\w]+\\s*\\(#', $expr))
		{
			$expr = 'string(' . $expr . ')';
		}

		// Replace parameters in the expression
		return '$this->xpath->evaluate(' . $this->exportXPath($expr) . ',$node)';
	}

	/**
	* Convert an attribute reference such as "@foo"
	*
	* @param  string $attrName Attribute's name, without the leading @
	* @return string           PHP code
	*/
	protected function attr($attrName)
	{
		return '$node->getAttribute(' . var_export($attrName, true) . ')';
	}

	/**
	* Convert the context node expression "."
	*
	* @return string PHP code
	*/
	protected function dot()
	{
		return '$node->textContent';
	}

	/**
	* Convert a parameter reference such as "$foo"
	*
	* @param  string $paramName Parameter's name, without the leading $
	* @return string            PHP code
	*/
	protected function param($paramName)
	{
		return '$this->params[' . var_export($paramName, true) . ']';
	}

	/**
	* Convert a quoted XPath string literal into a PHP string literal
	*
	* @param  string $string XPath literal, including its enclosing quotes
	* @return string         PHP code
	*/
	protected function string($string)
	{
		// Remove the enclosing quotes before exporting the content
		return var_export(substr($string, 1, -1), true);
	}

	/**
	* Convert a "local-name()" call
	*
	* @return string PHP code
	*/
	protected function lname()
	{
		return '$node->localName';
	}

	/**
	* Convert a "name()" call
	*
	* @return string PHP code
	*/
	protected function name()
	{
		return '$node->nodeName';
	}

	/**
	* Convert a literal number
	*
	* The number is emitted as a single-quoted PHP string, not as a PHP number
	*
	* @param  string $number Number as matched in the XPath expression
	* @return string         PHP code
	*/
	protected function number($number)
	{
		return "'" . $number . "'";
	}

	/**
	* Convert a "string-length()" call
	*
	* @param  string $expr Argument of the call, or an empty string for the context node
	* @return string       PHP code
	*/
	protected function strlen($expr)
	{
		if ($expr === '')
		{
			$expr = '.';
		}

		$php = $this->convertXPath($expr);

		// Without mbstring, every character is replaced with a single byte before counting
		return ($this->useMultibyteStringFunctions)
			? 'mb_strlen(' . $php . ",'utf-8')"
			: "strlen(preg_replace('(.)us','.'," . $php . '))';
	}

	/**
	* Convert a "contains()" call
	*
	* @param  string $haystack XPath expression for the string being searched
	* @param  string $needle   XPath expression for the string being looked for
	* @return string           PHP code
	*/
	protected function contains($haystack, $needle)
	{
		return '(strpos(' . $this->convertXPath($haystack) . ',' . $this->convertXPath($needle) . ')!==false)';
	}

	/**
	* Convert a "starts-with()" call
	*
	* @param  string $string    XPath expression for the string being tested
	* @param  string $substring XPath expression for the expected prefix
	* @return string            PHP code
	*/
	protected function startswith($string, $substring)
	{
		return '(strpos(' . $this->convertXPath($string) . ',' . $this->convertXPath($substring) . ')===0)';
	}

	/**
	* Convert a "not()" call
	*
	* @param  string $expr XPath expression being negated
	* @return string       PHP code
	*/
	protected function not($expr)
	{
		return '!(' . $this->convertCondition($expr) . ')';
	}

	/**
	* Convert a "not(contains())" call
	*
	* @param  string $haystack XPath expression for the string being searched
	* @param  string $needle   XPath expression for the string being looked for
	* @return string           PHP code
	*/
	protected function notcontains($haystack, $needle)
	{
		return '(strpos(' . $this->convertXPath($haystack) . ',' . $this->convertXPath($needle) . ')===false)';
	}

	/**
	* Convert a "substring()" call
	*
	* Without mbstring the call is left to XPath. With mbstring it is converted to mb_substr()
	*
	* @param  string      $exprString XPath expression for the source string
	* @param  string      $exprPos    XPath expression for the 1-based start position
	* @param  string|null $exprLen    XPath expression for the length, if any
	* @return string                  PHP code
	*/
	protected function substr($exprString, $exprPos, $exprLen = null)
	{
		if (!$this->useMultibyteStringFunctions)
		{
			$expr = 'substring(' . $exprString . ',' . $exprPos;
			if (isset($exprLen))
			{
				$expr .= ',' . $exprLen;
			}
			$expr .= ')';

			return '$this->xpath->evaluate(' . $this->exportXPath($expr) . ',$node)';
		}

		// NOTE: negative values for the second argument do not produce the same result as
		//       specified in XPath if the argument is not a literal number
		$php = 'mb_substr(' . $this->convertXPath($exprString) . ',';

		// Hardcode the value if possible
		if (is_numeric($exprPos))
		{
			$php .= max(0, $exprPos - 1);
		}
		else
		{
			$php .= 'max(0,' . $this->convertXPath($exprPos) . '-1)';
		}

		$php .= ',';

		if (isset($exprLen))
		{
			if (is_numeric($exprLen))
			{
				// Handles substring(0,2) as per XPath
				if (is_numeric($exprPos) && $exprPos < 1)
				{
					$php .= max(0, $exprPos + $exprLen - 1);
				}
				else
				{
					$php .= max(0, $exprLen);
				}
			}
			else
			{
				$php .= 'max(0,' . $this->convertXPath($exprLen) . ')';
			}
		}
		else
		{
			$php .= 'null';
		}

		$php .= ",'utf-8')";

		return $php;
	}

	/**
	* Convert a "substring-after()" call
	*
	* @param  string $expr XPath expression for the source string
	* @param  string $str  Quoted XPath string literal used as separator
	* @return string       PHP code
	*/
	protected function substringafter($expr, $str)
	{
		// $str includes its two enclosing quotes, hence the -2 to get the separator's length
		return 'substr(strstr(' . $this->convertXPath($expr) . ',' . $this->convertXPath($str) . '),' . (strlen($str) - 2) . ')';
	}

	/**
	* Convert a "substring-before()" call
	*
	* @param  string $expr1 XPath expression for the source string
	* @param  string $expr2 XPath expression for the separator
	* @return string        PHP code
	*/
	protected function substringbefore($expr1, $expr2)
	{
		return 'strstr(' . $this->convertXPath($expr1) . ',' . $this->convertXPath($expr2) . ',true)';
	}

	/**
	* Convert a comparison such as "@foo = 1"
	*
	* @param  string $expr1    Left operand
	* @param  string $operator XPath operator
	* @param  string $expr2    Right operand
	* @return string           PHP code
	*/
	protected function cmp($expr1, $operator, $expr2)
	{
		$operands  = [];
		$operators = [
			'='  => '===',
			'!=' => '!==',
			'>'  => '>',
			'>=' => '>=',
			'<'  => '<',
			'<=' => '<='
		];

		// If either operand is a number, represent it as a PHP number and replace the identity
		// operators with their loose counterparts
		foreach ([$expr1, $expr2] as $expr)
		{
			if (is_numeric($expr))
			{
				$operators['=']  = '==';
				$operators['!='] = '!=';

				// Remove all leading zeros so the number is not interpreted as octal
				$operands[] = $this->normalizeNumber($expr);
			}
			else
			{
				$operands[] = $this->convertXPath($expr);
			}
		}

		return implode($operators[$operator], $operands);
	}

	/**
	* Convert a boolean operation such as "@foo and @bar"
	*
	* @param  string $expr1    Left operand
	* @param  string $operator Either "and" or "or"
	* @param  string $expr2    Right operand
	* @return string           PHP code
	*/
	protected function bool($expr1, $operator, $expr2)
	{
		$operators = [
			'and' => '&&',
			'or'  => '||'
		];

		return $this->convertCondition($expr1) . $operators[$operator] . $this->convertCondition($expr2);
	}

	/**
	* Convert a parenthesized expression
	*
	* @param  string $expr Expression found between the parentheses
	* @return string       PHP code
	*/
	protected function parens($expr)
	{
		return '(' . $this->convertXPath($expr) . ')';
	}

	/**
	* Convert a "translate()" call into a strtr() call
	*
	* @param  string $str  XPath expression for the source string
	* @param  string $from Quoted XPath string literal, characters to replace
	* @param  string $to   Quoted XPath string literal, replacement characters
	* @return string       PHP code
	*/
	protected function translate($str, $from, $to)
	{
		preg_match_all('(.)su', substr($from, 1, -1), $matches);
		$from = $matches[0];

		preg_match_all('(.)su', substr($to, 1, -1), $matches);
		$to = $matches[0];

		// We adjust $to to match the number of elements in $from, either by truncating it
		// or by padding it with empty strings
		if (count($to) > count($from))
		{
			$to = array_slice($to, 0, count($from));
		}
		else
		{
			// NOTE: we don't use array_merge() because of potential side-effects when
			//       translating digits
			while (count($from) > count($to))
			{
				$to[] = '';
			}
		}

		// Remove duplicates in $from, as well as the corresponding elements in $to
		$from = array_unique($from);
		$to   = array_intersect_key($to, $from);

		// Start building the strtr() call
		$php = 'strtr(' . $this->convertXPath($str) . ',';

		// Test whether all elements in $from and $to are exactly 1 byte long, meaning they
		// are ASCII and with no empty strings. If so, we can use the scalar version of
		// strtr(), otherwise we have to use the array version
		if ([1] === array_unique(array_map('strlen', $from))
		 && [1] === array_unique(array_map('strlen', $to)))
		{
			$php .= var_export(implode('', $from), true) . ',' . var_export(implode('', $to), true);
		}
		else
		{
			// array_unique() preserves keys, which may leave gaps in $from. We iterate over
			// the actual keys so that every remaining character is paired with its replacement
			$pairs = [];
			foreach ($from as $k => $char)
			{
				$pairs[] = var_export($char, true) . '=>' . var_export($to[$k], true);
			}

			$php .= '[' . implode(',', $pairs) . ']';
		}

		$php .= ')';

		return $php;
	}

	/**
	* Convert a math operation such as "@foo + 1"
	*
	* @param  string $expr1    Left operand
	* @param  string $operator One of "+", "-", "*" or "div"
	* @param  string $expr2    Right operand
	* @return string           PHP code
	*/
	protected function math($expr1, $operator, $expr2)
	{
		// Literal numbers are used as-is minus their leading zeros, other operands are converted
		$expr1 = (is_numeric($expr1)) ? $this->normalizeNumber($expr1) : $this->convertXPath($expr1);
		$expr2 = (is_numeric($expr2)) ? $this->normalizeNumber($expr2) : $this->convertXPath($expr2);

		if ($operator === 'div')
		{
			$operator = '/';
		}

		return $expr1 . $operator . $expr2;
	}

	/**
	* Remove the leading zeros from a literal number
	*
	* Prevents a number such as "010" from being interpreted as an octal literal, and a number
	* such as "08" from being an invalid literal, once written into PHP source
	*
	* @param  string $number Literal number
	* @return string         Same number with no superfluous leading zeros
	*/
	protected function normalizeNumber($number)
	{
		return preg_replace('(^(-?)0+(?=\\d))', '$1', $number);
	}

	/**
	* Export an XPath expression as PHP with special consideration for XPath variables
	*
	* Will return PHP source representing the XPath expression, with special consideration for XPath
	* variables which are returned as a method call to XPath::export()
	*
	* @param  string $expr XPath expression
	* @return string       PHP representation of the expression
	*/
	protected function exportXPath($expr)
	{
		$phpTokens = [];
		foreach ($this->tokenizeXPathForExport($expr) as list($type, $content))
		{
			// Dispatch to exportXPathCurrent(), exportXPathFragment() or exportXPathParam()
			$methodName  = 'exportXPath' . ucfirst($type);
			$phpTokens[] = $this->$methodName($content);
		}

		return implode('.', $phpTokens);
	}

	/**
	* Convert a "current()" XPath expression to its PHP source representation
	*
	* @return string
	*/
	protected function exportXPathCurrent()
	{
		return '$node->getNodePath()';
	}

	/**
	* Convert a fragment of an XPath expression to its PHP source representation
	*
	* @param  string $fragment
	* @return string
	*/
	protected function exportXPathFragment($fragment)
	{
		return var_export($fragment, true);
	}

	/**
	* Convert an XSLT parameter to its PHP source representation
	*
	* @param  string $param Parameter, including the leading $
	* @return string
	*/
	protected function exportXPathParam($param)
	{
		$paramName = ltrim($param, '$');

		return '$this->getParamAsXPath(' . var_export($paramName, true) . ')';
	}

	/**
	* Generate a regexp used to parse XPath expressions
	*
	* The regexp is generated once and stored in $this->regexp. Each pattern is wrapped in a
	* capture named after the method that handles it in convertXPath().
	*
	* @return void
	*/
	protected function generateXPathRegexp()
	{
		if (isset($this->regexp))
		{
			return;
		}

		// Patterns expressed as arrays are joined with a space, and every space is eventually
		// replaced with \s* in the final regexp
		$patterns = [
			'attr'      => ['@', '(?<attr0>[-\\w]+)'],
			'dot'       => '\\.',
			'name'      => 'name\\(\\)',
			'lname'     => 'local-name\\(\\)',
			'param'     => ['\\$', '(?<param0>\\w+)'],
			'string'    => '"[^"]*"|\'[^\']*\'',
			'number'    => ['-?', '\\d++'],
			'strlen'    => ['string-length', '\\(', '(?<strlen0>(?&value)?)', '\\)'],
			'contains'  => [
				'contains',
				'\\(',
				'(?<contains0>(?&value))',
				',',
				'(?<contains1>(?&value))',
				'\\)'
			],
			'translate' => [
				'translate',
				'\\(',
				'(?<translate0>(?&value))',
				',',
				'(?<translate1>(?&string))',
				',',
				'(?<translate2>(?&string))',
				'\\)'
			],
			'substr' => [
				'substring',
				'\\(',
				'(?<substr0>(?&value))',
				',',
				'(?<substr1>(?&value))',
				'(?:, (?<substr2>(?&value)))?',
				'\\)'
			],
			'substringafter' => [
				'substring-after',
				'\\(',
				'(?<substringafter0>(?&value))',
				',',
				'(?<substringafter1>(?&string))',
				'\\)'
			],
			'substringbefore' => [
				'substring-before',
				'\\(',
				'(?<substringbefore0>(?&value))',
				',',
				'(?<substringbefore1>(?&value))',
				'\\)'
			],
			'startswith' => [
				'starts-with',
				'\\(',
				'(?<startswith0>(?&value))',
				',',
				'(?<startswith1>(?&value))',
				'\\)'
			],
			'math' => [
				'(?<math0>(?&attr)|(?&number)|(?&param))',
				'(?<math1>[-+*]|div)',
				'(?<math2>(?&math)|(?&math0))'
			],
			'notcontains' => [
				'not',
				'\\(',
				'contains',
				'\\(',
				'(?<notcontains0>(?&value))',
				',',
				'(?<notcontains1>(?&value))',
				'\\)',
				'\\)'
			]
		];

		$exprs = [];
		if (version_compare($this->pcreVersion, '8.13', '>='))
		{
			// Create a regexp that matches a comparison such as "@foo = 1"
			// NOTE: cannot support < or > because of NaN -- (@foo<5) returns false if @foo=''
			$exprs[] = '(?<cmp>(?<cmp0>(?&value)) (?<cmp1>!?=) (?<cmp2>(?&value)))';

			// Create a regexp that matches a parenthesized expression
			// NOTE: could be expanded to support any expression
			$exprs[] = '(?<parens>\\( (?<parens0>(?&bool)|(?&cmp)|(?&math)) \\))';

			// Create a regexp that matches boolean operations
			$exprs[] = '(?<bool>(?<bool0>(?&cmp)|(?&not)|(?&value)|(?&parens)) (?<bool1>and|or) (?<bool2>(?&bool)|(?&cmp)|(?&not)|(?&value)|(?&parens)))';

			// Create a regexp that matches not() expressions
			$exprs[] = '(?<not>not \\( (?<not0>(?&bool)|(?&value)) \\))';

			// Modify the math pattern to accept parenthesized expressions
			// NOTE: the operator pattern at index 1 does not contain "))" so the second
			//       replacement leaves it unchanged
			$patterns['math'][0] = str_replace('))', ')|(?&parens))', $patterns['math'][0]);
			$patterns['math'][1] = str_replace('))', ')|(?&parens))', $patterns['math'][1]);
		}

		// Create a regexp that matches values, such as "@foo" or "42"
		$valueExprs = [];
		foreach ($patterns as $name => $pattern)
		{
			if (is_array($pattern))
			{
				$pattern = implode(' ', $pattern);
			}

			// Patterns that reference other patterns are only used if PCRE is recent enough
			if (strpos($pattern, '?&') === false || version_compare($this->pcreVersion, '8.13', '>='))
			{
				$valueExprs[] = '(?<' . $name . '>' . $pattern . ')';
			}
		}
		array_unshift($exprs, '(?<value>' . implode('|', $valueExprs) . ')');

		// Assemble the final regexp
		$regexp = '#^(?:' . implode('|', $exprs) . ')$#S';

		// Replace spaces with any amount of whitespace
		$regexp = str_replace(' ', '\\s*', $regexp);

		$this->regexp = $regexp;
	}

	/**
	* Match the relevant components of an XPath expression
	*
	* Consecutive "fragment" matches are merged into a single match
	*
	* @param  string $expr XPath expression
	* @return array
	*/
	protected function matchXPathForExport($expr)
	{
		$tokenExprs = [
			'(?<current>\\bcurrent\\(\\))',
			'(?<param>\\$\\w+)',
			'(?<fragment>"[^"]*"|\'[^\']*\'|.)'
		];
		preg_match_all('(' . implode('|', $tokenExprs) . ')s', $expr, $matches, PREG_SET_ORDER);

		// Merge fragment tokens
		$i   = 0;
		$max = count($matches) - 2;
		while ($i <= $max)
		{
			if (!isset($matches[$i]['fragment']))
			{
				++$i;
				continue;
			}

			// Append every fragment that immediately follows to the current one
			$j = $i;
			while (isset($matches[++$j]['fragment']))
			{
				$matches[$i]['fragment'] .= $matches[$j]['fragment'];
				unset($matches[$j]);
			}
			$i = $j;
		}

		return array_values($matches);
	}

	/**
	* Tokenize an XPath expression for use in PHP
	*
	* @param  string $expr XPath expression
	* @return array        List of [type, content] pairs
	*/
	protected function tokenizeXPathForExport($expr)
	{
		$tokens = [];
		foreach ($this->matchXPathForExport($expr) as $match)
		{
			foreach (array_reverse($match) as $k => $v)
			{
				// Use the last non-numeric match
				if (!is_numeric($k))
				{
					$tokens[] = [$k, $v];
					break;
				}
			}
		}

		return $tokens;
	}
}