1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/** |
4
|
|
|
* @package s9e\TextFormatter |
5
|
|
|
* @copyright Copyright (c) 2010-2018 The s9e Authors |
6
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php The MIT License |
7
|
|
|
*/ |
8
|
|
|
namespace s9e\TextFormatter\Plugins\Preg; |
9
|
|
|
|
10
|
|
|
use DOMAttr; |
11
|
|
|
use DOMText; |
12
|
|
|
use DOMXPath; |
13
|
|
|
use Exception; |
14
|
|
|
use InvalidArgumentException; |
15
|
|
|
use s9e\TextFormatter\Configurator\Helpers\RegexpParser; |
16
|
|
|
use s9e\TextFormatter\Configurator\Helpers\TemplateHelper; |
17
|
|
|
use s9e\TextFormatter\Configurator\Items\Regexp; |
18
|
|
|
use s9e\TextFormatter\Configurator\Items\Tag; |
19
|
|
|
use s9e\TextFormatter\Configurator\JavaScript\RegexpConvertor; |
20
|
|
|
use s9e\TextFormatter\Configurator\Validators\TagName; |
21
|
|
|
use s9e\TextFormatter\Plugins\ConfiguratorBase; |
22
|
|
|
|
23
|
|
|
class Configurator extends ConfiguratorBase
{
	/**
	* @var array[] Captures from current regexp. Index 0 is a placeholder for the whole match (its
	*              name and expr are both null), subsequent indexes map to the regexp's capturing
	*              subpatterns in order of appearance
	*/
	protected $captures;

	/**
	* @var array[] List of [tagName, regexp, passthroughIdx]
	*/
	protected $collection = [];

	/**
	* @var string Delimiter used in current regexp
	*/
	protected $delimiter;

	/**
	* @var string Non-D modifiers used in current regexp
	*/
	protected $modifiers;

	/**
	* @var array References used in current template, grouped in three lists: "anywhere", "asUrl"
	*            and "inText". Each list uses the capture's index as both key and value
	*/
	protected $references;

	/**
	* @var string Regexp used to find references in the templates. We check that the reference is
	*             not preceded with an odd number of backslashes
	*/
	protected $referencesRegexp = '((?<!\\\\)(?:\\\\\\\\)*\\K(?:[$\\\\]\\d+|\\$\\{\\d+\\}))S';

	/**
	* {@inheritdoc}
	*
	* Returns nothing (null) if no regexp has been registered via match() or replace(). Otherwise,
	* returns an array whose "generics" element is a list of
	* [tagName, Regexp instance, passthroughIdx, capture names]
	*/
	public function asConfig()
	{
		if (!count($this->collection))
		{
			// Nothing has been configured, there is no config to return
			return;
		}

		$pregs = [];
		foreach ($this->collection as list($tagName, $regexp, $passthroughIdx))
		{
			$captures = RegexpParser::getCaptureNames($regexp);
			$pregs[]  = [$tagName, new Regexp($regexp, true), $passthroughIdx, $captures];
		}

		return ['generics' => $pregs];
	}

	/**
	* {@inheritdoc}
	*
	* PREG_HAS_PASSTHROUGH is true if at least one registered regexp has a non-zero passthrough
	* index
	*/
	public function getJSHints()
	{
		$hasPassthrough = false;
		foreach ($this->collection as $entry)
		{
			// Each entry is [tagName, regexp, passthroughIdx] and only the index matters here
			if ($entry[2])
			{
				$hasPassthrough = true;
				break;
			}
		}

		return ['PREG_HAS_PASSTHROUGH' => $hasPassthrough];
	}

	/**
	* Configure a pattern-based match
	*
	* Unlike replace(), this method only registers the regexp and does not create the tag. If the
	* regexp contains several catch-all captures such as (.*?) the last one is the one used as
	* passthrough.
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $tagName  Name of the tag that holds the matched text
	* @return void
	*
	* @throws InvalidArgumentException if the regexp is invalid
	*/
	public function match($regexp, $tagName)
	{
		$tagName        = TagName::normalize($tagName);
		$passthroughIdx = 0;
		$this->parseRegexp($regexp);
		foreach ($this->captures as $i => $capture)
		{
			// NOTE: capture 0 has a null expr, isCatchAll() takes care of it
			if (!$this->isCatchAll($capture['expr']))
			{
				continue;
			}
			$passthroughIdx = $i;
		}

		$this->collection[] = [$tagName, $regexp, $passthroughIdx];
	}

	/**
	* Configure a pattern-based replacement
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $template Template to be used for rendering
	* @param  string $tagName  Name of the tag to create. A name based on the regexp is
	*                          automatically generated if none is provided
	* @return Tag              The tag created to represent this replacement
	*
	* @throws InvalidArgumentException if the regexp is invalid
	*/
	public function replace($regexp, $template, $tagName = null)
	{
		if (!isset($tagName))
		{
			// Derive a stable name from the regexp so that the same regexp maps to the same tag
			$tagName = 'PREG_' . strtoupper(dechex(crc32($regexp)));
		}
		$this->parseRegexp($regexp);
		$this->parseTemplate($template);

		$passthroughIdx = $this->getPassthroughCapture();
		if ($passthroughIdx)
		{
			// Flag the capture so that fixUnnamedCaptures() knows it doesn't need an attribute
			$this->captures[$passthroughIdx]['passthrough'] = true;
		}

		// NOTE: the order matters, fixUnnamedCaptures() assigns the names that convertTemplate()
		//       uses when it replaces numeric references with attribute expressions
		$regexp   = $this->fixUnnamedCaptures($regexp);
		$template = $this->convertTemplate($template, $passthroughIdx);

		$this->collection[] = [$tagName, $regexp, $passthroughIdx];

		return $this->createTag($tagName, $template);
	}

	/**
	* Add given attribute to given tag based on parsed captures
	*
	* The attribute is validated by a #regexp filter built from the expression of every capture
	* that bears this name, anchored at both ends. A #url filter is appended if any of those
	* captures is used as a URL in the template.
	*
	* @param  Tag    $tag
	* @param  string $attrName
	* @return void
	*/
	protected function addAttribute(Tag $tag, $attrName)
	{
		$isUrl = false;
		$exprs = [];
		foreach ($this->captures as $key => $capture)
		{
			if ($capture['name'] !== $attrName)
			{
				continue;
			}
			$exprs[] = $capture['expr'];
			if (isset($this->references['asUrl'][$key]))
			{
				$isUrl = true;
			}
		}

		// array_unique() preserves keys and keeps the first occurrence, so $exprs[0] always exists
		$exprs = array_unique($exprs);

		// Rebuild a full regexp using the original delimiter and modifiers, plus D so that $ does
		// not match before a trailing newline
		$regexp = $this->delimiter . '^';
		$regexp .= (count($exprs) === 1) ? $exprs[0] : '(?:' . implode('|', $exprs) . ')';
		$regexp .= '$' . $this->delimiter . 'D' . $this->modifiers;

		$attribute = $tag->attributes->add($attrName);

		$filter = $this->configurator->attributeFilters['#regexp'];
		$filter->setRegexp($regexp);
		$attribute->filterChain[] = $filter;

		if ($isUrl)
		{
			$filter = $this->configurator->attributeFilters['#url'];
			$attribute->filterChain[] = $filter;
		}
	}

	/**
	* Convert a preg-style replacement to a template
	*
	* @param  string  $template       Original template
	* @param  integer $passthroughIdx Index of the passthrough capture
	* @return string                  Modified template
	*/
	protected function convertTemplate($template, $passthroughIdx)
	{
		// Replace numeric references in the template with the value of the corresponding attribute
		// values or passthrough
		$template = TemplateHelper::replaceTokens(
			$template,
			$this->referencesRegexp,
			function ($m, $node) use ($passthroughIdx)
			{
				$key = (int) trim($m[0], '\\${}');
				if ($key === 0)
				{
					// $0 copies the whole textContent
					return ['expression', '.'];
				}
				if ($key === $passthroughIdx && $node instanceof DOMText)
				{
					// Passthrough capture, does not include start/end tags
					return ['passthrough'];
				}
				if (isset($this->captures[$key]['name']))
				{
					// Normal capture, replaced by the equivalent expression
					return ['expression', '@' . $this->captures[$key]['name']];
				}

				// Non-existent captures are simply ignored, similarly to preg_replace()
				return ['literal', ''];
			}
		);

		// Unescape backslashes and special characters in the template
		$template = TemplateHelper::replaceTokens(
			$template,
			'(\\\\+[0-9${\\\\])',
			function ($m)
			{
				return ['literal', stripslashes($m[0])];
			}
		);

		return $template;
	}

	/**
	* Create the tag that matches current regexp
	*
	* One attribute is created for each distinct capture name. The tag's template is normalized
	* and checked before the tag is added to the configurator's collection.
	* NOTE(review): checkTag() presumably throws on unsafe templates, in which case the tag is not
	*               added -- confirm against TemplateChecker
	*
	* @param  string $tagName
	* @param  string $template
	* @return Tag
	*/
	protected function createTag($tagName, $template)
	{
		$tag = new Tag;
		foreach ($this->captures as $key => $capture)
		{
			if (!isset($capture['name']))
			{
				continue;
			}

			$attrName = $capture['name'];
			if (isset($tag->attributes[$attrName]))
			{
				// Several captures can share the same name, addAttribute() handles them all at once
				continue;
			}

			$this->addAttribute($tag, $attrName);
		}
		$tag->template = $template;

		// Normalize the tag's template
		$this->configurator->templateNormalizer->normalizeTag($tag);

		// Check the safeness of this tag
		$this->configurator->templateChecker->checkTag($tag);

		return $this->configurator->tags->add($tagName, $tag);
	}

	/**
	* Give a name to unnamed captures that are referenced in current replacement
	*
	* Captures are named after their index, prefixed with an underscore, e.g. "_1". The names are
	* stored in $this->captures as well as injected in the regexp.
	*
	* @param  string $regexp Original regexp
	* @return string         Modified regexp
	*/
	protected function fixUnnamedCaptures($regexp)
	{
		$keys = [];
		foreach ($this->references['anywhere'] as $key)
		{
			$capture = $this->captures[$key];
			if (!$key || isset($capture['name']))
			{
				// Ignore the whole match ($0) and captures that already have a name
				continue;
			}
			// Give the capture a name if it's used as URL or it's not a passthrough
			if (isset($this->references['asUrl'][$key]) || !isset($capture['passthrough']))
			{
				$keys[] = $key;
			}
		}

		// Alter the original regexp to inject the subpatterns' names. The position is equal to the
		// subpattern's position plus 2, to account for the delimiter at the start of the regexp and
		// the opening parenthesis of the subpattern. Also, we need to process them in reverse order
		// so that replacements don't change the position of subsequent subpatterns
		rsort($keys);
		foreach ($keys as $key)
		{
			$name   = '_' . $key;
			$pos    = $this->captures[$key]['pos'];
			$regexp = substr_replace($regexp, "?'" . $name . "'", 2 + $pos, 0);
			$this->captures[$key]['name'] = $name;
		}

		return $regexp;
	}

	/**
	* Get the index of the capture used for passthrough in current replacement
	*
	* A capture qualifies if it is referenced in the template's text and its expression is a
	* catch-all such as .*? -- if more than one capture qualifies, none is used.
	*
	* @return integer Index of the capture, or 0 if there is no passthrough
	*/
	protected function getPassthroughCapture()
	{
		$passthrough = 0;
		foreach ($this->references['inText'] as $key)
		{
			if (!$this->isCatchAll($this->captures[$key]['expr']))
			{
				// Ignore if it's not a catch-all expression such as .*?
				continue;
			}
			if ($passthrough)
			{
				// Abort if there's more than 1 possible passthrough
				$passthrough = 0;
				break;
			}
			$passthrough = (int) $key;
		}

		return $passthrough;
	}

	/**
	* Parse a regexp and return its info
	*
	* @param  string $regexp
	* @return array          Info returned by RegexpParser::parse()
	*
	* @throws InvalidArgumentException if PCRE cannot compile the regexp
	*/
	protected function getRegexpInfo($regexp)
	{
		// Run the regexp against an empty string to let PCRE validate it. The warning that
		// accompanies an invalid regexp is silenced because the failure is reported as an exception
		if (@preg_match_all($regexp, '') === false)
		{
			throw new InvalidArgumentException('Invalid regexp');
		}

		return RegexpParser::parse($regexp);
	}

	/**
	* Test whether given expression is a catch-all expression such as .*?
	*
	* Matches exactly a dot followed by * or +, optionally followed by ? and nothing else.
	*
	* @param  string|null $expr Subpattern, or null for the placeholder that represents the whole
	*                           match
	* @return bool
	*/
	protected function isCatchAll($expr)
	{
		// The entry for capture 0 has a null expression. It is cast to a string because passing
		// null as preg_match()'s subject is deprecated as of PHP 8.1. The result is unchanged: an
		// empty string is not a catch-all
		return (bool) preg_match('(^\\.[*+]\\??$)D', (string) $expr);
	}

	/**
	* Parse given regexp and store its information
	*
	* Resets $this->captures then populates it along with $this->delimiter and $this->modifiers.
	*
	* @param  string $regexp
	* @return void
	*
	* @throws InvalidArgumentException if the regexp is invalid
	*/
	protected function parseRegexp($regexp)
	{
		// Index 0 stands for the whole match so that captures' indexes match preg's numbering
		$this->captures = [['name' => null, 'expr' => null]];
		$regexpInfo = $this->getRegexpInfo($regexp);
		$this->delimiter = $regexpInfo['delimiter'];

		// The D modifier is removed here because addAttribute() always adds its own
		$this->modifiers = str_replace('D', '', $regexpInfo['modifiers']);
		foreach ($regexpInfo['tokens'] as $token)
		{
			if ($token['type'] !== 'capturingSubpatternStart')
			{
				continue;
			}
			$this->captures[] = [
				'pos'  => $token['pos'],
				'name' => (isset($token['name'])) ? $token['name'] : null,
				'expr' => $token['content']
			];
		}
	}

	/**
	* Parse given template and store the references it contains
	*
	* Populates $this->references with the references found anywhere in the template, those found
	* in text nodes, and those found at the start of a literal attribute that holds a URL.
	* References to captures that do not exist are discarded. Must be called after parseRegexp().
	*
	* @param  string $template
	* @return void
	*/
	protected function parseTemplate($template)
	{
		$this->references = [
			'anywhere' => [],
			'asUrl'    => [],
			'inText'   => []
		];

		preg_match_all($this->referencesRegexp, $template, $matches);
		foreach ($matches[0] as $match)
		{
			// Strip the reference's syntax to keep only its digits, e.g. "${1}" becomes "1"
			$key = trim($match, '\\${}');
			$this->references['anywhere'][$key] = $key;
		}

		$dom   = TemplateHelper::loadTemplate($template);
		$xpath = new DOMXPath($dom);
		foreach ($xpath->query('//text()') as $node)
		{
			preg_match_all($this->referencesRegexp, $node->textContent, $matches);
			foreach ($matches[0] as $match)
			{
				$key = trim($match, '\\${}');
				$this->references['inText'][$key] = $key;
			}
		}

		foreach (TemplateHelper::getURLNodes($dom) as $node)
		{
			// We only bother with literal attributes that start with a capture
			if ($node instanceof DOMAttr
			 && preg_match('(^(?:[$\\\\]\\d+|\\$\\{\\d+\\}))', trim($node->value), $m))
			{
				$key = trim($m[0], '\\${}');
				$this->references['asUrl'][$key] = $key;
			}
		}

		$this->removeUnknownReferences();
	}

	/**
	* Remove references that do not correspond to an existing capture
	*
	* @return void
	*/
	protected function removeUnknownReferences()
	{
		// Each list is reassigned by key rather than iterated by reference, so that no element of
		// $this->references is left bound to a PHP reference
		foreach ($this->references as $type => $references)
		{
			$this->references[$type] = array_intersect_key($references, $this->captures);
		}
	}
}