Configurator::getRegexpInfo()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 3
dl 0
loc 8
ccs 4
cts 4
cp 1
rs 10
c 0
b 0
f 0
cc 2
nc 2
nop 1
crap 2
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) The s9e authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\Preg;
9
10
use DOMAttr;
11
use DOMText;
12
use DOMXPath;
13
use Exception;
14
use InvalidArgumentException;
15
use s9e\TextFormatter\Configurator\Helpers\NodeLocator;
16
use s9e\TextFormatter\Configurator\Helpers\RegexpParser;
17
use s9e\TextFormatter\Configurator\Helpers\TemplateLoader;
18
use s9e\TextFormatter\Configurator\Helpers\TemplateModifier;
19
use s9e\TextFormatter\Configurator\Items\Regexp;
20
use s9e\TextFormatter\Configurator\Items\Tag;
21
use s9e\TextFormatter\Configurator\JavaScript\RegexpConvertor;
22
use s9e\TextFormatter\Configurator\Validators\TagName;
23
use s9e\TextFormatter\Plugins\ConfiguratorBase;
24
25
class Configurator extends ConfiguratorBase
26
{
27
	/**
	* @var array[] Captures from current regexp. Index 0 is a placeholder whose "name" and "expr"
	*              are null. Every other entry describes one capturing subpattern with the keys
	*              "pos", "name" and "expr". replace() also sets "passthrough" on the capture it
	*              uses as passthrough
	*/
30
	protected $captures;
31
32
	/**
	* @var array[] List of [tagName, regexp, passthroughIdx], one entry per call to match() or
	*              replace(). A passthroughIdx of 0 means there is no passthrough capture
	*/
35
	protected $collection = [];
36
37
	/**
	* @var string Delimiter used in current regexp, as reported by the regexp parser
	*/
40
	protected $delimiter;
41
42
	/**
	* @var string Non-D modifiers used in current regexp: the regexp's modifiers with every "D"
	*             removed
	*/
45
	protected $modifiers;
46
47
	/**
	* @var array References used in current template. Holds three lists, "anywhere", "asUrl"
	*            and "inText", each keyed by the capture index it references
	*/
50
	protected $references;
51
52
	/**
	* @var string Regexp used to find references in the templates. It matches the forms $1, \1
	*             and ${1}. We check that the reference is not preceded with an odd number of
	*             backslashes
	*/
56
	protected $referencesRegexp = '((?<!\\\\)(?:\\\\\\\\)*\\K(?:[$\\\\]\\d+|\\$\\{\\d+\\}))S';
57
58
	/**
	* Return this plugin's configuration
	*
	* Nothing (null) is returned if no regexp has been registered. Otherwise the result holds a
	* single "generics" element: a list of [tagName, Regexp, passthroughIdx, captureNames]
	* entries, in the order the regexps were registered.
	*
	* {@inheritdoc}
	*/
	public function asConfig()
	{
		if (count($this->collection) === 0)
		{
			return;
		}

		$generics = [];
		foreach ($this->collection as $entry)
		{
			list($tagName, $regexp, $passthroughIdx) = $entry;

			$captureNames = RegexpParser::getCaptureNames($regexp);
			$generics[]   = [
				$tagName,
				new Regexp($regexp, true),
				$passthroughIdx,
				$captureNames
			];
		}

		return ['generics' => $generics];
	}
77
78
	/**
	* Return the hints used when generating the JavaScript parser
	*
	* PREG_HAS_PASSTHROUGH is true if at least one registered regexp has a non-zero passthrough
	* index, false otherwise.
	*
	* {@inheritdoc}
	*/
	public function getJSHints()
	{
		foreach ($this->collection as $entry)
		{
			// Each entry is [tagName, regexp, passthroughIdx]
			if ($entry[2])
			{
				return ['PREG_HAS_PASSTHROUGH' => true];
			}
		}

		return ['PREG_HAS_PASSTHROUGH' => false];
	}
95
96
	/**
	* Configure a pattern-based match
	*
	* The tag name is normalized, the regexp is parsed and the pair is appended to the
	* collection. If the regexp contains catch-all captures such as (.*?), the index of the last
	* one is recorded as the passthrough index; otherwise the passthrough index is 0.
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $tagName  Name of the tag that holds the matched text
	* @return void
	*/
	public function match($regexp, $tagName)
	{
		$tagName = TagName::normalize($tagName);
		$this->parseRegexp($regexp);

		// Keep the index of the last catch-all capture, or 0 if there is none
		$passthroughIdx = 0;
		foreach ($this->captures as $idx => $capture)
		{
			if ($this->isCatchAll((string) $capture['expr']))
			{
				$passthroughIdx = $idx;
			}
		}

		$this->collection[] = [$tagName, $regexp, $passthroughIdx];
	}
119
120
	/**
	* Configure a pattern-based replacement
	*
	* Parses the regexp and the template, picks the passthrough capture (if any), names the
	* unnamed captures that the template references, converts the preg-style template and
	* registers the resulting tag.
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $template Template to be used for rendering
	* @param  string $tagName  Name of the tag to create. If none is provided, a name made of
	*                          "PREG_" followed by the uppercase hexadecimal CRC32 of the regexp
	*                          is generated
	* @return Tag              The tag created to represent this replacement
	*/
	public function replace($regexp, $template, $tagName = null)
	{
		if ($tagName === null)
		{
			$hash    = dechex(crc32($regexp));
			$tagName = 'PREG_' . strtoupper($hash);
		}

		// Populate $this->captures and $this->references for the steps below
		$this->parseRegexp($regexp);
		$this->parseTemplate($template);

		$passthroughIdx = $this->getPassthroughCapture();
		if ($passthroughIdx)
		{
			// Flag the capture so that fixUnnamedCaptures() can tell it apart
			$this->captures[$passthroughIdx]['passthrough'] = true;
		}

		// Captures must be named before the template is converted, because the conversion looks
		// up capture names
		$namedRegexp       = $this->fixUnnamedCaptures($regexp);
		$convertedTemplate = $this->convertTemplate($template, $passthroughIdx);

		$this->collection[] = [$tagName, $namedRegexp, $passthroughIdx];

		return $this->createTag($tagName, $convertedTemplate);
	}
151
152
	/**
	* Add given attribute to given tag based on parsed captures
	*
	* The attribute gets a #regexp filter built from the expressions of every capture that bears
	* the attribute's name, anchored at both ends and using the current regexp's delimiter and
	* modifiers. A #url filter is appended if any of those captures is referenced as a URL in
	* the current template.
	*
	* @param  Tag    $tag
	* @param  string $attrName
	* @return void
	*/
	protected function addAttribute(Tag $tag, $attrName)
	{
		$isUrl = false;
		$exprs = [];
		foreach ($this->captures as $key => $capture)
		{
			if ($capture['name'] === $attrName)
			{
				$exprs[] = $capture['expr'];
				$isUrl   = $isUrl || isset($this->references['asUrl'][$key]);
			}
		}
		$exprs = array_unique($exprs);

		// A single expression is used as-is, several are wrapped in a non-capturing alternation
		$body = (count($exprs) === 1) ? $exprs[0] : '(?:' . implode('|', $exprs) . ')';

		// The D modifier is always set, followed by the regexp's own non-D modifiers
		$regexp = $this->delimiter . '^' . $body . '$' . $this->delimiter . 'D' . $this->modifiers;

		$attribute = $tag->attributes->add($attrName);

		$regexpFilter = $this->configurator->attributeFilters['#regexp'];
		$regexpFilter->setRegexp($regexp);
		$attribute->filterChain[] = $regexpFilter;

		if ($isUrl)
		{
			$urlFilter = $this->configurator->attributeFilters['#url'];
			$attribute->filterChain[] = $urlFilter;
		}
	}
193
194
	/**
	* Convert a preg-style replacement to a template
	*
	* Runs two passes over the template. The first one replaces numeric references with an
	* expression, a passthrough or an empty literal. The second one unescapes what is left of
	* the backslash-escaped characters.
	*
	* @param  string  $template       Original template
	* @param  integer $passthroughIdx Index of the passthrough capture
	* @return string                  Modified template
	*/
	protected function convertTemplate($template, $passthroughIdx)
	{
		// Replaces a numeric reference with the value of the corresponding attribute or with a
		// passthrough
		$replaceReference = function ($m, $node) use ($passthroughIdx)
		{
			$key = (int) trim($m[0], '\\${}');
			if ($key === 0)
			{
				// $0 copies the whole textContent
				return ['expression', '.'];
			}
			if ($node instanceof DOMText && $key === $passthroughIdx)
			{
				// Passthrough capture, does not include start/end tags
				return ['passthrough'];
			}
			if (!isset($this->captures[$key]['name']))
			{
				// Non-existent captures are simply ignored, similarly to preg_replace()
				return ['literal', ''];
			}

			// Normal capture, replaced by the equivalent expression
			return ['expression', '@' . $this->captures[$key]['name']];
		};

		// Unescapes backslashes and special characters
		$unescape = function ($m)
		{
			return ['literal', stripslashes($m[0])];
		};

		$template = TemplateModifier::replaceTokens(
			$template,
			$this->referencesRegexp,
			$replaceReference
		);

		return TemplateModifier::replaceTokens(
			$template,
			'(\\\\+[0-9${\\\\])',
			$unescape
		);
	}
244
245
	/**
	* Create the tag that matches current regexp
	*
	* One attribute is created per distinct capture name. The template is then assigned to the
	* tag, normalized and checked before the tag is added to the configurator's collection.
	* Exceptions raised by the normalizer or the checker are not caught here.
	*
	* @param  string $tagName
	* @param  string $template
	* @return Tag
	*/
	protected function createTag($tagName, $template)
	{
		$tag = new Tag;
		foreach ($this->captures as $capture)
		{
			$attrName = $capture['name'] ?? null;

			// Skip unnamed captures and names that already have an attribute
			if ($attrName === null || isset($tag->attributes[$attrName]))
			{
				continue;
			}

			$this->addAttribute($tag, $attrName);
		}
		$tag->template = $template;

		$configurator = $this->configurator;

		// Normalize the tag's template
		$configurator->templateNormalizer->normalizeTag($tag);

		// Check the safeness of this tag
		$configurator->templateChecker->checkTag($tag);

		return $configurator->tags->add($tagName, $tag);
	}
280
281
	/**
	* Give a name to unnamed captures that are referenced in current replacement
	*
	* A referenced capture is named "_" followed by its index if it has no name yet and either
	* it is used as a URL or it is not the passthrough capture. Capture 0 is never named. The
	* names are also recorded in $this->captures.
	*
	* @param  string $regexp Original regexp
	* @return string         Modified regexp
	*/
	protected function fixUnnamedCaptures($regexp)
	{
		$keys = [];
		foreach ($this->references['anywhere'] as $key)
		{
			$capture = $this->captures[$key];

			$isUnnamed = ($key && !isset($capture['name']));
			$isUrl     = isset($this->references['asUrl'][$key]);

			// Give the capture a name if it's used as URL or it's not a passthrough
			if ($isUnnamed && ($isUrl || !isset($capture['passthrough'])))
			{
				$keys[] = $key;
			}
		}

		// The names are injected from the last subpattern to the first so that an insertion
		// never shifts the position of a subpattern that is yet to be processed
		rsort($keys);
		foreach ($keys as $key)
		{
			$name = '_' . $key;

			// The subpattern's position plus 2 accounts for the delimiter at the start of the
			// regexp and for the opening parenthesis of the subpattern
			$offset = 2 + $this->captures[$key]['pos'];
			$regexp = substr_replace($regexp, "?'" . $name . "'", $offset, 0);

			$this->captures[$key]['name'] = $name;
		}

		return $regexp;
	}
319
320
	/**
	* Get the index of the capture used for passthrough in current replacement
	*
	* Only captures referenced in the template's text nodes are considered. The result is the
	* index of the only one whose expression is a catch-all such as .*? and 0 if there is none
	* or more than one.
	*
	* @return integer
	*/
	protected function getPassthroughCapture()
	{
		$found = 0;
		foreach ($this->references['inText'] as $key)
		{
			$expr = (string) $this->captures[$key]['expr'];
			if ($this->isCatchAll($expr))
			{
				if ($found !== 0)
				{
					// Abort if there's more than 1 possible passthrough
					return 0;
				}
				$found = (int) $key;
			}
		}

		return $found;
	}
346
347
	/**
	* Parse a regexp and return its info
	*
	* The regexp is first run against an empty string to make sure PCRE accepts it.
	*
	* @param  string $regexp
	* @return array
	*
	* @throws InvalidArgumentException if the regexp is rejected by preg_match_all()
	*/
	protected function getRegexpInfo($regexp)
	{
		// Errors are silenced because a failure is reported as an exception instead
		$result = @preg_match_all($regexp, '');
		if ($result === false)
		{
			throw new InvalidArgumentException('Invalid regexp');
		}

		return RegexpParser::parse($regexp);
	}
362
363
	/**
	* Test whether given expression is a catch-all expression such as .*?
	*
	* An expression qualifies if it is exactly a dot followed by * or +, optionally followed by
	* a question mark.
	*
	* @param  string $expr Subpattern
	* @return bool
	*/
	protected function isCatchAll($expr)
	{
		$matched = preg_match('(^\\.[*+]\\??$)D', $expr);

		return ($matched === 1);
	}
373
374
	/**
	* Parse given regexp and store its information
	*
	* Resets $this->captures, then stores the regexp's delimiter, its modifiers minus D and one
	* entry per capturing subpattern. Entry 0 is a placeholder with no name and no expression.
	*
	* @param  string  $regexp
	* @return void
	*/
	protected function parseRegexp($regexp)
	{
		// The captures are reset before the regexp is validated
		$this->captures = [['name' => null, 'expr' => null]];

		$info = $this->getRegexpInfo($regexp);

		$this->delimiter = $info['delimiter'];
		$this->modifiers = str_replace('D', '', $info['modifiers']);

		foreach ($info['tokens'] as $token)
		{
			if ($token['type'] === 'capturingSubpatternStart')
			{
				$this->captures[] = [
					'pos'    => $token['pos'],
					'name'   => $token['name'] ?? null,
					'expr'   => $token['content']
				];
			}
		}
	}
399
400
	/**
	* Parse given template and store the references it contains
	*
	* Three lists are stored in $this->references, each keyed by capture index:
	*
	*  - "anywhere": references found anywhere in the template's source
	*  - "inText":   references found in the template's text nodes
	*  - "asUrl":    references found at the start of a literal attribute that holds a URL
	*
	* Indexes are normalized to integers so that a zero-padded reference such as $01 designates
	* the same capture as $1, which is how convertTemplate() interprets it. References that do
	* not correspond to a capture of the current regexp are removed at the end.
	*
	* @param  string $template
	* @return void
	*/
	protected function parseTemplate($template)
	{
		$this->references = [
			'anywhere' => [],
			'asUrl'    => [],
			'inText'   => []
		];

		preg_match_all($this->referencesRegexp, $template, $matches);
		foreach ($matches[0] as $match)
		{
			$key = (int) trim($match, '\\${}');
			$this->references['anywhere'][$key] = $key;
		}

		$dom   = TemplateLoader::load($template);
		$xpath = new DOMXPath($dom);
		foreach ($xpath->query('//text()') as $node)
		{
			preg_match_all($this->referencesRegexp, $node->textContent, $matches);
			foreach ($matches[0] as $match)
			{
				$key = (int) trim($match, '\\${}');
				$this->references['inText'][$key] = $key;
			}
		}

		foreach (NodeLocator::getURLNodes($dom) as $node)
		{
			// We only bother with literal attributes that start with a capture
			if ($node instanceof DOMAttr
			 && preg_match('(^(?:[$\\\\]\\d+|\\$\\{\\d+\\}))', trim($node->value), $m))
			{
				$key = (int) trim($m[0], '\\${}');
				$this->references['asUrl'][$key] = $key;
			}
		}

		$this->removeUnknownReferences();
	}
446
447
	/**
	* Remove references that do not correspond to an existing capture
	*
	* Every list in $this->references is reduced to the entries whose key is also a key of
	* $this->captures.
	*
	* @return void
	*/
	protected function removeUnknownReferences()
	{
		foreach (array_keys($this->references) as $type)
		{
			$this->references[$type] = array_intersect_key(
				$this->references[$type],
				$this->captures
			);
		}
	}
459
}