Completed
Push — master ( d91fed...fd66aa )
by Josh
17:36
created

Configurator::fixUnnamedCaptures()   B

Complexity

Conditions 7
Paths 8

Size

Total Lines 32

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 7

Importance

Changes 0
Metric Value
dl 0
loc 32
rs 8.4746
c 0
b 0
f 0
ccs 15
cts 15
cp 1
cc 7
nc 8
nop 1
crap 7
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2018 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\Preg;
9
10
use DOMAttr;
11
use DOMText;
12
use DOMXPath;
13
use Exception;
14
use InvalidArgumentException;
15
use s9e\TextFormatter\Configurator\Helpers\RegexpParser;
16
use s9e\TextFormatter\Configurator\Helpers\TemplateHelper;
17
use s9e\TextFormatter\Configurator\Items\Regexp;
18
use s9e\TextFormatter\Configurator\Items\Tag;
19
use s9e\TextFormatter\Configurator\JavaScript\RegexpConvertor;
20
use s9e\TextFormatter\Configurator\Validators\TagName;
21
use s9e\TextFormatter\Plugins\ConfiguratorBase;
22
23
class Configurator extends ConfiguratorBase
24
{
25
	/**
26
	* @var array[] Captures from current regexp. Index 0 is a placeholder entry whose "name" and
	*              "expr" are null; every other entry is created by parseRegexp() with the
	*              elements "pos", "name" and "expr". replace() additionally sets a "passthrough"
	*              element on the capture used as passthrough
27
	*/
28
	protected $captures;
29
30
	/**
31
	* @var array[] List of [tagName, regexp, passthroughIdx]. One entry is appended by each call
	*              to match() or replace()
32
	*/
33
	protected $collection = [];
34
35
	/**
36
	* @var string Delimiter used in current regexp, as reported by the regexp parser
37
	*/
38
	protected $delimiter;
39
40
	/**
41
	* @var string Non-D modifiers used in current regexp. The D modifier is stripped here because
	*             addAttribute() always appends it to the regexps it generates
42
	*/
43
	protected $modifiers;
44
45
	/**
46
	* @var array References used in current template, grouped under the keys "anywhere", "asUrl"
	*            and "inText". Each group maps a referenced capture's index to itself
47
	*/
48
	protected $references;
49
50
	/**
51
	* @var string Regexp used to find references in the templates. We check that the reference is
52
	*             not preceded with an odd number of backslashes. A reference is a dollar sign or
	*             a backslash followed by digits, or digits enclosed in ${}
53
	*/
54
	protected $referencesRegexp = '((?<!\\\\)(?:\\\\\\\\)*\\K(?:[$\\\\]\\d+|\\$\\{\\d+\\}))S';
55
56
	/**
	* {@inheritdoc}
	*
	* Returns nothing if no regexp has been registered. Otherwise the config contains a single
	* "generics" element holding one [tagName, Regexp, passthroughIdx, captureNames] entry per
	* registered regexp.
	*/
	public function asConfig()
	{
		if (empty($this->collection))
		{
			return;
		}

		$generics = [];
		foreach ($this->collection as $entry)
		{
			list($tagName, $regexp, $passthroughIdx) = $entry;

			// The capture names are collected before the regexp is wrapped in a Regexp item
			$captureNames = RegexpParser::getCaptureNames($regexp);
			$generics[]   = [
				$tagName,
				new Regexp($regexp, true),
				$passthroughIdx,
				$captureNames
			];
		}

		return ['generics' => $generics];
	}
75
76
	/**
	* {@inheritdoc}
	*
	* The PREG_HAS_PASSTHROUGH hint is true if at least one registered regexp has a non-zero
	* passthrough index.
	*/
	public function getJSHints()
	{
		foreach ($this->collection as $entry)
		{
			// Each entry is [tagName, regexp, passthroughIdx]
			if ($entry[2])
			{
				return ['PREG_HAS_PASSTHROUGH' => true];
			}
		}

		return ['PREG_HAS_PASSTHROUGH' => false];
	}
93
94
	/**
	* Configure a pattern-based match
	*
	* The regexp is registered as-is. If several captures are catch-all expressions, the last one
	* is used as passthrough; if there is none, the passthrough index is 0.
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $tagName  Name of the tag that holds the matched text
	* @return void
	*/
	public function match($regexp, $tagName)
	{
		$tagName = TagName::normalize($tagName);
		$this->parseRegexp($regexp);

		$passthroughIdx = 0;
		foreach ($this->captures as $idx => $capture)
		{
			if ($this->isCatchAll($capture['expr']))
			{
				$passthroughIdx = $idx;
			}
		}

		$this->collection[] = [$tagName, $regexp, $passthroughIdx];
	}
117
118
	/**
	* Configure a pattern-based replacement
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $template Template to be used for rendering
	* @param  string $tagName  Name of the tag to create. If none is provided, a name made of
	*                          "PREG_" followed by the uppercased hexadecimal CRC32 of the regexp
	*                          is generated
	* @return Tag              The tag created to represent this replacement
	*/
	public function replace($regexp, $template, $tagName = null)
	{
		if ($tagName === null)
		{
			$tagName = 'PREG_' . strtoupper(dechex(crc32($regexp)));
		}

		// Collect the captures and the references before anything gets rewritten
		$this->parseRegexp($regexp);
		$this->parseTemplate($template);

		// Flag the passthrough capture so that it's not given a name unless it has to be
		$passthroughIdx = $this->getPassthroughCapture();
		if ($passthroughIdx)
		{
			$this->captures[$passthroughIdx]['passthrough'] = true;
		}

		// The regexp must be fixed first because it gives the captures the names that the
		// converted template refers to
		$namedRegexp       = $this->fixUnnamedCaptures($regexp);
		$convertedTemplate = $this->convertTemplate($template, $passthroughIdx);

		$this->collection[] = [$tagName, $namedRegexp, $passthroughIdx];

		return $this->createTag($tagName, $convertedTemplate);
	}
149
150
	/**
	* Add given attribute to given tag based on parsed captures
	*
	* The attribute gets a #regexp filter anchored at both ends that accepts any of the
	* expressions of the captures named after it. A #url filter is appended if any of those
	* captures is referenced as a URL in current template.
	*
	* @param  Tag    $tag
	* @param  string $attrName
	* @return void
	*/
	protected function addAttribute(Tag $tag, $attrName)
	{
		$isUrl       = false;
		$subpatterns = [];
		foreach ($this->captures as $idx => $capture)
		{
			if ($capture['name'] === $attrName)
			{
				$subpatterns[] = $capture['expr'];
				$isUrl         = $isUrl || isset($this->references['asUrl'][$idx]);
			}
		}

		// array_unique() preserves the first occurrence, so index 0 remains valid
		$subpatterns = array_unique($subpatterns);
		$body        = (count($subpatterns) === 1)
		             ? $subpatterns[0]
		             : '(?:' . implode('|', $subpatterns) . ')';

		// Reuse the original delimiter and modifiers, and force the D modifier
		$regexp = $this->delimiter . '^' . $body . '$' . $this->delimiter . 'D' . $this->modifiers;

		$attribute = $tag->attributes->add($attrName);

		$regexpFilter = $this->configurator->attributeFilters['#regexp'];
		$regexpFilter->setRegexp($regexp);
		$attribute->filterChain[] = $regexpFilter;

		if ($isUrl)
		{
			$urlFilter = $this->configurator->attributeFilters['#url'];
			$attribute->filterChain[] = $urlFilter;
		}
	}
191
192
	/**
	* Convert a preg-style replacement to a template
	*
	* Works in two passes: numeric references are replaced first, then the remaining escaped
	* characters are unescaped.
	*
	* @param  string  $template       Original template
	* @param  integer $passthroughIdx Index of the passthrough capture
	* @return string                  Modified template
	*/
	protected function convertTemplate($template, $passthroughIdx)
	{
		// Decides what each numeric reference is replaced with
		$resolveReference = function ($m, $node) use ($passthroughIdx)
		{
			$idx = (int) trim($m[0], '\\${}');
			if ($idx === 0)
			{
				// $0 copies the whole textContent
				return ['expression', '.'];
			}
			if ($node instanceof DOMText && $idx === $passthroughIdx)
			{
				// Passthrough capture, does not include start/end tags
				return ['passthrough'];
			}
			if (!isset($this->captures[$idx]['name']))
			{
				// Non-existent captures are simply ignored, similarly to preg_replace()
				return ['literal', ''];
			}

			// Normal capture, replaced by the equivalent expression
			return ['expression', '@' . $this->captures[$idx]['name']];
		};

		// Turns an escaped character back into its literal form
		$unescape = function ($m)
		{
			return ['literal', stripslashes($m[0])];
		};

		$template = TemplateHelper::replaceTokens(
			$template,
			$this->referencesRegexp,
			$resolveReference
		);

		return TemplateHelper::replaceTokens($template, '(\\\\+[0-9${\\\\])', $unescape);
	}
242
243
	/**
	* Create the tag that matches current regexp
	*
	* One attribute is created per distinct capture name. The tag's template is normalized and
	* checked before the tag is added to the configurator's tag collection.
	*
	* @param  string $tagName
	* @param  string $template
	* @return Tag
	*/
	protected function createTag($tagName, $template)
	{
		$tag = new Tag;
		foreach ($this->captures as $capture)
		{
			// Skip unnamed captures as well as names that already have their attribute
			if (isset($capture['name']) && !isset($tag->attributes[$capture['name']]))
			{
				$this->addAttribute($tag, $capture['name']);
			}
		}
		$tag->template = $template;

		$configurator = $this->configurator;

		// Normalize the tag's template
		$configurator->templateNormalizer->normalizeTag($tag);

		// Check the safeness of this tag
		$configurator->templateChecker->checkTag($tag);

		return $configurator->tags->add($tagName, $tag);
	}
278
279
	/**
	* Give a name to unnamed captures that are referenced in current replacement
	*
	* A referenced capture is named "_" followed by its index, unless it is capture 0, it already
	* has a name, or it is a passthrough capture that is not used as a URL. The generated names
	* are also stored in $this->captures.
	*
	* @param  string $regexp Original regexp
	* @return string         Modified regexp
	*/
	protected function fixUnnamedCaptures($regexp)
	{
		$unnamed = [];
		foreach ($this->references['anywhere'] as $key)
		{
			$capture     = $this->captures[$key];
			$isCandidate = ($key && !isset($capture['name']));

			// Give the capture a name if it's used as URL or it's not a passthrough
			$needsName = (isset($this->references['asUrl'][$key]) || !isset($capture['passthrough']));
			if ($isCandidate && $needsName)
			{
				$unnamed[] = $key;
			}
		}

		// Inject the names starting from the last capture so that the recorded positions of the
		// captures that precede it remain valid
		rsort($unnamed);
		foreach ($unnamed as $key)
		{
			$name = '_' . $key;

			// Offset the position by 2 to skip the regexp's opening delimiter and the
			// subpattern's opening parenthesis
			$insertAt = 2 + $this->captures[$key]['pos'];
			$regexp   = substr_replace($regexp, "?'" . $name . "'", $insertAt, 0);

			$this->captures[$key]['name'] = $name;
		}

		return $regexp;
	}
317
318
	/**
	* Get the index of the capture used for passthrough in current replacement
	*
	* Only captures referenced in the template's text and whose expression is a catch-all are
	* considered. There must be exactly one of them, otherwise there is no passthrough.
	*
	* @return integer Index of the passthrough capture, or 0 if there is none
	*/
	protected function getPassthroughCapture()
	{
		$found = 0;
		foreach ($this->references['inText'] as $key)
		{
			// Ignore if it's not a catch-all expression such as .*?
			if (!$this->isCatchAll($this->captures[$key]['expr']))
			{
				continue;
			}

			// Abort if there's more than 1 possible passthrough
			if ($found !== 0)
			{
				return 0;
			}

			$found = (int) $key;
		}

		return $found;
	}
344
345
	/**
	* Parse a regexp and return its info
	*
	* The regexp is first run against an empty string to make sure that PCRE accepts it.
	*
	* @param  string $regexp
	* @return array          Whatever RegexpParser::parse() returns for this regexp
	*
	* @throws InvalidArgumentException if the regexp is rejected by PCRE
	*/
	protected function getRegexpInfo($regexp)
	{
		// The error is silenced because a failure is reported with an exception instead
		$isValid = (@preg_match_all($regexp, '') !== false);
		if (!$isValid)
		{
			throw new InvalidArgumentException('Invalid regexp');
		}

		return RegexpParser::parse($regexp);
	}
360
361
	/**
	* Test whether given expression is a catch-all expression such as .*?
	*
	* An expression is a catch-all if it consists of a dot followed by either * or +, optionally
	* followed by a question mark.
	*
	* @param  string $expr Subpattern. May be null, which is the case for capture 0, in which
	*                      case it is not a catch-all
	* @return bool
	*/
	protected function isCatchAll($expr)
	{
		// The expression of capture 0 is null. It is cast to a string because passing null as
		// the subject of preg_match() is deprecated as of PHP 8.1
		return (bool) preg_match('(^\\.[*+]\\??$)D', (string) $expr);
	}
371
372
	/**
	* Parse given regexp and store its information
	*
	* Resets $this->captures, then stores the regexp's delimiter, its modifiers minus D, and one
	* entry per capturing subpattern.
	*
	* @param  string  $regexp
	* @return void
	*/
	protected function parseRegexp($regexp)
	{
		// Capture 0 has neither a name nor an expression
		$this->captures = [['name' => null, 'expr' => null]];

		$info            = $this->getRegexpInfo($regexp);
		$this->delimiter = $info['delimiter'];
		$this->modifiers = str_replace('D', '', $info['modifiers']);

		foreach ($info['tokens'] as $token)
		{
			if ($token['type'] === 'capturingSubpatternStart')
			{
				$name = isset($token['name']) ? $token['name'] : null;

				$this->captures[] = [
					'pos'  => $token['pos'],
					'name' => $name,
					'expr' => $token['content']
				];
			}
		}
	}
397
398
	/**
	* Parse given template and store the references it contains
	*
	* References are stored in $this->references in three groups: "anywhere" for every reference
	* found in the template, "inText" for those found in a text node, and "asUrl" for those found
	* at the start of a literal attribute that is used as a URL. In each group, the index of the
	* capture is used as both key and value. References to captures that do not exist are removed.
	*
	* @param  string $template
	* @return void
	*/
	protected function parseTemplate($template)
	{
		$this->references = [
			'anywhere' => [],
			'asUrl'    => [],
			'inText'   => []
		];

		// Indexes are cast to integers so that a zero-padded reference such as $01 is recorded
		// as capture 1, which is how convertTemplate() interprets it. Left as a string, "01"
		// would not match any key in $this->captures and the reference would be discarded
		preg_match_all($this->referencesRegexp, $template, $matches);
		foreach ($matches[0] as $match)
		{
			$key = (int) trim($match, '\\${}');
			$this->references['anywhere'][$key] = $key;
		}

		$dom   = TemplateHelper::loadTemplate($template);
		$xpath = new DOMXPath($dom);
		foreach ($xpath->query('//text()') as $node)
		{
			preg_match_all($this->referencesRegexp, $node->textContent, $matches);
			foreach ($matches[0] as $match)
			{
				$key = (int) trim($match, '\\${}');
				$this->references['inText'][$key] = $key;
			}
		}

		foreach (TemplateHelper::getURLNodes($dom) as $node)
		{
			// We only bother with literal attributes that start with a capture
			if ($node instanceof DOMAttr
			 && preg_match('(^(?:[$\\\\]\\d+|\\$\\{\\d+\\}))', trim($node->value), $m))
			{
				$key = (int) trim($m[0], '\\${}');
				$this->references['asUrl'][$key] = $key;
			}
		}

		$this->removeUnknownReferences();
	}
444
445
	/**
	* Remove references that do not correspond to an existing capture
	*
	* Every group of $this->references is reduced to the entries whose key is also a key of
	* $this->captures.
	*
	* @return void
	*/
	protected function removeUnknownReferences()
	{
		foreach (array_keys($this->references) as $type)
		{
			$this->references[$type] = array_intersect_key($this->references[$type], $this->captures);
		}
	}
457
}