Completed
Push — master ( 68fe6d...993f5c )
by Josh
15:01
created

Configurator::getJSHints()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 14
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 14
ccs 8
cts 8
cp 1
rs 9.4286
cc 3
eloc 7
nc 3
nop 0
crap 3
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2015 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Plugins\Preg;
9
10
use DOMAttr;
11
use DOMText;
12
use DOMXPath;
13
use Exception;
14
use InvalidArgumentException;
15
use s9e\TextFormatter\Configurator\Helpers\RegexpParser;
16
use s9e\TextFormatter\Configurator\Helpers\TemplateHelper;
17
use s9e\TextFormatter\Configurator\Items\Regexp;
18
use s9e\TextFormatter\Configurator\Items\Tag;
19
use s9e\TextFormatter\Configurator\JavaScript\RegexpConvertor;
20
use s9e\TextFormatter\Plugins\ConfiguratorBase;
21
22
/**
* Configurator for the Preg plugin
*
* Collects regexp-based matches (see match()) and replacements (see replace()) and exposes
* them as the plugin's config
*/
class Configurator extends ConfiguratorBase
{
	/**
	* @var array[] Captures from current regexp. Index 0 is a placeholder for the whole match,
	*              whose 'name' and 'expr' are both NULL
	*/
	protected $captures;

	/**
	* @var array[] List of [tagName, regexp, passthroughIdx]
	*/
	protected $collection = [];

	/**
	* @var string Delimiter used in current regexp
	*/
	protected $delimiter;

	/**
	* @var string Non-D modifiers used in current regexp
	*/
	protected $modifiers;

	/**
	* @var array References used in current template, grouped under the keys "anywhere",
	*            "asUrl" and "inText". Each group maps a capture index to itself
	*/
	protected $references;

	/**
	* @var string Regexp used to find references in the templates. We check that the reference is
	*             not preceded with an odd number of backslashes
	*/
	protected $referencesRegexp = '((?<!\\\\)(?:\\\\\\\\)*\\K(?:[$\\\\]\\d+|\\$\\{\\d+\\}))S';

	/**
	* {@inheritdoc}
	*
	* @return array|null NULL if nothing was registered, otherwise an array whose "generics"
	*                    element is a list of [tagName, Regexp, passthroughIdx, captureNames]
	*/
	public function asConfig()
	{
		if (!count($this->collection))
		{
			return;
		}

		$pregs = [];
		foreach ($this->collection as list($tagName, $regexp, $passthroughIdx))
		{
			$captures = RegexpParser::getCaptureNames($regexp);
			$pregs[]  = [$tagName, new Regexp($regexp, true), $passthroughIdx, $captures];
		}

		return ['generics' => $pregs];
	}

	/**
	* {@inheritdoc}
	*
	* @return array PREG_HAS_PASSTHROUGH is TRUE if any registered regexp has a non-zero
	*               passthrough index
	*/
	public function getJSHints()
	{
		$hasPassthrough = false;
		foreach ($this->collection as list($tagName, $regexp, $passthroughIdx))
		{
			if ($passthroughIdx)
			{
				$hasPassthrough = true;
				break;
			}
		}

		return ['PREG_HAS_PASSTHROUGH' => $hasPassthrough];
	}

	/**
	* Configure a pattern-based match
	*
	* The regexp is stored as-is. If it contains catch-all captures such as (.*?), the index of
	* the last one is recorded as the passthrough index
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $tagName  Name of the tag that holds the matched text
	* @return void
	*/
	public function match($regexp, $tagName)
	{
		$passthrough = 0;
		$this->parseRegexp($regexp);
		foreach ($this->captures as $i => $capture)
		{
			if (!$this->isCatchAll($capture['expr']))
			{
				continue;
			}
			$passthrough = $i;
		}

		$this->collection[] = [$tagName, $regexp, $passthrough];
	}

	/**
	* Configure a pattern-based replacement
	*
	* @param  string $regexp   Regexp to be used by the parser
	* @param  string $template Template to be used for rendering
	* @param  string $tagName  Name of the tag to create. A name based on the regexp is
	*                          automatically generated if none is provided
	* @return Tag              The tag created to represent this replacement
	*/
	public function replace($regexp, $template, $tagName = null)
	{
		if (!isset($tagName))
		{
			$tagName = 'PREG_' . strtoupper(dechex(crc32($regexp)));
		}
		$this->parseRegexp($regexp);
		$this->parseTemplate($template);

		$passthrough = $this->getPassthroughCapture();
		if ($passthrough)
		{
			$this->captures[$passthrough]['passthrough'] = true;
		}

		$regexp   = $this->fixUnnamedCaptures($regexp);
		$template = $this->convertTemplate($template, $passthrough);

		// Create the tag first so that nothing is registered if the tag is rejected. Otherwise
		// a failed call would leave behind a regexp whose tag does not exist
		$tag = $this->createTag($tagName, $template);

		$this->collection[] = [$tagName, $regexp, $passthrough];

		return $tag;
	}

	/**
	* Add given attribute to given tag based on parsed captures
	*
	* The attribute gets a #regexp filter built from the expression of every capture that
	* bears its name, plus a #url filter if any of those captures is used as a URL
	*
	* @param  Tag    $tag
	* @param  string $attrName
	* @return void
	*/
	protected function addAttribute(Tag $tag, $attrName)
	{
		$isUrl = false;
		$exprs = [];
		foreach ($this->captures as $key => $capture)
		{
			if ($capture['name'] !== $attrName)
			{
				continue;
			}
			$exprs[] = $capture['expr'];
			if (isset($this->references['asUrl'][$key]))
			{
				$isUrl = true;
			}
		}

		// array_unique() keeps the first occurrence of each value, so $exprs[0] is still set
		$exprs = array_unique($exprs);

		// Anchor the expression(s) and reuse the original regexp's delimiter and modifiers
		$regexp = $this->delimiter . '^';
		$regexp .= (count($exprs) === 1) ? $exprs[0] : '(?:' . implode('|', $exprs) . ')';
		$regexp .= '$' . $this->delimiter . 'D' . $this->modifiers;

		$attribute = $tag->attributes->add($attrName);

		$filter = $this->configurator->attributeFilters['#regexp'];
		$filter->setRegexp($regexp);
		$attribute->filterChain[] = $filter;

		if ($isUrl)
		{
			$filter = $this->configurator->attributeFilters['#url'];
			$attribute->filterChain[] = $filter;
		}
	}

	/**
	* Convert a preg-style replacement to a template
	*
	* @param  string  $template    Original template
	* @param  integer $passthrough Index of the passthrough capture
	* @return string               Modified template
	*/
	protected function convertTemplate($template, $passthrough)
	{
		// Replace numeric references in the template with the value of the corresponding attribute
		// values or passthrough
		$template = TemplateHelper::replaceTokens(
			$template,
			$this->referencesRegexp,
			function ($m, $node) use ($passthrough)
			{
				$key = (int) trim($m[0], '\\${}');
				if ($key === 0)
				{
					// $0 copies the whole textContent
					return ['expression', '.'];
				}
				if ($key === $passthrough && $node instanceof DOMText)
				{
					// Passthrough capture, does not include start/end tags
					return ['passthrough'];
				}
				if (isset($this->captures[$key]['name']))
				{
					// Normal capture, replaced by the equivalent expression
					return ['expression', '@' . $this->captures[$key]['name']];
				}

				// Non-existent captures are simply ignored, similarly to preg_replace()
				return ['literal', ''];
			}
		);

		// Unescape backslashes and special characters in the template
		$template = TemplateHelper::replaceTokens(
			$template,
			'(\\\\+[0-9${\\\\])',
			function ($m)
			{
				return ['literal', stripslashes($m[0])];
			}
		);

		return $template;
	}

	/**
	* Create the tag that matches current regexp
	*
	* One attribute is created per distinct capture name. The tag's template is normalized and
	* checked before the tag is added to the configurator's tag collection
	*
	* @param  string $tagName
	* @param  string $template
	* @return Tag
	*/
	protected function createTag($tagName, $template)
	{
		$tag = new Tag;
		foreach ($this->captures as $key => $capture)
		{
			if (!isset($capture['name']))
			{
				continue;
			}

			$attrName = $capture['name'];
			if (isset($tag->attributes[$attrName]))
			{
				// Several captures can share a name, the attribute is only created once
				continue;
			}

			$this->addAttribute($tag, $attrName);
		}

		// NOTE(review): static analysis flags this assignment because a string is assigned to a
		// property whose declared type is a Template object. Presumably Tag converts strings
		// on assignment -- TODO confirm, or add string to the property's documented type
		$tag->template = $template;

		// Normalize the tag's template
		$this->configurator->templateNormalizer->normalizeTag($tag);

		// Check the safeness of this tag
		$this->configurator->templateChecker->checkTag($tag);

		return $this->configurator->tags->add($tagName, $tag);
	}

	/**
	* Give a name to unnamed captures that are referenced in current replacement
	*
	* Named captures are also recorded in $this->captures
	*
	* @param  string $regexp Original regexp
	* @return string         Modified regexp
	*/
	protected function fixUnnamedCaptures($regexp)
	{
		$keys = [];
		foreach ($this->references['anywhere'] as $key)
		{
			$capture = $this->captures[$key];
			if (!$key || isset($capture['name']))
			{
				// Skip the whole match ($0) and captures that already have a name
				continue;
			}
			// Give the capture a name if it's used as URL or it's not a passthrough
			if (isset($this->references['asUrl'][$key]) || !isset($capture['passthrough']))
			{
				$keys[] = $key;
			}
		}

		// Alter the original regexp to inject the subpatterns' names. The position is equal to the
		// subpattern's position plus 2, to account for the delimiter at the start of the regexp and
		// the opening parenthesis of the subpattern. Also, we need to process them in reverse order
		// so that replacements don't change the position of subsequent subpatterns
		rsort($keys);
		foreach ($keys as $key)
		{
			$name   = '_' . $key;
			$pos    = $this->captures[$key]['pos'];
			$regexp = substr_replace($regexp, "?'" . $name . "'", 2 + $pos, 0);
			$this->captures[$key]['name'] = $name;
		}

		return $regexp;
	}

	/**
	* Get the index of the capture used for passthrough in current replacement
	*
	* @return integer Index of the only catch-all capture referenced in the template's text,
	*                 or 0 if there is none or more than one
	*/
	protected function getPassthroughCapture()
	{
		$passthrough = 0;
		foreach ($this->references['inText'] as $key)
		{
			if (!$this->isCatchAll($this->captures[$key]['expr']))
			{
				// Ignore if it's not a catch-all expression such as .*?
				continue;
			}
			if ($passthrough)
			{
				// Abort if there's more than 1 possible passthrough
				$passthrough = 0;
				break;
			}
			$passthrough = (int) $key;
		}

		return $passthrough;
	}

	/**
	* Parse a regexp and return its info
	*
	* @param  string $regexp
	* @return array
	*
	* @throws InvalidArgumentException if the regexp cannot be executed by PCRE
	*/
	protected function getRegexpInfo($regexp)
	{
		$valid = false;
		try
		{
			// Run the regexp against an empty string to test its validity. Warnings are
			// silenced because an invalid regexp is reported via the exception below
			$valid = @preg_match_all($regexp, '');
		}
		catch (Exception $e)
		{
			// An error handler may have turned the warning into an exception. $valid is still
			// false, so the regexp is reported as invalid below
		}
		if ($valid === false)
		{
			throw new InvalidArgumentException('Invalid regexp');
		}

		return RegexpParser::parse($regexp);
	}

	/**
	* Test whether given expression is a catch-all expression such as .*?
	*
	* @param  string $expr Subpattern. NULL is accepted and never matches
	* @return bool
	*/
	protected function isCatchAll($expr)
	{
		// The whole-match capture at index 0 has a NULL expression. Cast it to a string because
		// passing NULL as the subject of preg_match() is deprecated as of PHP 8.1
		return (bool) preg_match('(^\\.[*+]\\??$)D', (string) $expr);
	}

	/**
	* Parse given regexp and store its information
	*
	* Resets $this->captures, then stores the regexp's delimiter, its modifiers minus D, and
	* one entry per capturing subpattern
	*
	* @param  string  $regexp
	* @return void
	*/
	protected function parseRegexp($regexp)
	{
		// Index 0 stands for the whole match so that subpatterns are numbered from 1
		$this->captures = [['name' => null, 'expr' => null]];
		$regexpInfo = $this->getRegexpInfo($regexp);
		$this->delimiter = $regexpInfo['delimiter'];
		$this->modifiers = str_replace('D', '', $regexpInfo['modifiers']);
		foreach ($regexpInfo['tokens'] as $token)
		{
			if ($token['type'] !== 'capturingSubpatternStart')
			{
				continue;
			}
			$this->captures[] = [
				'pos'    => $token['pos'],
				'name'   => (isset($token['name'])) ? $token['name'] : null,
				'expr'   => $token['content']
			];
		}
	}

	/**
	* Parse given template and store the references it contains
	*
	* References are stored by integer capture index, the same way convertTemplate() interprets
	* them, so that a zero-padded reference such as $01 designates capture 1
	*
	* @param  string $template
	* @return void
	*/
	protected function parseTemplate($template)
	{
		$this->references = [
			'anywhere' => [],
			'asUrl'    => [],
			'inText'   => []
		];

		// Collect every reference found in the template's source
		preg_match_all($this->referencesRegexp, $template, $matches);
		foreach ($matches[0] as $match)
		{
			$key = (int) trim($match, '\\${}');
			$this->references['anywhere'][$key] = $key;
		}

		// Collect the references found in text nodes
		$dom   = TemplateHelper::loadTemplate($template);
		$xpath = new DOMXPath($dom);
		foreach ($xpath->query('//text()') as $node)
		{
			preg_match_all($this->referencesRegexp, $node->textContent, $matches);
			foreach ($matches[0] as $match)
			{
				$key = (int) trim($match, '\\${}');
				$this->references['inText'][$key] = $key;
			}
		}

		foreach (TemplateHelper::getURLNodes($dom) as $node)
		{
			// We only bother with literal attributes that start with a capture
			if ($node instanceof DOMAttr
			 && preg_match('(^(?:[$\\\\]\\d+|\\$\\{\\d+\\}))', trim($node->value), $m))
			{
				$key = (int) trim($m[0], '\\${}');
				$this->references['asUrl'][$key] = $key;
			}
		}

		$this->removeUnknownReferences();
	}

	/**
	* Remove references that do not correspond to an existing capture
	*
	* @return void
	*/
	protected function removeUnknownReferences()
	{
		foreach ($this->references as &$references)
		{
			$references = array_intersect_key($references, $this->captures);
		}

		// Break the reference so that the variable can't be used to modify the last element
		unset($references);
	}
}