TemplateInspector::analyseBranches()   A
last analyzed

Complexity

Conditions 2
Paths 2

Size

Total Lines 18
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 18
ccs 14
cts 14
cp 1
rs 9.8333
c 0
b 0
f 0
cc 2
nc 2
nop 0
crap 2
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) The s9e authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use DOMElement;
11
use DOMXPath;
12
13
/**
* This class helps the RulesGenerator by analyzing a given template in order to answer questions
* such as "can this tag be a child/descendant of that other tag?" and others related to the HTML5
* content model.
*
* We use the HTML5 specs to determine which children or descendants should be allowed or denied
* based on HTML5 content models. While it does not exactly match HTML5 content models, it gets
* pretty close. We also use HTML5 "optional end tag" rules to create closeParent rules.
*
* Currently, this class does not evaluate elements created with <xsl:element> correctly, or
* attributes created with <xsl:attribute>, and may never do so due to the increased complexity it
* would entail. Additionally, it does not evaluate the scope of <xsl:apply-templates/>. For
* instance, it will treat <xsl:apply-templates select="LI"/> as if it was <xsl:apply-templates/>
*
* @link http://dev.w3.org/html5/spec/content-models.html#content-models
* @link http://dev.w3.org/html5/spec/syntax.html#optional-tags
*/
class TemplateInspector
{
	/**
	* XSL namespace
	*/
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';

	/**
	* @var string[] allowChild bitfield for each branch
	*/
	protected $allowChildBitfields = [];

	/**
	* @var bool Whether elements are allowed as children
	*/
	protected $allowsChildElements;

	/**
	* @var bool Whether text nodes are allowed as children
	*/
	protected $allowsText;

	/**
	* @var array[] Array of array of DOMElement instances
	*/
	protected $branches;

	/**
	* @var string OR-ed bitfield representing all of the categories used by this template
	*/
	protected $contentBitfield = "\0";

	/**
	* @var string Default bitfield used at the root of a branch
	*/
	protected $defaultBranchBitfield;

	/**
	* @var string denyDescendant bitfield
	*/
	protected $denyDescendantBitfield = "\0";

	/**
	* @var \DOMDocument Document containing the template
	*/
	protected $dom;

	/**
	* @var bool Whether this template contains any HTML elements
	*/
	protected $hasElements = false;

	/**
	* @var bool Whether this template renders non-whitespace text nodes at its root
	*/
	protected $hasRootText;

	/**
	* @var bool Whether this template should be considered a block-level element
	*/
	protected $isBlock = false;

	/**
	* @var bool Whether the template uses the "empty" content model
	*/
	protected $isEmpty;

	/**
	* @var bool Whether this template adds to the list of active formatting elements
	*/
	protected $isFormattingElement;

	/**
	* @var bool Whether this template lets content through via an xsl:apply-templates element
	*/
	protected $isPassthrough = false;

	/**
	* @var bool Whether all branches use the transparent content model
	*/
	protected $isTransparent = false;

	/**
	* @var bool Whether all branches have an ancestor that is a void element
	*/
	protected $isVoid;

	/**
	* @var array Last HTML element that precedes an <xsl:apply-templates/> node
	*/
	protected $leafNodes = [];

	/**
	* @var bool Whether any branch has an element that preserves new lines by default (e.g. <pre>)
	*/
	protected $preservesNewLines = false;

	/**
	* @var array Bitfield of the first HTML element of every branch
	*/
	protected $rootBitfields = [];

	/**
	* @var array Every HTML element that has no HTML parent
	*/
	protected $rootNodes = [];

	/**
	* @var DOMXPath XPath engine associated with $this->dom
	*/
	protected $xpath;

	/**
	* Constructor
	*
	* Loads the template then runs the root node, branch and content analyses, in that order
	*
	* @param string $template Template content
	*/
	public function __construct($template)
	{
		$this->dom   = TemplateLoader::load($template);
		$this->xpath = new DOMXPath($this->dom);

		// The bitfield of a <div> is used as the starting value of every branch
		$this->defaultBranchBitfield = ElementInspector::getAllowChildBitfield($this->dom->createElement('div'));

		$this->analyseRootNodes();
		$this->analyseBranches();
		$this->analyseContent();
	}

	/**
	* Return whether this template allows a given child
	*
	* @param  TemplateInspector $child
	* @return bool
	*/
	public function allowsChild(TemplateInspector $child)
	{
		// Sometimes, a template can technically be allowed as a child but denied as a descendant
		if (!$this->allowsDescendant($child))
		{
			return false;
		}

		// Every root element of the child must be allowed by every branch of this template
		foreach ($child->rootBitfields as $rootBitfield)
		{
			foreach ($this->allowChildBitfields as $allowChildBitfield)
			{
				if (!self::match($rootBitfield, $allowChildBitfield))
				{
					return false;
				}
			}
		}

		return ($this->allowsText || !$child->hasRootText);
	}

	/**
	* Return whether this template allows a given descendant
	*
	* @param  TemplateInspector $descendant
	* @return bool
	*/
	public function allowsDescendant(TemplateInspector $descendant)
	{
		// Test whether the descendant is explicitly disallowed
		if (self::match($descendant->contentBitfield, $this->denyDescendantBitfield))
		{
			return false;
		}

		// Test whether the descendant contains any elements and we disallow elements
		return ($this->allowsChildElements || !$descendant->hasElements);
	}

	/**
	* Return whether this template allows elements as children
	*
	* @return bool
	*/
	public function allowsChildElements()
	{
		return $this->allowsChildElements;
	}

	/**
	* Return whether this template allows text nodes as children
	*
	* @return bool
	*/
	public function allowsText()
	{
		return $this->allowsText;
	}

	/**
	* Return whether this template automatically closes given parent template
	*
	* @param  TemplateInspector $parent
	* @return bool
	*/
	public function closesParent(TemplateInspector $parent)
	{
		// Test whether any of this template's root nodes closes any of given template's leaf nodes
		foreach ($this->rootNodes as $rootNode)
		{
			foreach ($parent->leafNodes as $leafNode)
			{
				if (ElementInspector::closesParent($rootNode, $leafNode))
				{
					return true;
				}
			}
		}

		return false;
	}

	/**
	* Evaluate an XPath expression
	*
	* @param  string          $expr XPath expression
	* @param  DOMElement|null $node Context node, or NULL to pass no context node
	* @return mixed
	*/
	public function evaluate($expr, ?DOMElement $node = null)
	{
		return $this->xpath->evaluate($expr, $node);
	}

	/**
	* Return whether this template should be considered a block-level element
	*
	* @return bool
	*/
	public function isBlock()
	{
		return $this->isBlock;
	}

	/**
	* Return whether this template adds to the list of active formatting elements
	*
	* @return bool
	*/
	public function isFormattingElement()
	{
		return $this->isFormattingElement;
	}

	/**
	* Return whether this template uses the "empty" content model
	*
	* @return bool
	*/
	public function isEmpty()
	{
		return $this->isEmpty;
	}

	/**
	* Return whether this template lets content through via an xsl:apply-templates element
	*
	* @return bool
	*/
	public function isPassthrough()
	{
		return $this->isPassthrough;
	}

	/**
	* Return whether this template uses the "transparent" content model
	*
	* @return bool
	*/
	public function isTransparent()
	{
		return $this->isTransparent;
	}

	/**
	* Return whether all branches have an ancestor that is a void element
	*
	* @return bool
	*/
	public function isVoid()
	{
		return $this->isVoid;
	}

	/**
	* Return whether this template preserves the whitespace in its descendants
	*
	* @return bool
	*/
	public function preservesNewLines()
	{
		return $this->preservesNewLines;
	}

	/**
	* Analyse the content of the whole template and set $this->contentBitfield, $this->hasElements
	* and $this->isPassthrough accordingly
	*
	* @return void
	*/
	protected function analyseContent()
	{
		// Get all non-XSL elements
		$query = '//*[namespace-uri() != "' . self::XMLNS_XSL . '"]';
		foreach ($this->xpath->query($query) as $node)
		{
			$this->contentBitfield |= ElementInspector::getCategoryBitfield($node);
			$this->hasElements = true;
		}

		// Test whether this template is passthrough
		$this->isPassthrough = (bool) $this->evaluate('count(//xsl:apply-templates)');
	}

	/**
	* Record the HTML elements (and their bitfield) rendered at the root of the template
	*
	* Also sets $this->isBlock and $this->hasRootText
	*
	* @return void
	*/
	protected function analyseRootNodes()
	{
		// Get every non-XSL element with no non-XSL ancestor. This should return us the first
		// HTML element of every branch
		$query = '//*[namespace-uri() != "' . self::XMLNS_XSL . '"]'
		       . '[not(ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"])]';
		foreach ($this->xpath->query($query) as $node)
		{
			// Store the root node of this branch
			$this->rootNodes[] = $node;

			// If any root node is a block-level element, we'll mark the template as such
			if ($this->elementIsBlock($node))
			{
				$this->isBlock = true;
			}

			$this->rootBitfields[] = ElementInspector::getCategoryBitfield($node);
		}

		// Test for non-whitespace text nodes at the root. For that we need a predicate that filters
		// out nodes with a non-XSL ancestor...
		$predicate = '[not(ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"])]';

		// ...and nodes with an <xsl:attribute/>, <xsl:comment/> or <xsl:variable/> ancestor
		$predicate .= '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

		$query = '//text()[normalize-space() != ""]' . $predicate
		       . '|'
		       . '//xsl:text[normalize-space() != ""]' . $predicate
		       . '|'
		       . '//xsl:value-of' . $predicate;

		$this->hasRootText = (bool) $this->evaluate('count(' . $query . ')');
	}

	/**
	* Analyse each branch that leads to an <xsl:apply-templates/> tag
	*
	* A branch is the list of non-XSL ancestors of one xsl:apply-templates element. An
	* xsl:apply-templates element with no non-XSL ancestor produces an empty branch
	*
	* @return void
	*/
	protected function analyseBranches()
	{
		$this->branches = [];
		foreach ($this->xpath->query('//xsl:apply-templates') as $applyTemplates)
		{
			$query            = 'ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"]';
			$this->branches[] = iterator_to_array($this->xpath->query($query, $applyTemplates));
		}

		$this->computeAllowsChildElements();
		$this->computeAllowsText();
		$this->computeBitfields();
		$this->computeFormattingElement();
		$this->computeIsEmpty();
		$this->computeIsTransparent();
		$this->computeIsVoid();
		$this->computePreservesNewLines();
		$this->storeLeafNodes();
	}

	/**
	* Test whether any branch of this template has an element that has given property
	*
	* @param  string $methodName Name of the static ElementInspector method used as the test
	* @return bool
	*/
	protected function anyBranchHasProperty($methodName)
	{
		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				if (ElementInspector::$methodName($element))
				{
					return true;
				}
			}
		}

		return false;
	}

	/**
	* Compute the allowChildBitfields and denyDescendantBitfield properties
	*
	* @return void
	*/
	protected function computeBitfields()
	{
		if (empty($this->branches))
		{
			// No xsl:apply-templates: a single zeroed bitfield that matches nothing
			$this->allowChildBitfields = ["\0"];

			return;
		}
		foreach ($this->branches as $branch)
		{
			/**
			* @var string allowChild bitfield for current branch. Starts with the value associated
			*             with <div> in order to approximate a value if the whole branch uses the
			*             transparent content model
			*/
			$branchBitfield = $this->defaultBranchBitfield;

			foreach ($branch as $element)
			{
				if (!ElementInspector::isTransparent($element))
				{
					// If the element isn't transparent, we reset its bitfield
					$branchBitfield = "\0";
				}

				// allowChild rules are cumulative if transparent, and reset above otherwise
				$branchBitfield |= ElementInspector::getAllowChildBitfield($element);

				// denyDescendant rules are cumulative
				$this->denyDescendantBitfield |= ElementInspector::getDenyDescendantBitfield($element);
			}

			// Add this branch's bitfield to the list
			$this->allowChildBitfields[] = $branchBitfield;
		}
	}

	/**
	* Compute the allowsChildElements property
	*
	* A template allows child elements if it has at least one xsl:apply-templates and none of the
	* elements in its branches have the text-only ("to") property
	*
	* @return void
	*/
	protected function computeAllowsChildElements()
	{
		$this->allowsChildElements = ($this->anyBranchHasProperty('isTextOnly')) ? false : !empty($this->branches);
	}

	/**
	* Compute the allowsText property
	*
	* A template is said to allow text if none of the leaf elements disallow text
	*
	* @return void
	*/
	protected function computeAllowsText()
	{
		// array_filter() removes the empty branches, which have no leaf element to test
		foreach (array_filter($this->branches) as $branch)
		{
			if (ElementInspector::disallowsText(end($branch)))
			{
				$this->allowsText = false;

				return;
			}
		}
		$this->allowsText = true;
	}

	/**
	* Compute the isFormattingElement property
	*
	* A template is said to be a formatting element if all (non-zero) of its branches are entirely
	* composed of formatting elements
	*
	* @return void
	*/
	protected function computeFormattingElement()
	{
		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				if (!ElementInspector::isFormattingElement($element) && !$this->isFormattingSpan($element))
				{
					$this->isFormattingElement = false;

					return;
				}
			}
		}

		// At least one non-empty branch is required
		$this->isFormattingElement = (bool) count(array_filter($this->branches));
	}

	/**
	* Compute the isEmpty property
	*
	* A template is said to be empty if it has no xsl:apply-templates elements or if there is an
	* empty element ancestor to an xsl:apply-templates element
	*
	* @return void
	*/
	protected function computeIsEmpty()
	{
		$this->isEmpty = ($this->anyBranchHasProperty('isEmpty')) || empty($this->branches);
	}

	/**
	* Compute the isTransparent property
	*
	* A template is said to be transparent if it has at least one branch and no non-transparent
	* elements in its path
	*
	* @return void
	*/
	protected function computeIsTransparent()
	{
		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				if (!ElementInspector::isTransparent($element))
				{
					$this->isTransparent = false;

					return;
				}
			}
		}
		$this->isTransparent = !empty($this->branches);
	}

	/**
	* Compute the isVoid property
	*
	* A template is said to be void if it has no xsl:apply-templates elements or if there is a void
	* element ancestor to an xsl:apply-templates element
	*
	* @return void
	*/
	protected function computeIsVoid()
	{
		$this->isVoid = ($this->anyBranchHasProperty('isVoid')) || empty($this->branches);
	}

	/**
	* Compute the preservesNewLines property
	*
	* @return void
	*/
	protected function computePreservesNewLines()
	{
		foreach ($this->branches as $branch)
		{
			$style = '';
			foreach ($branch as $element)
			{
				$style .= $this->getStyle($element, true);
			}

			// The greedy .* makes the capture apply to the last white-space declaration found
			if (preg_match('(.*white-space\\s*:\\s*(no|pre))is', $style, $m) && strtolower($m[1]) === 'pre')
			{
				$this->preservesNewLines = true;

				return;
			}
		}
		$this->preservesNewLines = false;
	}

	/**
	* Test whether given element is a block-level element
	*
	* An inline "display" style takes precedence over the element's default
	*
	* @param  DOMElement $element
	* @return bool
	*/
	protected function elementIsBlock(DOMElement $element)
	{
		$style = $this->getStyle($element);
		if (preg_match('(\\bdisplay\\s*:\\s*block)i', $style))
		{
			return true;
		}
		if (preg_match('(\\bdisplay\\s*:\\s*(?:inli|no)ne)i', $style))
		{
			return false;
		}

		return ElementInspector::isBlock($element);
	}

	/**
	* Retrieve and return the inline style assigned to given element
	*
	* @param  DOMElement $node Context node
	* @param  bool       $deep Whether to retrieve the content of all xsl:attribute descendants
	*                          rather than only the xsl:attribute children
	* @return string
	*/
	protected function getStyle(DOMElement $node, $deep = false)
	{
		$style = '';
		if (ElementInspector::preservesWhitespace($node))
		{
			$style .= 'white-space:pre;';
		}
		$style .= $node->getAttribute('style');

		// Add the content of any descendant/child xsl:attribute named "style"
		$query = (($deep) ? './/' : './') . 'xsl:attribute[@name="style"]';
		foreach ($this->xpath->query($query, $node) as $attribute)
		{
			$style .= ';' . $attribute->textContent;
		}

		return $style;
	}

	/**
	* Test whether given node is a span element used for formatting
	*
	* Will return TRUE if the node is a span element with a class attribute and/or a style attribute
	* and no other attributes
	*
	* @param  DOMElement $node
	* @return bool
	*/
	protected function isFormattingSpan(DOMElement $node)
	{
		if ($node->nodeName !== 'span')
		{
			return false;
		}

		if ($node->getAttribute('class') === '' && $node->getAttribute('style') === '')
		{
			return false;
		}

		foreach ($node->attributes as $attrName => $attribute)
		{
			if ($attrName !== 'class' && $attrName !== 'style')
			{
				return false;
			}
		}

		return true;
	}

	/**
	* Store the leaf node of every non-empty branch
	*
	* A leaf node is defined as the closest non-XSL ancestor to an xsl:apply-templates element
	*
	* @return void
	*/
	protected function storeLeafNodes()
	{
		foreach (array_filter($this->branches) as $branch)
		{
			$this->leafNodes[] = end($branch);
		}
	}

	/**
	* Test whether two bitfields have any bits in common
	*
	* @param  string $bitfield1
	* @param  string $bitfield2
	* @return bool
	*/
	protected static function match($bitfield1, $bitfield2)
	{
		// The bitwise & is intentional: applied to two strings it ANDs them byte by byte. Once the
		// NUL bytes are trimmed, anything left means at least one bit is set in both bitfields
		return (trim($bitfield1 & $bitfield2, "\0") !== '');
	}
}