Completed
Push — master ( ee4b1b...d3fde3 )
by Josh
13:55
created

TemplateInspector::closesParent()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 15
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 15
rs 9.2
c 0
b 0
f 0
cc 4
eloc 6
nc 4
nop 1
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use DOMElement;
11
use DOMXPath;
12
13
/**
14
* This class helps the RulesGenerator by analyzing a given template in order to answer questions
15
* such as "can this tag be a child/descendant of that other tag?" and others related to the HTML5
16
* content model.
17
*
18
* We use the HTML5 specs to determine which children or descendants should be allowed or denied
19
* based on HTML5 content models. While it does not exactly match HTML5 content models, it gets
20
* pretty close. We also use HTML5 "optional end tag" rules to create closeParent rules.
21
*
22
* Currently, this method does not evaluate elements created with <xsl:element> correctly, or
23
* attributes created with <xsl:attribute>, and may never do so due to the increased complexity it
24
* would entail. Additionally, it does not evaluate the scope of <xsl:apply-templates/>. For
25
* instance, it will treat <xsl:apply-templates select="LI"/> as if it was <xsl:apply-templates/>
26
*
27
* @link http://dev.w3.org/html5/spec/content-models.html#content-models
28
* @link http://dev.w3.org/html5/spec/syntax.html#optional-tags
29
*/
30
class TemplateInspector
31
{
32
	/**
33
	* XSL namespace
34
	*/
35
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';
36
37
	/**
38
	* @var string[] allowChild bitfield for each branch
39
	*/
40
	protected $allowChildBitfields = [];
41
42
	/**
43
	* @var bool Whether elements are allowed as children
44
	*/
45
	protected $allowsChildElements;
46
47
	/**
48
	* @var bool Whether text nodes are allowed as children
49
	*/
50
	protected $allowsText;
51
52
	/**
53
	* @var array[] Array of array of DOMElement instances
54
	*/
55
	protected $branches;
56
57
	/**
58
	* @var string OR-ed bitfield representing all of the categories used by this template
59
	*/
60
	protected $contentBitfield = "\0";
61
62
	/**
63
	* @var string Default bitfield used at the root of a branch
64
	*/
65
	protected $defaultBranchBitfield;
66
67
	/**
68
	* @var string denyDescendant bitfield
69
	*/
70
	protected $denyDescendantBitfield = "\0";
71
72
	/**
73
	* @var \DOMDocument Document containing the template
74
	*/
75
	protected $dom;
76
77
	/**
78
	* @var bool Whether this template contains any HTML elements
79
	*/
80
	protected $hasElements = false;
81
82
	/**
83
	* @var bool Whether this template renders non-whitespace text nodes at its root
84
	*/
85
	protected $hasRootText;
86
87
	/**
88
	* @var bool Whether this template should be considered a block-level element
89
	*/
90
	protected $isBlock = false;
91
92
	/**
93
	* @var bool Whether the template uses the "empty" content model
94
	*/
95
	protected $isEmpty;
96
97
	/**
98
	* @var bool Whether this template adds to the list of active formatting elements
99
	*/
100
	protected $isFormattingElement;
101
102
	/**
103
	* @var bool Whether this template lets content through via an xsl:apply-templates element
104
	*/
105
	protected $isPassthrough = false;
106
107
	/**
108
	* @var bool Whether all branches use the transparent content model
109
	*/
110
	protected $isTransparent = false;
111
112
	/**
113
	* @var bool Whether all branches have an ancestor that is a void element
114
	*/
115
	protected $isVoid;
116
117
	/**
118
	* @var array Last HTML element that precedes an <xsl:apply-templates/> node
119
	*/
120
	protected $leafNodes = [];
121
122
	/**
123
	* @var bool Whether any branch has an element that preserves new lines by default (e.g. <pre>)
124
	*/
125
	protected $preservesNewLines = false;
126
127
	/**
128
	* @var array Bitfield of the first HTML element of every branch
129
	*/
130
	protected $rootBitfields = [];
131
132
	/**
133
	* @var array Every HTML element that has no HTML parent
134
	*/
135
	protected $rootNodes = [];
136
137
	/**
138
	* @var DOMXPath XPath engine associated with $this->dom
139
	*/
140
	protected $xpath;
141
142
	/**
143
	* Constructor
144
	*
145
	* @param string $template Template content
146
	*/
147
	public function __construct($template)
148
	{
149
		$this->dom   = TemplateHelper::loadTemplate($template);
150
		$this->xpath = new DOMXPath($this->dom);
151
152
		$this->defaultBranchBitfield = ElementInspector::getAllowChildBitfield($this->dom->createElement('div'));
153
154
		$this->analyseRootNodes();
155
		$this->analyseBranches();
156
		$this->analyseContent();
157
	}
158
159
	/**
	* Test whether given template can be used as a child of this template
	*
	* The child is rejected if it is denied as a descendant, if any of its root bitfields fails to
	* match any of this template's allowChild bitfields, or if it renders text at its root while
	* this template does not allow text.
	*
	* @param  TemplateInspector $child
	* @return bool
	*/
	public function allowsChild(TemplateInspector $child)
	{
		// A template can technically be allowed as a child yet still be denied as a descendant
		if (!$this->allowsDescendant($child))
		{
			return false;
		}

		// Every (branch, root) pair must have at least one bit in common
		foreach ($this->allowChildBitfields as $allowedBitfield)
		{
			foreach ($child->rootBitfields as $childRootBitfield)
			{
				if (!self::match($childRootBitfield, $allowedBitfield))
				{
					return false;
				}
			}
		}

		// Root text in the child is only acceptable if this template allows text
		if ($this->allowsText)
		{
			return true;
		}

		return !$child->hasRootText;
	}
186
187
	/**
	* Test whether given template can be used as a descendant of this template
	*
	* @param  TemplateInspector $descendant
	* @return bool
	*/
	public function allowsDescendant(TemplateInspector $descendant)
	{
		// Explicitly denied: the descendant's content shares bits with our denyDescendant bitfield
		$isDenied = self::match($descendant->contentBitfield, $this->denyDescendantBitfield);
		if ($isDenied)
		{
			return false;
		}

		// The descendant contains elements but this template does not allow any
		if ($descendant->hasElements && !$this->allowsChildElements)
		{
			return false;
		}

		return true;
	}
204
205
	/**
	* Return the allowsChildElements flag, as set by computeAllowsChildElements()
	*
	* @return bool Whether elements are allowed as children of this template
	*/
	public function allowsChildElements()
	{
		return $this->allowsChildElements;
	}
214
215
	/**
	* Return the allowsText flag, as set by computeAllowsText()
	*
	* @return bool Whether text nodes are allowed as children of this template
	*/
	public function allowsText()
	{
		return $this->allowsText;
	}
224
225
	/**
	* Test whether this template automatically closes given parent template
	*
	* Each of this template's root nodes is tested against each of the parent's leaf nodes using
	* ElementInspector::closesParent(). One positive result is enough.
	*
	* @param  TemplateInspector $parent
	* @return bool
	*/
	public function closesParent(TemplateInspector $parent)
	{
		$parentLeafNodes = $parent->leafNodes;
		foreach ($this->rootNodes as $rootNode)
		{
			foreach ($parentLeafNodes as $leafNode)
			{
				// NOTE(review): the parent's leaf node is passed first and this template's root
				// node second; confirm that this matches the parameter order expected by
				// ElementInspector::closesParent()
				$closes = ElementInspector::closesParent($leafNode, $rootNode);
				if ($closes)
				{
					return true;
				}
			}
		}

		return false;
	}
246
247
	/**
	* Evaluate an XPath expression against this template's document
	*
	* Thin wrapper around DOMXPath::evaluate() using the XPath engine created in the constructor.
	*
	* @param  string     $expr XPath expression
	* @param  DOMElement $node Context node, or NULL to use the engine's default context
	* @return mixed            Whatever DOMXPath::evaluate() returns for the expression
	*/
	public function evaluate($expr, DOMElement $node = null)
	{
		$result = $this->xpath->evaluate($expr, $node);

		return $result;
	}
258
259
	/**
	* Return the isBlock flag
	*
	* The flag is set by analyseRootNodes() as soon as one root node is found to be block-level.
	*
	* @return bool Whether this template should be considered a block-level element
	*/
	public function isBlock()
	{
		return $this->isBlock;
	}
268
269
	/**
	* Return the isFormattingElement flag, as set by computeFormattingElement()
	*
	* @return bool Whether this template adds to the list of active formatting elements
	*/
	public function isFormattingElement()
	{
		return $this->isFormattingElement;
	}
278
279
	/**
	* Return the isEmpty flag, as set by computeIsEmpty()
	*
	* @return bool Whether this template uses the "empty" content model
	*/
	public function isEmpty()
	{
		return $this->isEmpty;
	}
288
289
	/**
	* Return the isPassthrough flag
	*
	* The flag is set by analyseContent() and is true if the template contains at least one
	* xsl:apply-templates element.
	*
	* @return bool Whether this template lets content through
	*/
	public function isPassthrough()
	{
		return $this->isPassthrough;
	}
298
299
	/**
	* Return the isTransparent flag, as set by computeIsTransparent()
	*
	* @return bool Whether this template uses the "transparent" content model
	*/
	public function isTransparent()
	{
		return $this->isTransparent;
	}
308
309
	/**
	* Return the isVoid flag, as set by computeIsVoid()
	*
	* The flag is true if the template has no xsl:apply-templates element, or if any branch
	* contains an element reported as void by ElementInspector.
	*
	* @return bool
	*/
	public function isVoid()
	{
		return $this->isVoid;
	}
318
319
	/**
	* Return the preservesNewLines flag, as set by computePreservesNewLines()
	*
	* @return bool Whether any branch preserves new lines in its descendants
	*/
	public function preservesNewLines()
	{
		return $this->preservesNewLines;
	}
328
329
	/**
	* Analyse the content of the whole template
	*
	* Sets $this->contentBitfield to the OR-ed category bitfields of every non-XSL element, sets
	* $this->hasElements if there is at least one such element, and sets $this->isPassthrough
	* depending on whether the template contains an xsl:apply-templates element.
	*/
	protected function analyseContent()
	{
		// Collect every element that is not in the XSL namespace
		$nonXslElements = $this->xpath->query('//*[namespace-uri() != "' . self::XMLNS_XSL . '"]');
		foreach ($nonXslElements as $element)
		{
			$this->contentBitfield |= ElementInspector::getCategoryBitfield($element);
			$this->hasElements = true;
		}

		// Any xsl:apply-templates element makes the template passthrough
		$applyTemplatesCount = $this->evaluate('count(//xsl:apply-templates)');
		$this->isPassthrough = (bool) $applyTemplatesCount;
	}
345
346
	/**
	* Record the HTML elements (and their bitfield) rendered at the root of the template
	*
	* Also sets $this->isBlock if any root element is block-level, and $this->hasRootText if the
	* template renders text that is not contained in any HTML element.
	*/
	protected function analyseRootNodes()
	{
		// XPath fragments shared by the queries below
		$isNotXsl       = '[namespace-uri() != "' . self::XMLNS_XSL . '"]';
		$noHtmlAncestor = '[not(ancestor::*' . $isNotXsl . ')]';

		// Every non-XSL element with no non-XSL ancestor, which should be the first HTML element
		// of every branch
		$rootElements = $this->xpath->query('//*' . $isNotXsl . $noHtmlAncestor);
		foreach ($rootElements as $rootElement)
		{
			$this->rootNodes[] = $rootElement;

			// One block-level root node is enough to mark the whole template as block-level
			if ($this->elementIsBlock($rootElement))
			{
				$this->isBlock = true;
			}

			$this->rootBitfields[] = ElementInspector::getCategoryBitfield($rootElement);
		}

		// Root text is text that has no non-XSL ancestor and that is not located inside of an
		// <xsl:attribute/>, <xsl:comment/> or <xsl:variable/>
		$predicate = $noHtmlAncestor
		           . '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

		// Non-whitespace text nodes, non-whitespace xsl:text elements and any xsl:value-of count
		// as text
		$textQueries = [
			'//text()[normalize-space() != ""]' . $predicate,
			'//xsl:text[normalize-space() != ""]' . $predicate,
			'//xsl:value-of' . $predicate
		];

		$this->hasRootText = (bool) $this->evaluate('count(' . implode('|', $textQueries) . ')');
	}
384
385
	/**
	* Analyse each branch that leads to an <xsl:apply-templates/> element
	*
	* A branch is the list of non-XSL ancestors of one xsl:apply-templates element. Once the
	* branches are collected, every branch-dependent property is computed.
	*/
	protected function analyseBranches()
	{
		// The same relative query is run from each xsl:apply-templates element
		$ancestorQuery = 'ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"]';

		$branches = [];
		foreach ($this->xpath->query('//xsl:apply-templates') as $applyTemplates)
		{
			$ancestors  = $this->xpath->query($ancestorQuery, $applyTemplates);
			$branches[] = iterator_to_array($ancestors);
		}
		$this->branches = $branches;

		$this->computeAllowsChildElements();
		$this->computeAllowsText();
		$this->computeBitfields();
		$this->computeFormattingElement();
		$this->computeIsEmpty();
		$this->computeIsTransparent();
		$this->computeIsVoid();
		$this->computePreservesNewLines();
		$this->storeLeafNodes();
	}
407
408
	/**
	* Test whether any element of any branch has given property
	*
	* @param  string $methodName Name of the static ElementInspector method used as the test
	* @return bool               TRUE as soon as the method returns a truthy value for an element
	*/
	protected function anyBranchHasProperty($methodName)
	{
		foreach ($this->branches as $branchElements)
		{
			foreach ($branchElements as $branchElement)
			{
				// Variable static call: ElementInspector::{$methodName}()
				$hasProperty = ElementInspector::$methodName($branchElement);
				if ($hasProperty)
				{
					return true;
				}
			}
		}

		return false;
	}
429
430
	/**
	* Compute the allowChildBitfields and denyDescendantBitfield properties
	*
	* One allowChild bitfield is stored per branch. If there are no branches, a single NUL
	* bitfield is stored instead.
	*
	* @return void
	*/
	protected function computeBitfields()
	{
		if (empty($this->branches))
		{
			$this->allowChildBitfields = ["\0"];

			return;
		}

		foreach ($this->branches as $branch)
		{
			// Each branch starts with the value associated with <div> in order to approximate a
			// value if the whole branch uses the transparent content model
			$branchBitfield = $this->defaultBranchBitfield;

			foreach ($branch as $element)
			{
				// allowChild rules are cumulative through transparent elements. Any other element
				// discards what has been accumulated so far
				if (ElementInspector::isTransparent($element))
				{
					$inherited = $branchBitfield;
				}
				else
				{
					$inherited = "\0";
				}
				$branchBitfield = $inherited | ElementInspector::getAllowChildBitfield($element);

				// denyDescendant rules are always cumulative, across all branches
				$this->denyDescendantBitfield |= ElementInspector::getDenyDescendantBitfield($element);
			}

			$this->allowChildBitfields[] = $branchBitfield;
		}
	}
471
472
	/**
	* Compute the allowsChildElements property
	*
	* A template allows child elements if it has at least one xsl:apply-templates element and no
	* element in any of its branches is text-only according to ElementInspector::isTextOnly()
	*
	* @return void
	*/
	protected function computeAllowsChildElements()
	{
		$hasBranches = !empty($this->branches);

		$this->allowsChildElements = $hasBranches && !$this->anyBranchHasProperty('isTextOnly');
	}
484
485
	/**
	* Compute the allowsText property
	*
	* A template is said to allow text if none of its leaf elements disallow text. The leaf
	* element is the last element of a branch; branches with no elements are ignored.
	*
	* @return void
	*/
	protected function computeAllowsText()
	{
		$allowsText = true;
		foreach ($this->branches as $branch)
		{
			if (empty($branch))
			{
				continue;
			}

			if (ElementInspector::disallowsText(end($branch)))
			{
				$allowsText = false;
				break;
			}
		}

		$this->allowsText = $allowsText;
	}
505
506
	/**
	* Compute the isFormattingElement property
	*
	* A template is said to be a formatting element if at least one of its branches contains an
	* element and every element in every branch is either a formatting element or a formatting
	* span
	*
	* @return void
	*/
	protected function computeFormattingElement()
	{
		// Becomes true as soon as one branch is found to contain an element
		$hasNonEmptyBranch = false;

		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				$hasNonEmptyBranch = true;

				$isFormatting = ElementInspector::isFormattingElement($element)
				             || $this->isFormattingSpan($element);
				if (!$isFormatting)
				{
					$this->isFormattingElement = false;

					return;
				}
			}
		}

		$this->isFormattingElement = $hasNonEmptyBranch;
	}
530
531
	/**
	* Compute the isEmpty property
	*
	* A template is said to be empty if it has no xsl:apply-templates element, or if any
	* xsl:apply-templates element has an ancestor reported as empty by ElementInspector::isEmpty()
	*
	* @return void
	*/
	protected function computeIsEmpty()
	{
		$this->isEmpty = empty($this->branches) || $this->anyBranchHasProperty('isEmpty');
	}
543
544
	/**
	* Compute the isTransparent property
	*
	* A template is said to be transparent if it has at least one branch and none of its branches
	* contain a non-transparent element
	*
	* @return void
	*/
	protected function computeIsTransparent()
	{
		// Without any branch the template cannot be transparent
		$isTransparent = !empty($this->branches);

		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				if (!ElementInspector::isTransparent($element))
				{
					// One non-transparent element is enough, stop looking
					$isTransparent = false;
					break 2;
				}
			}
		}

		$this->isTransparent = $isTransparent;
	}
568
569
	/**
	* Compute the isVoid property
	*
	* A template is said to be void if it has no xsl:apply-templates element, or if any
	* xsl:apply-templates element has an ancestor reported as void by ElementInspector::isVoid()
	*
	* @return void
	*/
	protected function computeIsVoid()
	{
		$this->isVoid = empty($this->branches) || $this->anyBranchHasProperty('isVoid');
	}
581
582
	/**
	* Compute the preservesNewLines property
	*
	* The styles of every element of a branch are concatenated, and the template preserves new
	* lines if the last white-space declaration of any branch has a value that starts with "pre"
	*
	* @return void
	*/
	protected function computePreservesNewLines()
	{
		$preservesNewLines = false;
		foreach ($this->branches as $branch)
		{
			// Concatenate the styles of the whole branch, including xsl:attribute descendants
			$branchStyle = '';
			foreach ($branch as $element)
			{
				$branchStyle .= $this->getStyle($element, true);
			}

			// The greedy .* makes the expression capture the last white-space declaration. Only
			// the start of its value is captured, either "no" or "pre"
			$hasWhiteSpace = preg_match('(.*white-space\\s*:\\s*(no|pre))is', $branchStyle, $m);
			if ($hasWhiteSpace && strtolower($m[1]) === 'pre')
			{
				$preservesNewLines = true;
				break;
			}
		}

		$this->preservesNewLines = $preservesNewLines;
	}
606
607
	/**
	* Test whether given element is a block-level element
	*
	* The element's inline style takes precedence: "display:block" makes it a block, while
	* "display:inline" or "display:none" make it a non-block. Without either, the decision is
	* left to ElementInspector::isBlock()
	*
	* @param  DOMElement $element
	* @return bool
	*/
	protected function elementIsBlock(DOMElement $element)
	{
		$inlineStyle = $this->getStyle($element);

		$forcesBlock = preg_match('(\\bdisplay\\s*:\\s*block)i', $inlineStyle);
		if ($forcesBlock)
		{
			return true;
		}

		// Matches values that start with "inline" or "none"
		$forcesNonBlock = preg_match('(\\bdisplay\\s*:\\s*(?:inli|no)ne)i', $inlineStyle);
		if ($forcesNonBlock)
		{
			return false;
		}

		return ElementInspector::isBlock($element);
	}
627
628
	/**
	* Retrieve and return the inline style assigned to given element
	*
	* The returned string is made of "white-space:pre;" if ElementInspector reports that the
	* element preserves whitespace, followed by the content of the element's style attribute,
	* followed by the text content of each matching xsl:attribute preceded by a semicolon.
	*
	* @param  DOMElement $node Context node
	* @param  bool       $deep Whether to use the xsl:attribute descendants of the node rather
	*                          than only its xsl:attribute children
	* @return string
	*/
	protected function getStyle(DOMElement $node, $deep = false)
	{
		$style  = (ElementInspector::preservesWhitespace($node)) ? 'white-space:pre;' : '';
		$style .= $node->getAttribute('style');

		// Append the content of any descendant/child xsl:attribute named "style"
		$axis            = ($deep) ? './/' : './';
		$styleAttributes = $this->xpath->query($axis . 'xsl:attribute[@name="style"]', $node);
		foreach ($styleAttributes as $styleAttribute)
		{
			$style .= ';' . $styleAttribute->textContent;
		}

		return $style;
	}
653
654
	/**
	* Test whether given node is a span element used for formatting
	*
	* Will return TRUE if the node is a span element with a non-empty class attribute and/or a
	* non-empty style attribute, and no attributes other than those two
	*
	* @param  DOMElement $node
	* @return boolean
	*/
	protected function isFormattingSpan(DOMElement $node)
	{
		if ($node->nodeName !== 'span')
		{
			return false;
		}

		// At least one of the two formatting attributes must have a value
		$hasClass = ($node->getAttribute('class') !== '');
		$hasStyle = ($node->getAttribute('style') !== '');
		if (!$hasClass && !$hasStyle)
		{
			return false;
		}

		// Any other attribute disqualifies the span
		foreach ($node->attributes as $attrName => $attribute)
		{
			$isFormattingAttribute = ($attrName === 'class' || $attrName === 'style');
			if (!$isFormattingAttribute)
			{
				return false;
			}
		}

		return true;
	}
685
686
	/**
	* Store the leaf node of every branch in $this->leafNodes
	*
	* The leaf node is the last element of a branch, a branch being the list of non-XSL ancestors
	* of an xsl:apply-templates element. Branches with no elements are skipped.
	*
	* @return void
	*/
	protected function storeLeafNodes()
	{
		foreach ($this->branches as $branch)
		{
			if (!empty($branch))
			{
				$this->leafNodes[] = end($branch);
			}
		}
	}
700
701
	/**
	* Test whether two bitfields have any bits in common
	*
	* The bitfields are strings. They are AND-ed together and the result is considered a match
	* if it contains anything other than NUL bytes.
	*
	* @param  string $bitfield1
	* @param  string $bitfield2
	* @return bool
	*/
	protected static function match($bitfield1, $bitfield2)
	{
		$commonBits = $bitfield1 & $bitfield2;

		return (trim($commonBits, "\0") !== '');
	}
712
}