Completed
Push — master ( 19d82a...a1b60a )
by Josh
20:14
created

TemplateInspector   D

Complexity

Total Complexity 82

Size/Duplication

Total Lines 930
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 0
Metric Value
wmc 82
lcom 1
cbo 1
dl 0
loc 930
rs 4.4444
c 0
b 0
f 0

26 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 9 1
C allowsChild() 0 26 7
A allowsDescendant() 0 16 4
A allowsChildElements() 0 4 1
A allowsText() 0 4 1
B closesParent() 0 22 5
A getDOM() 0 4 1
A isBlock() 0 4 1
A isFormattingElement() 0 4 1
A isEmpty() 0 4 1
A isIframe() 0 4 1
A isPassthrough() 0 4 1
A isTransparent() 0 4 1
A isVoid() 0 4 1
A preservesNewLines() 0 4 1
A analyseContent() 0 14 2
B analyseRootNodes() 0 47 5
F analyseBranches() 0 206 21
A elementIsBlock() 0 14 3
A evaluate() 0 4 1
A getStyle() 0 15 2
A getXSLElements() 0 4 1
C isFormattingSpan() 0 23 7
C getBitfield() 0 42 7
A hasProperty() 0 14 4
A match() 0 4 1

How to fix   Complexity   

Complex Class

Complex classes like TemplateInspector often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use TemplateInspector, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use DOMDocument;
11
use DOMElement;
12
use DOMXPath;
13
14
/**
15
* This class helps the RulesGenerator by analyzing a given template in order to answer questions
16
* such as "can this tag be a child/descendant of that other tag?" and others related to the HTML5
17
* content model.
18
*
19
* We use the HTML5 specs to determine which children or descendants should be allowed or denied
20
* based on HTML5 content models. While it does not exactly match HTML5 content models, it gets
21
* pretty close. We also use HTML5 "optional end tag" rules to create closeParent rules.
22
*
23
* Currently, this class does not evaluate elements created with <xsl:element> correctly, or
24
* attributes created with <xsl:attribute>, and may never do so due to the increased complexity it
25
* would entail. Additionally, it does not evaluate the scope of <xsl:apply-templates/>. For
26
* instance, it will treat <xsl:apply-templates select="LI"/> as if it were <xsl:apply-templates/>.
27
*
28
* @link http://dev.w3.org/html5/spec/content-models.html#content-models
29
* @link http://dev.w3.org/html5/spec/syntax.html#optional-tags
30
* @see  /scripts/patchTemplateInspector.php
31
*/
32
class TemplateInspector
33
{
34
	/**
35
	* @var string[] allowChild bitfield for each branch
36
	*/
37
	protected $allowChildBitfields = [];
38
39
	/**
40
	* @var bool Whether elements are allowed as children
41
	*/
42
	protected $allowsChildElements = true;
43
44
	/**
45
	* @var bool Whether text nodes are allowed as children
46
	*/
47
	protected $allowsText = true;
48
49
	/**
50
	* @var string OR-ed bitfield representing all of the categories used by this template
51
	*/
52
	protected $contentBitfield = "\0";
53
54
	/**
55
	* @var string denyDescendant bitfield
56
	*/
57
	protected $denyDescendantBitfield = "\0";
58
59
	/**
60
	* @var DOMDocument Document containing the template
61
	*/
62
	protected $dom;
63
64
	/**
65
	* @var bool Whether this template contains any HTML elements
66
	*/
67
	protected $hasElements = false;
68
69
	/**
70
	* @var bool Whether this template renders non-whitespace text nodes at its root
71
	*/
72
	protected $hasRootText = false;
73
74
	/**
75
	* @var bool Whether this template should be considered a block-level element
76
	*/
77
	protected $isBlock = false;
78
79
	/**
80
	* @var bool Whether the template uses the "empty" content model
81
	*/
82
	protected $isEmpty = true;
83
84
	/**
85
	* @var bool Whether this template adds to the list of active formatting elements
86
	*/
87
	protected $isFormattingElement = false;
88
89
	/**
90
	* @var bool Whether this template lets content through via an xsl:apply-templates element
91
	*/
92
	protected $isPassthrough = false;
93
94
	/**
95
	* @var bool Whether all branches use the transparent content model
96
	*/
97
	protected $isTransparent = false;
98
99
	/**
100
	* @var bool Whether all branches have an ancestor that is a void element
101
	*/
102
	protected $isVoid = true;
103
104
	/**
105
	* @var array Names of every last HTML element that precedes an <xsl:apply-templates/> node
106
	*/
107
	protected $leafNodes = [];
108
109
	/**
110
	* @var bool Whether any branch has an element that preserves new lines by default (e.g. <pre>)
111
	*/
112
	protected $preservesNewLines = false;
113
114
	/**
115
	* @var array Bitfield of the first HTML element of every branch
116
	*/
117
	protected $rootBitfields = [];
118
119
	/**
120
	* @var array Names of every HTML element that has no HTML parent
121
	*/
122
	protected $rootNodes = [];
123
124
	/**
125
	* @var DOMXPath XPath engine associated with $this->dom
126
	*/
127
	protected $xpath;
128
129
	/**
	* Constructor
	*
	* Loads the template into a DOM document, binds an XPath engine to it, then runs the three
	* analysis passes (root nodes, branches, whole content) that populate this instance
	*
	* @param  string $template Template content
	*/
	public function __construct($template)
	{
		$dom = TemplateHelper::loadTemplate($template);

		$this->dom   = $dom;
		$this->xpath = new DOMXPath($dom);

		$this->analyseRootNodes();
		$this->analyseBranches();
		$this->analyseContent();
	}
143
144
	/**
	* Test whether given template can be used as a child of this template
	*
	* The child is rejected if it is denied as a descendant, if any of its root bitfields shares
	* no bit with any of this template's allowChild bitfields, or if it renders text at its root
	* while this template does not allow text
	*
	* @param  TemplateInspector $child
	* @return bool
	*/
	public function allowsChild(TemplateInspector $child)
	{
		// A template that is denied as a descendant can never be accepted as a child
		if (!$this->allowsDescendant($child))
		{
			return false;
		}

		// Every root bitfield of the child must match every allowChild bitfield of this template
		foreach ($this->allowChildBitfields as $allowedBitfield)
		{
			foreach ($child->rootBitfields as $childBitfield)
			{
				if (!self::match($childBitfield, $allowedBitfield))
				{
					return false;
				}
			}
		}

		// Root-level text is only acceptable if this template allows text nodes
		return ($this->allowsText || !$child->hasRootText);
	}
176
177
	/**
	* Test whether given template can be used as a descendant of this template
	*
	* @param  TemplateInspector $descendant
	* @return bool
	*/
	public function allowsDescendant(TemplateInspector $descendant)
	{
		// The descendant is explicitly denied if its content shares any bit with our
		// denyDescendant bitfield
		$isDenied = self::match($descendant->contentBitfield, $this->denyDescendantBitfield);

		// The descendant is also rejected if it contains elements while we allow none
		$hasUnwantedElements = (!$this->allowsChildElements && $descendant->hasElements);

		return !($isDenied || $hasUnwantedElements);
	}
199
200
	/**
	* Tell whether elements are accepted as children of this template
	*
	* @return bool Current value of the allowsChildElements property
	*/
	public function allowsChildElements()
	{
		return $this->allowsChildElements;
	}
209
210
	/**
	* Tell whether text nodes are accepted as children of this template
	*
	* @return bool Current value of the allowsText property
	*/
	public function allowsText()
	{
		return $this->allowsText;
	}
219
220
	/**
	* Test whether this template automatically closes given parent template
	*
	* Uses the "cp" list of each of this template's root nodes and compares it to the names of
	* the parent's leaf nodes
	*
	* @param  TemplateInspector $parent
	* @return bool
	*/
	public function closesParent(TemplateInspector $parent)
	{
		foreach ($this->rootNodes as $rootName)
		{
			// Only root nodes that have a non-empty "cp" list can close anything
			if (!empty(self::$htmlElements[$rootName]['cp']))
			{
				$closedNames = self::$htmlElements[$rootName]['cp'];

				foreach ($parent->leafNodes as $leafName)
				{
					// If any of this template's root nodes closes one of the parent's leaf
					// nodes, we consider that this template closes the other one
					if (in_array($leafName, $closedNames, true))
					{
						return true;
					}
				}
			}
		}

		return false;
	}
248
249
	/**
	* Give access to the DOMDocument that holds the source template
	*
	* NOTE: the returned document is the instance used internally and should not be modified
	*
	* @return DOMDocument
	*/
	public function getDOM()
	{
		return $this->dom;
	}
260
261
	/**
	* Tell whether this template should be considered a block-level element
	*
	* @return bool Current value of the isBlock property
	*/
	public function isBlock()
	{
		return $this->isBlock;
	}
270
271
	/**
	* Tell whether this template adds to the list of active formatting elements
	*
	* @return bool Current value of the isFormattingElement property
	*/
	public function isFormattingElement()
	{
		return $this->isFormattingElement;
	}
280
281
	/**
	* Tell whether this template uses the "empty" content model
	*
	* @return bool Current value of the isEmpty property
	*/
	public function isEmpty()
	{
		return $this->isEmpty;
	}
290
291
	/**
	* Tell whether this template represents a single iframe
	*
	* @return bool TRUE if the list of root nodes is exactly one element named "iframe"
	*/
	public function isIframe()
	{
		return (['iframe'] === $this->rootNodes);
	}
300
301
	/**
	* Tell whether this template lets content through via an xsl:apply-templates element
	*
	* @return bool Current value of the isPassthrough property
	*/
	public function isPassthrough()
	{
		return $this->isPassthrough;
	}
310
311
	/**
	* Tell whether this template uses the "transparent" content model
	*
	* @return bool Current value of the isTransparent property
	*/
	public function isTransparent()
	{
		return $this->isTransparent;
	}
320
321
	/**
	* Tell whether all branches have an ancestor that is a void element
	*
	* @return bool Current value of the isVoid property
	*/
	public function isVoid()
	{
		return $this->isVoid;
	}
330
331
	/**
	* Tell whether this template preserves new lines in its content
	*
	* @return bool Current value of the preservesNewLines property
	*/
	public function preservesNewLines()
	{
		return $this->preservesNewLines;
	}
340
341
	/**
	* Analyses the content of the whole template and sets $this->contentBitfield accordingly
	*
	* Also sets $this->hasElements if the template contains any non-XSL element, and
	* $this->isPassthrough depending on the presence of xsl:apply-templates elements
	*/
	protected function analyseContent()
	{
		// Collect every element that does not belong to the XSL namespace
		$htmlNodes = $this->xpath->query(
			'//*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]'
		);

		foreach ($htmlNodes as $htmlNode)
		{
			$this->hasElements      = true;
			$this->contentBitfield |= $this->getBitfield($htmlNode->localName, 'c', $htmlNode);
		}

		// The template is passthrough if it contains at least one xsl:apply-templates element
		$applyTemplatesCount = $this->xpath->evaluate('count(//xsl:apply-templates)');
		$this->isPassthrough = (bool) $applyTemplatesCount;
	}
358
359
	/**
	* Records the HTML elements (and their bitfield) rendered at the root of the template
	*
	* Populates $this->rootNodes and $this->rootBitfields, and sets $this->isBlock and
	* $this->hasRootText where applicable
	*/
	protected function analyseRootNodes()
	{
		// Predicate that only keeps nodes that have no non-XSL ancestor
		$noHtmlAncestor = '[not(ancestor::*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"])]';

		// Every non-XSL element with no non-XSL ancestor. This should return us the first HTML
		// element of every branch
		$rootQuery = '//*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]' . $noHtmlAncestor;

		foreach ($this->xpath->query($rootQuery) as $rootNode)
		{
			// Save the actual name of the root node
			$actualName        = $rootNode->localName;
			$this->rootNodes[] = $actualName;

			// Unknown elements are treated as if they were a <span> element
			$elName = (isset(self::$htmlElements[$actualName])) ? $actualName : 'span';

			// If any root node is a block-level element, we'll mark the template as such
			if ($this->elementIsBlock($elName, $rootNode))
			{
				$this->isBlock = true;
			}

			$this->rootBitfields[] = $this->getBitfield($elName, 'c', $rootNode);
		}

		// Test for text rendered at the root. Candidates must have no non-XSL ancestor and must
		// not be located inside of an <xsl:attribute/>, <xsl:comment/> or <xsl:variable/>
		$rootTextPredicate = $noHtmlAncestor
		                   . '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

		// Candidates are non-whitespace text nodes, non-whitespace <xsl:text/> elements and any
		// <xsl:value-of/> element
		$textQuery = '//text()[normalize-space() != ""]' . $rootTextPredicate
		           . '|//xsl:text[normalize-space() != ""]' . $rootTextPredicate
		           . '|//xsl:value-of' . $rootTextPredicate;

		if ($this->evaluate($textQuery, $this->dom->documentElement))
		{
			$this->hasRootText = true;
		}
	}
409
410
	/**
	* Analyses each branch that leads to an <xsl:apply-templates/> tag
	*
	* A branch is the list of non-XSL ancestors of an <xsl:apply-templates/> element. For each
	* branch, this method computes its allowChild bitfield and a set of flags, then merges them
	* into the properties of this instance
	*/
	protected function analyseBranches()
	{
		/**
		* @var array allowChild bitfield for each branch
		*/
		$branchBitfields = [];

		/**
		* @var bool Whether this template should be considered a formatting element
		*/
		$isFormattingElement = true;

		// Consider this template transparent unless we find out there are no branches or that one
		// of the branches is not transparent
		$this->isTransparent = true;

		// For each <xsl:apply-templates/> element...
		foreach ($this->getXSLElements('apply-templates') as $applyTemplates)
		{
			// ...we retrieve all non-XSL ancestors
			$nodes = $this->xpath->query(
				'ancestor::*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]',
				$applyTemplates
			);

			/**
			* @var bool Whether this branch allows elements
			*/
			$allowsChildElements = true;

			/**
			* @var bool Whether this branch allows text nodes
			*/
			$allowsText = true;

			/**
			* @var string allowChild bitfield for current branch. Starts with the value associated
			*             with <div> in order to approximate a value if the whole branch uses the
			*             transparent content model
			*/
			$branchBitfield = self::$htmlElements['div']['ac'];

			/**
			* @var bool Whether this branch denies all non-text descendants
			*/
			$isEmpty = false;

			/**
			* @var bool Whether this branch contains a void element
			*/
			$isVoid = false;

			/**
			* @var string Name of the last node of this branch
			*/
			$leafNode = null;

			/**
			* @var bool Whether this branch preserves new lines
			*/
			$preservesNewLines = false;

			foreach ($nodes as $node)
			{
				$elName = $leafNode = $node->localName;

				if (!isset(self::$htmlElements[$elName]))
				{
					// Unknown elements are treated as if they were a <span> element
					$elName = 'span';
				}

				// Test whether the element is void
				if ($this->hasProperty($elName, 'v', $node))
				{
					$isVoid = true;
				}

				// Test whether the element uses the "empty" content model
				if ($this->hasProperty($elName, 'e', $node))
				{
					$isEmpty = true;
				}

				if (!$this->hasProperty($elName, 't', $node))
				{
					// If the element isn't transparent, we reset its bitfield
					$branchBitfield = "\0";

					// Also, it means that the template itself isn't transparent
					$this->isTransparent = false;
				}

				// Test whether this element is a formatting element
				if (!$this->hasProperty($elName, 'fe', $node)
				 && !$this->isFormattingSpan($node))
				{
					$isFormattingElement = false;
				}

				// Test whether this branch allows elements. The value is overwritten at every
				// iteration, the last element of the branch decides
				$allowsChildElements = !$this->hasProperty($elName, 'to', $node);

				// Test whether this branch allows text nodes. Same as above, the last element of
				// the branch decides
				$allowsText = !$this->hasProperty($elName, 'nt', $node);

				// allowChild rules are cumulative if transparent, and reset above otherwise
				$branchBitfield |= $this->getBitfield($elName, 'ac', $node);

				// denyDescendant rules are cumulative
				$this->denyDescendantBitfield |= $this->getBitfield($elName, 'dd', $node);

				// Update the branch's value only if this element declares a white-space behaviour
				$nodePreservesNewLines = $this->getNewLinesPreservation($elName, $node);
				if (isset($nodePreservesNewLines))
				{
					$preservesNewLines = $nodePreservesNewLines;
				}
			}

			// Add this branch's bitfield to the list
			$branchBitfields[] = $branchBitfield;

			// Save the name of the last node processed
			if (isset($leafNode))
			{
				$this->leafNodes[] = $leafNode;
			}

			// If any branch disallows elements, the template disallows elements
			if (!$allowsChildElements)
			{
				$this->allowsChildElements = false;
			}

			// If any branch disallows text, the template disallows text
			if (!$allowsText)
			{
				$this->allowsText = false;
			}

			// If any branch is not empty, the template is not empty
			if (!$isEmpty)
			{
				$this->isEmpty = false;
			}

			// If any branch is not void, the template is not void
			if (!$isVoid)
			{
				$this->isVoid = false;
			}

			// If any branch preserves new lines, the template preserves new lines
			if ($preservesNewLines)
			{
				$this->preservesNewLines = true;
			}
		}

		if (empty($branchBitfields))
		{
			// No branches => not transparent and no child elements
			$this->allowChildBitfields = ["\0"];
			$this->allowsChildElements = false;
			$this->isTransparent       = false;
		}
		else
		{
			$this->allowChildBitfields = $branchBitfields;

			// Set the isFormattingElement property to our final value, but only if at least one
			// branch had a leaf node, i.e. at least one non-XSL ancestor
			if (!empty($this->leafNodes))
			{
				$this->isFormattingElement = $isFormattingElement;
			}
		}
	}

	/**
	* Test whether given element declares how white space is handled, and whether that
	* declaration preserves new lines
	*
	* Inspects the element's "pre" property, its style attribute and the content of any
	* xsl:attribute named "style" found among its descendants. Technically, this also tests the
	* style attributes of this element's descendants but the result is the same as the caller
	* checks every element of the branch in order
	*
	* @param  string     $elName Element name
	* @param  DOMElement $node   Context node
	* @return bool|null          TRUE if the last white-space declaration found starts with "pre"
	*                            ("pre", "pre-line" and "pre-wrap"), FALSE if it starts with "no"
	*                            ("normal", "nowrap"), NULL if no such declaration was found
	*/
	protected function getNewLinesPreservation($elName, DOMElement $node)
	{
		$style = '';

		if ($this->hasProperty($elName, 'pre', $node))
		{
			$style .= 'white-space:pre;';
		}

		if ($node->hasAttribute('style'))
		{
			$style .= $node->getAttribute('style') . ';';
		}

		$attributes = $this->xpath->query('.//xsl:attribute[@name="style"]', $node);
		foreach ($attributes as $attribute)
		{
			$style .= $attribute->textContent;
		}

		preg_match_all(
			'/white-space\\s*:\\s*(no|pre)/i',
			strtolower($style),
			$matches
		);

		if (empty($matches[1]))
		{
			return null;
		}

		// The last declaration wins
		return (end($matches[1]) === 'pre');
	}
619
620
	/**
	* Test whether given element is a block-level element
	*
	* The element's inline style takes precedence over the element's "b" property: a display
	* value of "block" makes it a block, a display value starting with "inline" or "none" makes
	* it a non-block
	*
	* @param  string     $elName Element name
	* @param  DOMElement $node   Context node
	* @return bool
	*/
	protected function elementIsBlock($elName, DOMElement $node)
	{
		$inlineStyle = $this->getStyle($node);

		// An explicit display:block always wins
		if (preg_match('(\\bdisplay\\s*:\\s*block)i', $inlineStyle))
		{
			return true;
		}

		// An explicit display:inline* or display:none means it's not a block
		if (preg_match('(\\bdisplay\\s*:\\s*(?:inli|no)ne)i', $inlineStyle))
		{
			return false;
		}

		// Otherwise, rely on the element's definition
		return $this->hasProperty($elName, 'b', $node);
	}
641
642
	/**
	* Evaluate a boolean XPath query
	*
	* The query is wrapped in a call to XPath's boolean() function before being evaluated
	*
	* @param  string     $query XPath query
	* @param  DOMElement $node  Context node
	* @return bool
	*/
	protected function evaluate($query, DOMElement $node)
	{
		$booleanQuery = 'boolean(' . $query . ')';

		return $this->xpath->evaluate($booleanQuery, $node);
	}
653
654
	/**
	* Retrieve and return the inline style assigned to given element
	*
	* The returned string is the value of the element's style attribute, followed by the text
	* content of each xsl:attribute child named "style", each preceded by a semicolon
	*
	* @param  DOMElement $node Context node, expected to belong to $this->dom
	* @return string
	*/
	protected function getStyle(DOMElement $node)
	{
		// Start with the inline attribute
		$style = $node->getAttribute('style');

		// Add the content of any xsl:attribute named "style". Only direct children of the node
		// are selected, which will miss optional attributes. The instance's XPath engine is
		// reused rather than creating a new one for every call
		foreach ($this->xpath->query('xsl:attribute[@name="style"]', $node) as $attribute)
		{
			$style .= ';' . $attribute->textContent;
		}

		return $style;
	}
675
676
	/**
	* Get all the elements of given name that belong to the XSL namespace
	*
	* @param  string       $elName XSL element's local name, e.g. "apply-templates"
	* @return \DOMNodeList
	*/
	protected function getXSLElements($elName)
	{
		$xslNamespace = 'http://www.w3.org/1999/XSL/Transform';

		return $this->dom->getElementsByTagNameNS($xslNamespace, $elName);
	}
686
687
	/**
	* Test whether given node is a span element used for formatting
	*
	* Will return TRUE if the node is a span element with a non-empty class attribute and/or a
	* non-empty style attribute, and no other attributes
	*
	* @param  DOMElement $node
	* @return bool
	*/
	protected function isFormattingSpan(DOMElement $node)
	{
		// Only span elements qualify
		if ($node->nodeName !== 'span')
		{
			return false;
		}

		// getAttribute() returns an empty string for missing attributes, so this rejects spans
		// that have neither a class nor a style
		$hasClass = ($node->getAttribute('class') !== '');
		$hasStyle = ($node->getAttribute('style') !== '');
		if (!$hasClass && !$hasStyle)
		{
			return false;
		}

		// Any attribute other than class and style disqualifies the span
		foreach ($node->attributes as $attrName => $attribute)
		{
			if (!($attrName === 'class' || $attrName === 'style'))
			{
				return false;
			}
		}

		return true;
	}
719
720
	/**
721
	* "What is this?" you might ask. This is basically a compressed version of the HTML5 content
722
	* models and rules, with some liberties taken.
723
	*
724
	* For each element, up to three bitfields are defined: "c", "ac" and "dd". Bitfields are stored
725
	* as raw bytes, formatted using the octal notation to keep the sources ASCII.
726
	*
727
	*   "c" represents the categories the element belongs to. The categories are comprised of HTML5
728
	*   content models (such as "phrasing content" or "interactive content") plus a few special
729
	*   categories created to cover the parts of the specs that refer to "a group of X and Y
730
	*   elements" rather than a specific content model.
731
	*
732
	*   "ac" represents the categories that are allowed as children of given element.
733
	*
734
	*   "dd" represents the categories that must not appear as a descendant of given element.
735
	*
736
	* Sometimes, HTML5 specifies some restrictions on when an element can accept certain children,
737
	* or what categories the element belongs to. For example, an <img> element is only part of the
738
	* "interactive content" category if it has a "usemap" attribute. Those restrictions are
739
	* expressed as an XPath expression and stored using the concatenation of the key of the bitfield
740
	* plus the bit number of the category. For instance, if "interactive content" got assigned to
741
	* bit 2, the definition of the <img> element will contain a key "c2" with value "@usemap".
742
	*
743
	* Additionally, other flags are set:
744
	*
745
	*   "t" indicates that the element uses the "transparent" content model.
746
	*   "e" indicates that the element uses the "empty" content model.
747
	*   "v" indicates that the element is a void element.
748
	*   "nt" indicates that the element does not accept text nodes. (no text)
749
	*   "to" indicates that the element should only contain text. (text-only)
750
	*   "fe" indicates that the element is a formatting element. It will automatically be reopened
751
	*   when closed by an end tag of a different name.
752
	*   "b" indicates that the element is not phrasing content, which makes it likely to act like
753
	*   a block element.
754
	*
755
	* Finally, HTML5 defines "optional end tag" rules, where one element automatically closes its
756
	* predecessor. Those are used to generate closeParent rules and are stored in the "cp" key.
757
	*
758
	* @var array
759
	* @see /scripts/patchTemplateInspector.php
760
	*/
761
	protected static $htmlElements = [
762
		'a'=>['c'=>"\17\0\0\0\0\1",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\0\1",'t'=>1,'fe'=>1],
763
		'abbr'=>['c'=>"\7",'ac'=>"\4"],
764
		'address'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\45",'b'=>1,'cp'=>['p']],
765
		'article'=>['c'=>"\3\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
766
		'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
767
		'audio'=>['c'=>"\57",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
768
		'b'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
769
		'base'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
770
		'bdi'=>['c'=>"\7",'ac'=>"\4"],
771
		'bdo'=>['c'=>"\7",'ac'=>"\4"],
772
		'blockquote'=>['c'=>"\203",'ac'=>"\1",'b'=>1,'cp'=>['p']],
773
		'body'=>['c'=>"\200\0\4",'ac'=>"\1",'b'=>1],
774
		'br'=>['c'=>"\5",'nt'=>1,'e'=>1,'v'=>1],
775
		'button'=>['c'=>"\117",'ac'=>"\4",'dd'=>"\10"],
776
		'canvas'=>['c'=>"\47",'ac'=>"\0",'t'=>1],
777
		'caption'=>['c'=>"\0\2",'ac'=>"\1",'dd'=>"\0\0\0\200",'b'=>1],
778
		'cite'=>['c'=>"\7",'ac'=>"\4"],
779
		'code'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
780
		'col'=>['c'=>"\0\0\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
781
		'colgroup'=>['c'=>"\0\2",'ac'=>"\0\0\20",'ac20'=>'not(@span)','nt'=>1,'e'=>1,'e0'=>'@span','b'=>1],
782
		'data'=>['c'=>"\7",'ac'=>"\4"],
783
		'datalist'=>['c'=>"\5",'ac'=>"\4\200\0\10"],
784
		'dd'=>['c'=>"\0\0\200",'ac'=>"\1",'b'=>1,'cp'=>['dd','dt']],
785
		'del'=>['c'=>"\5",'ac'=>"\0",'t'=>1],
786
		'details'=>['c'=>"\213",'ac'=>"\1\0\0\2",'b'=>1,'cp'=>['p']],
787
		'dfn'=>['c'=>"\7\0\0\0\40",'ac'=>"\4",'dd'=>"\0\0\0\0\40"],
788
		'div'=>['c'=>"\3",'ac'=>"\1",'b'=>1,'cp'=>['p']],
789
		'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\200\200",'nt'=>1,'b'=>1,'cp'=>['p']],
790
		'dt'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['dd','dt']],
791
		'em'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
792
		'embed'=>['c'=>"\57",'nt'=>1,'e'=>1,'v'=>1],
793
		'fieldset'=>['c'=>"\303",'ac'=>"\1\0\0\20",'b'=>1,'cp'=>['p']],
794
		'figcaption'=>['c'=>"\0\0\0\0\0\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
795
		'figure'=>['c'=>"\203",'ac'=>"\1\0\0\0\0\4",'b'=>1,'cp'=>['p']],
796
		'footer'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
797
		'form'=>['c'=>"\3\0\0\0\20",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
798
		'h1'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
799
		'h2'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
800
		'h3'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
801
		'h4'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
802
		'h5'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
803
		'h6'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
804
		'head'=>['c'=>"\0\0\4",'ac'=>"\20",'nt'=>1,'b'=>1],
805
		'header'=>['c'=>"\3\40\0\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
806
		'hr'=>['c'=>"\1\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
807
		'html'=>['c'=>"\0",'ac'=>"\0\0\4",'nt'=>1,'b'=>1],
808
		'i'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
809
		'iframe'=>['c'=>"\57",'ac'=>"\4"],
810
		'img'=>['c'=>"\57\20\10",'c3'=>'@usemap','nt'=>1,'e'=>1,'v'=>1],
811
		'input'=>['c'=>"\17\20",'c3'=>'@type!="hidden"','c12'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','nt'=>1,'e'=>1,'v'=>1],
812
		'ins'=>['c'=>"\7",'ac'=>"\0",'t'=>1],
813
		'kbd'=>['c'=>"\7",'ac'=>"\4"],
814
		'keygen'=>['c'=>"\117",'nt'=>1,'e'=>1,'v'=>1],
815
		'label'=>['c'=>"\17\20\0\0\4",'ac'=>"\4",'dd'=>"\0\0\1\0\4"],
816
		'legend'=>['c'=>"\0\0\0\20",'ac'=>"\4",'b'=>1],
817
		'li'=>['c'=>"\0\0\0\0\200",'ac'=>"\1",'b'=>1,'cp'=>['li']],
818
		'link'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
819
		'main'=>['c'=>"\3\0\0\0\10",'ac'=>"\1",'b'=>1,'cp'=>['p']],
820
		'mark'=>['c'=>"\7",'ac'=>"\4"],
821
		'media element'=>['c'=>"\0\0\0\0\0\2",'nt'=>1,'b'=>1],
822
		'menu'=>['c'=>"\1\100",'ac'=>"\0\300",'nt'=>1,'b'=>1,'cp'=>['p']],
823
		'menuitem'=>['c'=>"\0\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
824
		'meta'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
825
		'meter'=>['c'=>"\7\0\1\0\2",'ac'=>"\4",'dd'=>"\0\0\0\0\2"],
826
		'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
827
		'noscript'=>['c'=>"\25",'nt'=>1],
828
		'object'=>['c'=>"\147",'ac'=>"\0\0\0\0\1",'t'=>1],
829
		'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'nt'=>1,'b'=>1,'cp'=>['p']],
830
		'optgroup'=>['c'=>"\0\0\2",'ac'=>"\0\200\0\10",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
831
		'option'=>['c'=>"\0\0\2\10",'b'=>1,'cp'=>['option']],
832
		'output'=>['c'=>"\107",'ac'=>"\4"],
833
		'p'=>['c'=>"\3",'ac'=>"\4",'b'=>1,'cp'=>['p']],
834
		'param'=>['c'=>"\0\0\0\0\1",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
835
		'picture'=>['c'=>"\45",'ac'=>"\0\200\10",'nt'=>1],
836
		'pre'=>['c'=>"\3",'ac'=>"\4",'pre'=>1,'b'=>1,'cp'=>['p']],
837
		'progress'=>['c'=>"\7\0\1\1",'ac'=>"\4",'dd'=>"\0\0\0\1"],
838
		'q'=>['c'=>"\7",'ac'=>"\4"],
839
		'rb'=>['c'=>"\0\10",'ac'=>"\4",'b'=>1],
840
		'rp'=>['c'=>"\0\10\100",'ac'=>"\4",'b'=>1,'cp'=>['rp','rt']],
841
		'rt'=>['c'=>"\0\10\100",'ac'=>"\4",'b'=>1,'cp'=>['rp','rt']],
842
		'rtc'=>['c'=>"\0\10",'ac'=>"\4\0\100",'b'=>1],
843
		'ruby'=>['c'=>"\7",'ac'=>"\4\10"],
844
		's'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
845
		'samp'=>['c'=>"\7",'ac'=>"\4"],
846
		'script'=>['c'=>"\25\200",'to'=>1],
847
		'section'=>['c'=>"\3\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
848
		'select'=>['c'=>"\117",'ac'=>"\0\200\2",'nt'=>1],
849
		'small'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
850
		'source'=>['c'=>"\0\0\10\4",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
851
		'span'=>['c'=>"\7",'ac'=>"\4"],
852
		'strong'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
853
		'style'=>['c'=>"\20",'to'=>1,'b'=>1],
854
		'sub'=>['c'=>"\7",'ac'=>"\4"],
855
		'summary'=>['c'=>"\0\0\0\2",'ac'=>"\4\1",'b'=>1],
856
		'sup'=>['c'=>"\7",'ac'=>"\4"],
857
		'table'=>['c'=>"\3\0\0\200",'ac'=>"\0\202",'nt'=>1,'b'=>1,'cp'=>['p']],
858
		'tbody'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1,'cp'=>['tbody','td','tfoot','th','thead','tr']],
859
		'td'=>['c'=>"\200\0\40",'ac'=>"\1",'b'=>1,'cp'=>['td','th']],
860
		'template'=>['c'=>"\25\200\20",'nt'=>1],
861
		'textarea'=>['c'=>"\117",'pre'=>1,'to'=>1],
862
		'tfoot'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
863
		'th'=>['c'=>"\0\0\40",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['td','th']],
864
		'thead'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1],
865
		'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime'],
866
		'title'=>['c'=>"\20",'to'=>1,'b'=>1],
867
		'tr'=>['c'=>"\0\2\0\0\100",'ac'=>"\0\200\40",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
868
		'track'=>['c'=>"\0\0\0\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
869
		'u'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
870
		'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'nt'=>1,'b'=>1,'cp'=>['p']],
871
		'var'=>['c'=>"\7",'ac'=>"\4"],
872
		'video'=>['c'=>"\57",'c3'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
873
		'wbr'=>['c'=>"\5",'nt'=>1,'e'=>1,'v'=>1]
874
	];
875
876
	/**
	* Get the bitfield value for a given element name in a given context
	*
	* Starts from the bitfield stored in self::$htmlElements, then turns off every bit whose
	* associated XPath condition (stored under the bitfield's name followed by the bit number)
	* is not fulfilled by the context node
	*
	* @param  string     $elName Name of the HTML element
	* @param  string     $k      Bitfield name: either 'c', 'ac' or 'dd'
	* @param  DOMElement $node   Context node (not necessarily the same as $elName)
	* @return string             Raw bytes, or "\0" if the element has no such bitfield
	*/
	protected function getBitfield($elName, $k, DOMElement $node)
	{
		if (!isset(self::$htmlElements[$elName][$k]))
		{
			return "\0";
		}

		$elConfig = self::$htmlElements[$elName];
		$bitfield = $elConfig[$k];
		foreach (str_split($bitfield, 1) as $byteNumber => $char)
		{
			$byteValue = ord($char);
			for ($bitNumber = 0; $bitNumber < 8; ++$bitNumber)
			{
				$bitValue = 1 << $bitNumber;
				if (!($byteValue & $bitValue))
				{
					// The bit is not set
					continue;
				}

				// Conditions are keyed by the bitfield's name plus the absolute bit number
				$conditionKey = $k . ($byteNumber * 8 + $bitNumber);
				if (!isset($elConfig[$conditionKey]))
				{
					// This category is unconditional
					continue;
				}

				// evaluate() already wraps the expression in boolean(), no need to do it here.
				// If the XPath condition is not fulfilled...
				if (!$this->evaluate($elConfig[$conditionKey], $node))
				{
					// ...turn off the corresponding bit
					$byteValue ^= $bitValue;

					// Update the original bitfield
					$bitfield[$byteNumber] = chr($byteValue);
				}
			}
		}

		return $bitfield;
	}
926
927
	/**
	* Test whether given element has given property in context
	*
	* A property applies if it is set to a non-empty value in self::$htmlElements and either it
	* has no XPath condition (stored under the property's name followed by "0") or the context
	* node fulfills that condition
	*
	* @param  string     $elName   Element name
	* @param  string     $propName Property name, see self::$htmlElements
	* @param  DOMElement $node     Context node
	* @return bool
	*/
	protected function hasProperty($elName, $propName, DOMElement $node)
	{
		// The property is either missing or turned off
		if (empty(self::$htmlElements[$elName][$propName]))
		{
			return false;
		}

		// No XPath condition means that the property always applies
		$conditionKey = $propName . '0';
		if (!isset(self::$htmlElements[$elName][$conditionKey]))
		{
			return true;
		}

		// Test the XPath condition
		return (bool) $this->evaluate(self::$htmlElements[$elName][$conditionKey], $node);
	}
949
950
	/**
	* Test whether two bitfields have any bits in common
	*
	* Both bitfields are AND-ed together as strings. The result has bits in common if anything
	* remains once the NUL bytes are trimmed from both ends
	*
	* @param  string $bitfield1
	* @param  string $bitfield2
	* @return bool
	*/
	protected static function match($bitfield1, $bitfield2)
	{
		$commonBits = $bitfield1 & $bitfield2;

		return (trim($commonBits, "\0") !== '');
	}
961
}