Completed
Push — master ( 7d71e7...f5f494 )
by Josh
20:31
created

TemplateInspector::getXSLElements()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2016 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use DOMDocument;
11
use DOMElement;
12
use DOMXPath;
13
14
/**
15
* This class helps the RulesGenerator by analyzing a given template in order to answer questions
16
* such as "can this tag be a child/descendant of that other tag?" and others related to the HTML5
17
* content model.
18
*
19
* We use the HTML5 specs to determine which children or descendants should be allowed or denied
20
* based on HTML5 content models. While it does not exactly match HTML5 content models, it gets
21
* pretty close. We also use HTML5 "optional end tag" rules to create closeParent rules.
22
*
23
* Currently, this class does not evaluate elements created with <xsl:element> correctly, or
24
* attributes created with <xsl:attribute>, and may never do so due to the increased complexity it
25
* would entail. Additionally, it does not evaluate the scope of <xsl:apply-templates/>. For
26
* instance, it will treat <xsl:apply-templates select="LI"/> as if it was <xsl:apply-templates/>.
27
*
28
* @link http://dev.w3.org/html5/spec/content-models.html#content-models
29
* @link http://dev.w3.org/html5/spec/syntax.html#optional-tags
30
* @see  /scripts/patchTemplateInspector.php
31
*/
32
class TemplateInspector
33
{
34
	/**
35
	* @var string allowChild bitfield (all branches)
36
	*/
37
	protected $allowChildBitfield = "\0";
38
39
	/**
40
	* @var bool Whether elements are allowed as children
41
	*/
42
	protected $allowsChildElements = true;
43
44
	/**
45
	* @var bool Whether text nodes are allowed as children
46
	*/
47
	protected $allowsText = true;
48
49
	/**
50
	* @var string OR-ed bitfield representing all of the categories used by this template
51
	*/
52
	protected $contentBitfield = "\0";
53
54
	/**
55
	* @var string denyDescendant bitfield
56
	*/
57
	protected $denyDescendantBitfield = "\0";
58
59
	/**
60
	* @var DOMDocument Document containing the template
61
	*/
62
	protected $dom;
63
64
	/**
65
	* @var bool Whether this template contains any HTML elements
66
	*/
67
	protected $hasElements = false;
68
69
	/**
70
	* @var bool Whether this template renders non-whitespace text nodes at its root
71
	*/
72
	protected $hasRootText = false;
73
74
	/**
75
	* @var bool Whether this template should be considered a block-level element
76
	*/
77
	protected $isBlock = false;
78
79
	/**
80
	* @var bool Whether the template uses the "empty" content model
81
	*/
82
	protected $isEmpty = true;
83
84
	/**
85
	* @var bool Whether this template adds to the list of active formatting elements
86
	*/
87
	protected $isFormattingElement = false;
88
89
	/**
90
	* @var bool Whether this template lets content through via an xsl:apply-templates element
91
	*/
92
	protected $isPassthrough = false;
93
94
	/**
95
	* @var bool Whether all branches use the transparent content model
96
	*/
97
	protected $isTransparent = false;
98
99
	/**
100
	* @var bool Whether all branches have an ancestor that is a void element
101
	*/
102
	protected $isVoid = true;
103
104
	/**
105
	* @var array Names of every last HTML element that precedes an <xsl:apply-templates/> node
106
	*/
107
	protected $leafNodes = [];
108
109
	/**
110
	* @var bool Whether any branch has an element that preserves new lines by default (e.g. <pre>)
111
	*/
112
	protected $preservesNewLines = false;
113
114
	/**
115
	* @var array Bitfield of the first HTML element of every branch
116
	*/
117
	protected $rootBitfields = [];
118
119
	/**
120
	* @var array Names of every HTML element that has no HTML parent
121
	*/
122
	protected $rootNodes = [];
123
124
	/**
125
	* @var DOMXPath XPath engine associated with $this->dom
126
	*/
127
	protected $xpath;
128
129
	/**
	* Load the template and run every analysis pass on it
	*
	* The template is loaded through TemplateHelper::loadTemplate(), then its root nodes, its
	* branches and its overall content are analysed, in that order.
	*
	* @param  string $template Template content
	*/
	public function __construct($template)
	{
		$dom = TemplateHelper::loadTemplate($template);

		$this->dom   = $dom;
		$this->xpath = new DOMXPath($dom);

		$this->analyseRootNodes();
		$this->analyseBranches();
		$this->analyseContent();
	}
143
144
	/**
	* Test whether given template can be used as a child of this template
	*
	* The child is rejected if it is not allowed as a descendant, if any of its root elements
	* shares no category with this template's allowChild bitfield, or if it renders text at its
	* root while this template does not allow text nodes.
	*
	* @param  self $child
	* @return bool
	*/
	public function allowsChild(self $child)
	{
		// A template can technically be allowed as a child yet be denied as a descendant
		if (!$this->allowsDescendant($child))
		{
			return false;
		}

		// Every root element of the child must match the allowChild bitfield
		foreach ($child->rootBitfields as $childRootBitfield)
		{
			if (!self::match($childRootBitfield, $this->allowChildBitfield))
			{
				return false;
			}
		}

		// Root text is only acceptable if this template allows text nodes
		return ($this->allowsText || !$child->hasRootText);
	}
173
174
	/**
	* Test whether given template can be used as a descendant of this template
	*
	* @param  self $descendant
	* @return bool
	*/
	public function allowsDescendant(self $descendant)
	{
		// The descendant is explicitly denied if its content shares any category with our
		// denyDescendant bitfield
		$isDenied = self::match($descendant->contentBitfield, $this->denyDescendantBitfield);

		// It is also ruled out if it contains elements while we do not allow any
		$hasForbiddenElements = (!$this->allowsChildElements && $descendant->hasElements);

		return !($isDenied || $hasForbiddenElements);
	}
196
197
	/**
	* Test whether elements are allowed as children of this template
	*
	* @return bool Value of the allowsChildElements flag computed during analysis
	*/
	public function allowsChildElements()
	{
		return $this->allowsChildElements;
	}
206
207
	/**
	* Test whether text nodes are allowed as children of this template
	*
	* @return bool Value of the allowsText flag computed during analysis
	*/
	public function allowsText()
	{
		return $this->allowsText;
	}
216
217
	/**
	* Test whether this template automatically closes given parent template
	*
	* This template closes the parent as soon as one of its root elements lists one of the
	* parent's leaf elements in its "cp" (closeParent) definition.
	*
	* @param  self $parent
	* @return bool
	*/
	public function closesParent(self $parent)
	{
		foreach ($this->rootNodes as $rootName)
		{
			// Root elements with no closeParent list cannot close anything
			if (empty(self::$htmlElements[$rootName]['cp']))
			{
				continue;
			}
			$closedNames = self::$htmlElements[$rootName]['cp'];

			foreach ($parent->leafNodes as $leafName)
			{
				// One match between any root node and any leaf node is enough
				if (in_array($leafName, $closedNames, true))
				{
					return true;
				}
			}
		}

		return false;
	}
245
246
	/**
	* Give access to the DOMDocument that holds the source template
	*
	* NOTE: the document should not be modified
	*
	* @return DOMDocument
	*/
	public function getDOM()
	{
		return $this->dom;
	}
257
258
	/**
	* Test whether this template should be considered a block-level element
	*
	* @return bool Value of the isBlock flag computed during analysis
	*/
	public function isBlock()
	{
		return $this->isBlock;
	}
267
268
	/**
	* Test whether this template adds to the list of active formatting elements
	*
	* @return bool Value of the isFormattingElement flag computed during analysis
	*/
	public function isFormattingElement()
	{
		return $this->isFormattingElement;
	}
277
278
	/**
	* Test whether this template uses the "empty" content model
	*
	* @return bool Value of the isEmpty flag computed during analysis
	*/
	public function isEmpty()
	{
		return $this->isEmpty;
	}
287
288
	/**
	* Test whether this template lets content through via an xsl:apply-templates element
	*
	* @return bool Value of the isPassthrough flag computed during analysis
	*/
	public function isPassthrough()
	{
		return $this->isPassthrough;
	}
297
298
	/**
	* Test whether this template uses the "transparent" content model
	*
	* @return bool Value of the isTransparent flag computed during analysis
	*/
	public function isTransparent()
	{
		return $this->isTransparent;
	}
307
308
	/**
	* Test whether all branches have an ancestor that is a void element
	*
	* @return bool Value of the isVoid flag computed during analysis
	*/
	public function isVoid()
	{
		return $this->isVoid;
	}
317
318
	/**
	* Test whether this template preserves the whitespace in its descendants
	*
	* @return bool Value of the preservesNewLines flag computed during analysis
	*/
	public function preservesNewLines()
	{
		return $this->preservesNewLines;
	}
327
328
	/**
	* Analyse the content of the whole template
	*
	* ORs the "c" bitfield of every non-XSL element into $this->contentBitfield, records whether
	* the template contains any such element in $this->hasElements, and sets $this->isPassthrough
	* depending on whether the template contains any xsl:apply-templates element.
	*/
	protected function analyseContent()
	{
		// Collect every element that is not in the XSL namespace
		$elements = $this->xpath->query(
			'//*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]'
		);
		foreach ($elements as $element)
		{
			$this->contentBitfield |= $this->getBitfield($element->localName, 'c', $element);
			$this->hasElements      = true;
		}

		// The template is passthrough if it contains at least one xsl:apply-templates element
		$applyTemplatesCount = $this->xpath->evaluate('count(//xsl:apply-templates)');
		$this->isPassthrough = (bool) $applyTemplatesCount;
	}
345
346
	/**
	* Record the HTML elements (and their bitfield) rendered at the root of the template
	*
	* Fills $this->rootNodes and $this->rootBitfields, sets $this->isBlock if any root element is
	* a block-level element, and sets $this->hasRootText if the template renders non-whitespace
	* text (or an xsl:value-of) outside of any HTML element.
	*/
	protected function analyseRootNodes()
	{
		// Predicate that only keeps nodes with no non-XSL ancestor
		$noHtmlAncestor = '[not(ancestor::*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"])]';

		// A non-XSL element with no non-XSL ancestor is the first HTML element of its branch
		$rootQuery = '//*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]' . $noHtmlAncestor;
		foreach ($this->xpath->query($rootQuery) as $rootNode)
		{
			// The actual name of the root node is what gets saved
			$actualName        = $rootNode->localName;
			$this->rootNodes[] = $actualName;

			// Unknown elements are treated as if they were a <span> element
			$elName = (isset(self::$htmlElements[$actualName])) ? $actualName : 'span';

			// One block-level root node is enough to mark the whole template as block
			if ($this->elementIsBlock($elName, $rootNode))
			{
				$this->isBlock = true;
			}

			$this->rootBitfields[] = $this->getBitfield($elName, 'c', $rootNode);
		}

		// Root text must have no non-XSL ancestor, and must not be located inside of an
		// <xsl:attribute/>, <xsl:comment/> or <xsl:variable/> element
		$rootTextPredicate = $noHtmlAncestor
		                   . '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

		$textQuery = implode('|', [
			'//text()[normalize-space() != ""]' . $rootTextPredicate,
			'//xsl:text[normalize-space() != ""]' . $rootTextPredicate,
			'//xsl:value-of' . $rootTextPredicate
		]);

		if ($this->evaluate($textQuery, $this->dom->documentElement))
		{
			$this->hasRootText = true;
		}
	}
396
397
	/**
	* Analyse each branch that leads to an <xsl:apply-templates/> element
	*
	* A branch is made of the non-XSL ancestors of one <xsl:apply-templates/> element. Each branch
	* is inspected element by element and the results are merged into the template-wide
	* properties: allowChildBitfield, allowsChildElements, allowsText, denyDescendantBitfield,
	* isEmpty, isFormattingElement, isTransparent, isVoid, leafNodes and preservesNewLines.
	*/
	protected function analyseBranches()
	{
		// allowChild bitfield of each branch
		$bitfieldsPerBranch = [];

		// Remains TRUE only if every element of every branch is a formatting element
		$onlyFormattingElements = true;

		// The template is presumed transparent until a non-transparent element is found, or
		// until we find out that there are no branches at all
		$this->isTransparent = true;

		foreach ($this->getXSLElements('apply-templates') as $applyTemplates)
		{
			// The branch is made of all the non-XSL ancestors of this <xsl:apply-templates/>
			$ancestors = $this->xpath->query(
				'ancestor::*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]',
				$applyTemplates
			);

			// Whether this branch allows elements / text nodes
			$branchAllowsElements = true;
			$branchAllowsText     = true;

			// allowChild bitfield for this branch. It starts with the value associated with <div>
			// in order to approximate a value if the whole branch uses the transparent content
			// model
			$branchBitfield = self::$htmlElements['div']['ac'];

			// Whether this branch contains an element that uses the "empty" content model, and
			// whether it contains a void element
			$branchIsEmpty = false;
			$branchIsVoid  = false;

			// Name of the last element processed in this branch
			$lastName = null;

			// Whether this branch preserves new lines
			$branchPreservesNewLines = false;

			foreach ($ancestors as $ancestor)
			{
				$lastName = $ancestor->localName;

				// Unknown elements are treated as if they were a <span> element
				$elName = (isset(self::$htmlElements[$lastName])) ? $lastName : 'span';

				if ($this->hasProperty($elName, 'v', $ancestor))
				{
					$branchIsVoid = true;
				}

				if ($this->hasProperty($elName, 'e', $ancestor))
				{
					$branchIsEmpty = true;
				}

				if (!$this->hasProperty($elName, 't', $ancestor))
				{
					// A non-transparent element resets the branch's bitfield and makes the
					// template itself non-transparent
					$branchBitfield      = "\0";
					$this->isTransparent = false;
				}

				// Anything that is neither a formatting element nor a formatting span disqualifies
				// the template as a formatting element
				if (!($this->hasProperty($elName, 'fe', $ancestor) || $this->isFormattingSpan($ancestor)))
				{
					$onlyFormattingElements = false;
				}

				// These two are overwritten by each element, the last element processed wins
				$branchAllowsElements = !$this->hasProperty($elName, 'to', $ancestor);
				$branchAllowsText     = !$this->hasProperty($elName, 'nt', $ancestor);

				// allowChild rules are cumulative if transparent, and reset above otherwise
				$branchBitfield |= $this->getBitfield($elName, 'ac', $ancestor);

				// denyDescendant rules are always cumulative
				$this->denyDescendantBitfield |= $this->getBitfield($elName, 'dd', $ancestor);

				// Collect the CSS that applies to this element: its default whitespace handling,
				// its style attribute, and the content of any xsl:attribute named "style" found
				// in this element or its descendants
				$css = '';
				if ($this->hasProperty($elName, 'pre', $ancestor))
				{
					$css .= 'white-space:pre;';
				}
				if ($ancestor->hasAttribute('style'))
				{
					$css .= $ancestor->getAttribute('style') . ';';
				}
				$styleAttributes = $this->xpath->query('.//xsl:attribute[@name="style"]', $ancestor);
				foreach ($styleAttributes as $styleAttribute)
				{
					$css .= $styleAttribute->textContent;
				}

				// The last white-space declaration wins
				// TRUE:  "pre", "pre-line" and "pre-wrap"
				// FALSE: "normal", "nowrap"
				preg_match_all('/white-space\\s*:\\s*(no|pre)/i', strtolower($css), $declarations);
				$keywords = $declarations[1];
				if (!empty($keywords))
				{
					$branchPreservesNewLines = (end($keywords) === 'pre');
				}
			}

			// Add this branch's bitfield to the list
			$bitfieldsPerBranch[] = $branchBitfield;

			// Save the name of the last element processed, if there was any
			if (isset($lastName))
			{
				$this->leafNodes[] = $lastName;
			}

			// Any branch that disallows elements or text does so for the whole template
			if (!$branchAllowsElements)
			{
				$this->allowsChildElements = false;
			}
			if (!$branchAllowsText)
			{
				$this->allowsText = false;
			}

			// The template is only empty or void if every branch is
			if (!$branchIsEmpty)
			{
				$this->isEmpty = false;
			}
			if (!$branchIsVoid)
			{
				$this->isVoid = false;
			}

			// Any branch that preserves new lines does so for the whole template
			if ($branchPreservesNewLines)
			{
				$this->preservesNewLines = true;
			}
		}

		if (empty($bitfieldsPerBranch))
		{
			// No branches => not transparent and no child elements
			$this->allowsChildElements = false;
			$this->isTransparent       = false;

			return;
		}

		// Reduce the bitfields of all branches to a single ANDed bitfield
		$combinedBitfield = $bitfieldsPerBranch[0];
		foreach ($bitfieldsPerBranch as $bitfield)
		{
			$combinedBitfield &= $bitfield;
		}
		$this->allowChildBitfield = $combinedBitfield;

		// The isFormattingElement property is only set if at least one branch had an element
		if (!empty($this->leafNodes))
		{
			$this->isFormattingElement = $onlyFormattingElements;
		}
	}
610
611
	/**
	* Test whether given element is a block-level element
	*
	* An explicit "display" declaration in the element's inline style takes precedence over the
	* element's "b" property. A "block" declaration is looked for before an "inline" one.
	*
	* @param  string     $elName Element name
	* @param  DOMElement $node   Context node
	* @return bool
	*/
	protected function elementIsBlock($elName, DOMElement $node)
	{
		$css = $this->getStyle($node);

		// Patterns are tested in order, the first one that matches decides
		$displayPatterns = [
			'(\\bdisplay\\s*:\\s*block)i'  => true,
			'(\\bdisplay\\s*:\\s*inline)i' => false
		];
		foreach ($displayPatterns as $regexp => $isBlock)
		{
			if (preg_match($regexp, $css))
			{
				return $isBlock;
			}
		}

		// No explicit display, use the element's default
		return $this->hasProperty($elName, 'b', $node);
	}
632
633
	/**
	* Evaluate an XPath query as a boolean
	*
	* The query is wrapped in a call to XPath's boolean() function before being evaluated with
	* given node as context.
	*
	* @param  string     $query XPath query
	* @param  DOMElement $node  Context node
	* @return boolean
	*/
	protected function evaluate($query, DOMElement $node)
	{
		$expr = 'boolean(' . $query . ')';

		return $this->xpath->evaluate($expr, $node);
	}
644
645
	/**
	* Retrieve and return the inline style assigned to given element
	*
	* The result is the value of the element's style attribute, followed by the content of each
	* xsl:attribute child named "style", joined with semicolons.
	*
	* @param  DOMElement $node Context node
	* @return string
	*/
	protected function getStyle(DOMElement $node)
	{
		// The inline attribute comes first
		$declarations = [$node->getAttribute('style')];

		// Then the content of any xsl:attribute named "style". Only direct children are matched,
		// which will miss optional attributes
		$xpath = new DOMXPath($node->ownerDocument);
		foreach ($xpath->query('xsl:attribute[@name="style"]', $node) as $attribute)
		{
			$declarations[] = $attribute->textContent;
		}

		return implode(';', $declarations);
	}
666
667
	/**
	* Get all the elements of given name that belong to the XSL namespace
	*
	* @param  string      $elName XSL element's name, e.g. "apply-templates"
	* @return \DOMNodeList
	*/
	protected function getXSLElements($elName)
	{
		$xslNamespace = 'http://www.w3.org/1999/XSL/Transform';

		return $this->dom->getElementsByTagNameNS($xslNamespace, $elName);
	}
677
678
	/**
	* Test whether given node is a span element used for formatting
	*
	* Will return TRUE if the node is a span element with a non-empty class attribute and/or a
	* non-empty style attribute, and no other attributes
	*
	* @param  DOMElement $node
	* @return boolean
	*/
	protected function isFormattingSpan(DOMElement $node)
	{
		$isSpan          = ($node->nodeName === 'span');
		$hasClassOrStyle = ($node->getAttribute('class') !== '' || $node->getAttribute('style') !== '');
		if (!$isSpan || !$hasClassOrStyle)
		{
			return false;
		}

		// Any attribute other than class and style disqualifies the span
		foreach ($node->attributes as $attrName => $attribute)
		{
			if (!in_array($attrName, ['class', 'style'], true))
			{
				return false;
			}
		}

		return true;
	}
710
711
	/**
712
	* "What is this?" you might ask. This is basically a compressed version of the HTML5 content
713
	* models and rules, with some liberties taken.
714
	*
715
	* For each element, up to three bitfields are defined: "c", "ac" and "dd". Bitfields are stored
716
	* as raw bytes, formatted using the octal notation to keep the sources ASCII.
717
	*
718
	*   "c" represents the categories the element belongs to. The categories are comprised of HTML5
719
	*   content models (such as "phrasing content" or "interactive content") plus a few special
720
	*   categories created to cover the parts of the specs that refer to "a group of X and Y
721
	*   elements" rather than a specific content model.
722
	*
723
	*   "ac" represents the categories that are allowed as children of given element.
724
	*
725
	*   "dd" represents the categories that must not appear as a descendant of given element.
726
	*
727
	* Sometimes, HTML5 specifies some restrictions on when an element can accept certain children,
728
	* or what categories the element belongs to. For example, an <img> element is only part of the
729
	* "interactive content" category if it has a "usemap" attribute. Those restrictions are
730
	* expressed as an XPath expression and stored using the concatenation of the key of the bitfield
731
	* plus the bit number of the category. For instance, if "interactive content" got assigned to
732
	* bit 2, the definition of the <img> element will contain a key "c2" with value "@usemap".
733
	*
734
	* Additionally, other flags are set:
735
	*
736
	*   "t" indicates that the element uses the "transparent" content model.
737
	*   "e" indicates that the element uses the "empty" content model.
738
	*   "v" indicates that the element is a void element.
739
	*   "nt" indicates that the element does not accept text nodes. (no text)
740
	*   "to" indicates that the element should only contain text. (text-only)
741
	*   "fe" indicates that the element is a formatting element. It will automatically be reopened
742
	*   when closed by an end tag of a different name.
743
	*   "b" indicates that the element is not phrasing content, which makes it likely to act like
744
	*   a block element.
	*   "pre" indicates that the element preserves new lines by default (e.g. <pre>, <textarea>).
745
	*
746
	* Finally, HTML5 defines "optional end tag" rules, where one element automatically closes its
747
	* predecessor. Those are used to generate closeParent rules and are stored in the "cp" key.
748
	*
749
	* @var array
750
	* @see /scripts/patchTemplateInspector.php
751
	*/
752
	protected static $htmlElements = [
753
		'a'=>['c'=>"\17\0\0\0\0\1",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\0\1",'t'=>1,'fe'=>1],
754
		'abbr'=>['c'=>"\7",'ac'=>"\4"],
755
		'address'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\45",'b'=>1,'cp'=>['p']],
756
		'article'=>['c'=>"\3\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
757
		'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
758
		'audio'=>['c'=>"\57",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
759
		'b'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
760
		'base'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
761
		'bdi'=>['c'=>"\7",'ac'=>"\4"],
762
		'bdo'=>['c'=>"\7",'ac'=>"\4"],
763
		'blockquote'=>['c'=>"\203",'ac'=>"\1",'b'=>1,'cp'=>['p']],
764
		'body'=>['c'=>"\200\0\4",'ac'=>"\1",'b'=>1],
765
		'br'=>['c'=>"\5",'nt'=>1,'e'=>1,'v'=>1],
766
		'button'=>['c'=>"\117",'ac'=>"\4",'dd'=>"\10"],
767
		'canvas'=>['c'=>"\47",'ac'=>"\0",'t'=>1],
768
		'caption'=>['c'=>"\0\2",'ac'=>"\1",'dd'=>"\0\0\0\200",'b'=>1],
769
		'cite'=>['c'=>"\7",'ac'=>"\4"],
770
		'code'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
771
		'col'=>['c'=>"\0\0\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
772
		'colgroup'=>['c'=>"\0\2",'ac'=>"\0\0\20",'ac20'=>'not(@span)','nt'=>1,'e'=>1,'e0'=>'@span','b'=>1],
773
		'data'=>['c'=>"\7",'ac'=>"\4"],
774
		'datalist'=>['c'=>"\5",'ac'=>"\4\200\0\10"],
775
		'dd'=>['c'=>"\0\0\200",'ac'=>"\1",'b'=>1,'cp'=>['dd','dt']],
776
		'del'=>['c'=>"\5",'ac'=>"\0",'t'=>1],
777
		'details'=>['c'=>"\213",'ac'=>"\1\0\0\2",'b'=>1,'cp'=>['p']],
778
		'dfn'=>['c'=>"\7\0\0\0\40",'ac'=>"\4",'dd'=>"\0\0\0\0\40"],
779
		'div'=>['c'=>"\3",'ac'=>"\1",'b'=>1,'cp'=>['p']],
780
		'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\200\200",'nt'=>1,'b'=>1,'cp'=>['p']],
781
		'dt'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['dd','dt']],
782
		'em'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
783
		'embed'=>['c'=>"\57",'nt'=>1,'e'=>1,'v'=>1],
784
		'fieldset'=>['c'=>"\303",'ac'=>"\1\0\0\20",'b'=>1,'cp'=>['p']],
785
		'figcaption'=>['c'=>"\0\0\0\0\0\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
786
		'figure'=>['c'=>"\203",'ac'=>"\1\0\0\0\0\4",'b'=>1,'cp'=>['p']],
787
		'footer'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
788
		'form'=>['c'=>"\3\0\0\0\20",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
789
		'h1'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
790
		'h2'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
791
		'h3'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
792
		'h4'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
793
		'h5'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
794
		'h6'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
795
		'head'=>['c'=>"\0\0\4",'ac'=>"\20",'nt'=>1,'b'=>1],
796
		'header'=>['c'=>"\3\40\0\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
797
		'hr'=>['c'=>"\1\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
798
		'html'=>['c'=>"\0",'ac'=>"\0\0\4",'nt'=>1,'b'=>1],
799
		'i'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
800
		'iframe'=>['c'=>"\57",'ac'=>"\4"],
801
		'img'=>['c'=>"\57\20\10",'c3'=>'@usemap','nt'=>1,'e'=>1,'v'=>1],
802
		'input'=>['c'=>"\17\20",'c3'=>'@type!="hidden"','c12'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','nt'=>1,'e'=>1,'v'=>1],
803
		'ins'=>['c'=>"\7",'ac'=>"\0",'t'=>1],
804
		'kbd'=>['c'=>"\7",'ac'=>"\4"],
805
		'keygen'=>['c'=>"\117",'nt'=>1,'e'=>1,'v'=>1],
806
		'label'=>['c'=>"\17\20\0\0\4",'ac'=>"\4",'dd'=>"\0\0\1\0\4"],
807
		'legend'=>['c'=>"\0\0\0\20",'ac'=>"\4",'b'=>1],
808
		'li'=>['c'=>"\0\0\0\0\200",'ac'=>"\1",'b'=>1,'cp'=>['li']],
809
		'link'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
810
		'main'=>['c'=>"\3\0\0\0\10",'ac'=>"\1",'b'=>1,'cp'=>['p']],
811
		'mark'=>['c'=>"\7",'ac'=>"\4"],
812
		'media element'=>['c'=>"\0\0\0\0\0\2",'nt'=>1,'b'=>1],
813
		'menu'=>['c'=>"\1\100",'ac'=>"\0\300",'nt'=>1,'b'=>1,'cp'=>['p']],
814
		'menuitem'=>['c'=>"\0\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
815
		'meta'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
816
		'meter'=>['c'=>"\7\0\1\0\2",'ac'=>"\4",'dd'=>"\0\0\0\0\2"],
817
		'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
818
		'noscript'=>['c'=>"\25",'nt'=>1],
819
		'object'=>['c'=>"\147",'ac'=>"\0\0\0\0\1",'t'=>1],
820
		'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'nt'=>1,'b'=>1,'cp'=>['p']],
821
		'optgroup'=>['c'=>"\0\0\2",'ac'=>"\0\200\0\10",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
822
		'option'=>['c'=>"\0\0\2\10",'b'=>1,'cp'=>['option']],
823
		'output'=>['c'=>"\107",'ac'=>"\4"],
824
		'p'=>['c'=>"\3",'ac'=>"\4",'b'=>1,'cp'=>['p']],
825
		'param'=>['c'=>"\0\0\0\0\1",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
826
		'picture'=>['c'=>"\45",'ac'=>"\0\200\10",'nt'=>1],
827
		'pre'=>['c'=>"\3",'ac'=>"\4",'pre'=>1,'b'=>1,'cp'=>['p']],
828
		'progress'=>['c'=>"\7\0\1\1",'ac'=>"\4",'dd'=>"\0\0\0\1"],
829
		'q'=>['c'=>"\7",'ac'=>"\4"],
830
		'rb'=>['c'=>"\0\10",'ac'=>"\4",'b'=>1],
831
		'rp'=>['c'=>"\0\10\100",'ac'=>"\4",'b'=>1,'cp'=>['rp','rt']],
832
		'rt'=>['c'=>"\0\10\100",'ac'=>"\4",'b'=>1,'cp'=>['rp','rt']],
833
		'rtc'=>['c'=>"\0\10",'ac'=>"\4\0\100",'b'=>1],
834
		'ruby'=>['c'=>"\7",'ac'=>"\4\10"],
835
		's'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
836
		'samp'=>['c'=>"\7",'ac'=>"\4"],
837
		'script'=>['c'=>"\25\200",'to'=>1],
838
		'section'=>['c'=>"\3\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
839
		'select'=>['c'=>"\117",'ac'=>"\0\200\2",'nt'=>1],
840
		'small'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
841
		'source'=>['c'=>"\0\0\10\4",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
842
		'span'=>['c'=>"\7",'ac'=>"\4"],
843
		'strong'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
844
		'style'=>['c'=>"\20",'to'=>1,'b'=>1],
845
		'sub'=>['c'=>"\7",'ac'=>"\4"],
846
		'summary'=>['c'=>"\0\0\0\2",'ac'=>"\4\1",'b'=>1],
847
		'sup'=>['c'=>"\7",'ac'=>"\4"],
848
		'table'=>['c'=>"\3\0\0\200",'ac'=>"\0\202",'nt'=>1,'b'=>1,'cp'=>['p']],
849
		'tbody'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1,'cp'=>['tbody','td','tfoot','th','thead','tr']],
850
		'td'=>['c'=>"\200\0\40",'ac'=>"\1",'b'=>1,'cp'=>['td','th']],
851
		'template'=>['c'=>"\25\200\20",'nt'=>1],
852
		'textarea'=>['c'=>"\117",'pre'=>1,'to'=>1],
853
		'tfoot'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
854
		'th'=>['c'=>"\0\0\40",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['td','th']],
855
		'thead'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1],
856
		'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime'],
857
		'title'=>['c'=>"\20",'to'=>1,'b'=>1],
858
		'tr'=>['c'=>"\0\2\0\0\100",'ac'=>"\0\200\40",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
859
		'track'=>['c'=>"\0\0\0\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
860
		'u'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
861
		'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'nt'=>1,'b'=>1,'cp'=>['p']],
862
		'var'=>['c'=>"\7",'ac'=>"\4"],
863
		'video'=>['c'=>"\57",'c3'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
864
		'wbr'=>['c'=>"\5",'nt'=>1,'e'=>1,'v'=>1]
865
	];
866
867
	/**
	* Get the bitfield value for a given element name in a given context
	*
	* Starts from the bitfield stored in self::$htmlElements for given element, then turns off
	* every bit whose XPath condition exists and is not fulfilled by the context node. A bit's
	* condition is stored under the bitfield's name followed by the bit number, e.g. "c3".
	*
	* @param  string     $elName Name of the HTML element
	* @param  string     $k      Bitfield name: either 'c', 'ac' or 'dd'
	* @param  DOMElement $node   Context node (not necessarily the same as $elName)
	* @return string             Bitfield as raw bytes, or "\0" if the element has no such bitfield
	*/
	protected function getBitfield($elName, $k, DOMElement $node)
	{
		if (!isset(self::$htmlElements[$elName][$k]))
		{
			return "\0";
		}

		$element  = self::$htmlElements[$elName];
		$bitfield = $element[$k];
		foreach (str_split($bitfield, 1) as $byteNumber => $char)
		{
			$byteValue = ord($char);
			for ($bitNumber = 0; $bitNumber < 8; ++$bitNumber)
			{
				$bitValue = 1 << $bitNumber;
				if (!($byteValue & $bitValue))
				{
					// The bit is not set
					continue;
				}

				// Bits are numbered across bytes, starting with the lowest bit of the first byte
				$n = $byteNumber * 8 + $bitNumber;
				if (!isset($element[$k . $n]))
				{
					// No XPath condition for that category
					continue;
				}

				// The condition is passed as-is because evaluate() already wraps its query in a
				// call to boolean()
				if (!$this->evaluate($element[$k . $n], $node))
				{
					// The condition is not fulfilled, turn off the corresponding bit and update
					// the bitfield
					$byteValue ^= $bitValue;
					$bitfield[$byteNumber] = chr($byteValue);
				}
			}
		}

		return $bitfield;
	}
917
918
	/**
	* Test whether given element has given property in context
	*
	* The property must be set to a non-empty value in self::$htmlElements. If an XPath condition
	* is stored under the property's name followed by "0" (e.g. "e0") it must also be fulfilled
	* by the context node.
	*
	* @param  string     $elName   Element name
	* @param  string     $propName Property name, see self::$htmlElements
	* @param  DOMElement $node     Context node
	* @return bool
	*/
	protected function hasProperty($elName, $propName, DOMElement $node)
	{
		if (empty(self::$htmlElements[$elName][$propName]))
		{
			return false;
		}

		// Without a condition, the property applies unconditionally
		$conditionKey = $propName . '0';
		if (!isset(self::$htmlElements[$elName][$conditionKey]))
		{
			return true;
		}

		return (bool) $this->evaluate(self::$htmlElements[$elName][$conditionKey], $node);
	}
940
941
	/**
	* Test whether two bitfields have any bits in common
	*
	* Both bitfields are ANDed as strings, and the result matches if it contains anything other
	* than NUL bytes.
	*
	* @param  string $bitfield1
	* @param  string $bitfield2
	* @return bool
	*/
	protected static function match($bitfield1, $bitfield2)
	{
		$commonBits = $bitfield1 & $bitfield2;

		return (trim($commonBits, "\0") !== '');
	}
952
}