Completed
Push — master ( 20ba78...d37d19 )
by Josh
13:27
created

TemplateInspector::isVoid()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use DOMDocument;
11
use DOMElement;
12
use DOMXPath;
13
14
/**
15
* This class helps the RulesGenerator by analyzing a given template in order to answer questions
16
* such as "can this tag be a child/descendant of that other tag?" and others related to the HTML5
17
* content model.
18
*
19
* We use the HTML5 specs to determine which children or descendants should be allowed or denied
20
* based on HTML5 content models. While it does not exactly match HTML5 content models, it gets
21
* pretty close. We also use HTML5 "optional end tag" rules to create closeParent rules.
22
*
23
* Currently, this class does not evaluate elements created with <xsl:element> correctly, or
24
* attributes created with <xsl:attribute>, and may never do so due to the increased complexity it
25
* would entail. Additionally, it does not evaluate the scope of <xsl:apply-templates/>. For
26
* instance, it will treat <xsl:apply-templates select="LI"/> as if it was <xsl:apply-templates/>
27
*
28
* @link http://dev.w3.org/html5/spec/content-models.html#content-models
29
* @link http://dev.w3.org/html5/spec/syntax.html#optional-tags
30
* @see  /scripts/patchTemplateInspector.php
31
*/
32
class TemplateInspector
33
{
34
	/**
35
	* @var string[] allowChild bitfield for each branch
36
	*/
37
	protected $allowChildBitfields = [];
38
39
	/**
40
	* @var bool Whether elements are allowed as children
41
	*/
42
	protected $allowsChildElements = true;
43
44
	/**
45
	* @var bool Whether text nodes are allowed as children
46
	*/
47
	protected $allowsText = true;
48
49
	/**
50
	* @var string OR-ed bitfield representing all of the categories used by this template
51
	*/
52
	protected $contentBitfield = "\0";
53
54
	/**
55
	* @var string denyDescendant bitfield
56
	*/
57
	protected $denyDescendantBitfield = "\0";
58
59
	/**
60
	* @var DOMDocument Document containing the template
61
	*/
62
	protected $dom;
63
64
	/**
65
	* @var bool Whether this template contains any HTML elements
66
	*/
67
	protected $hasElements = false;
68
69
	/**
70
	* @var bool Whether this template renders non-whitespace text nodes at its root
71
	*/
72
	protected $hasRootText = false;
73
74
	/**
75
	* @var bool Whether this template should be considered a block-level element
76
	*/
77
	protected $isBlock = false;
78
79
	/**
80
	* @var bool Whether the template uses the "empty" content model
81
	*/
82
	protected $isEmpty = true;
83
84
	/**
85
	* @var bool Whether this template adds to the list of active formatting elements
86
	*/
87
	protected $isFormattingElement = false;
88
89
	/**
90
	* @var bool Whether this template lets content through via an xsl:apply-templates element
91
	*/
92
	protected $isPassthrough = false;
93
94
	/**
95
	* @var bool Whether all branches use the transparent content model
96
	*/
97
	protected $isTransparent = false;
98
99
	/**
100
	* @var bool Whether all branches have an ancestor that is a void element
101
	*/
102
	protected $isVoid = true;
103
104
	/**
105
	* @var array Names of every last HTML element that precedes an <xsl:apply-templates/> node
106
	*/
107
	protected $leafNodes = [];
108
109
	/**
110
	* @var bool Whether any branch has an element that preserves new lines by default (e.g. <pre>)
111
	*/
112
	protected $preservesNewLines = false;
113
114
	/**
115
	* @var array Bitfield of the first HTML element of every branch
116
	*/
117
	protected $rootBitfields = [];
118
119
	/**
120
	* @var array Names of every HTML element that has no HTML parent
121
	*/
122
	protected $rootNodes = [];
123
124
	/**
125
	* @var DOMXPath XPath engine associated with $this->dom
126
	*/
127
	protected $xpath;
128
129
	/**
	* Load the template then run the three analysis passes on it
	*
	* The passes run in this order: root nodes, branches, whole content.
	*
	* @param  string $template Template content
	*/
	public function __construct($template)
	{
		$dom = TemplateHelper::loadTemplate($template);

		$this->dom   = $dom;
		$this->xpath = new DOMXPath($dom);

		$this->analyseRootNodes();
		$this->analyseBranches();
		$this->analyseContent();
	}
143
144
	/**
	* Test whether given template can be used as a child of this template
	*
	* The child must be allowed as a descendant, each of its root bitfields must match every
	* allowChild bitfield of this template, and it must not render text at its root if this
	* template does not allow text.
	*
	* @param  TemplateInspector $child
	* @return bool
	*/
	public function allowsChild(TemplateInspector $child)
	{
		// A template that is denied as a descendant cannot be a child either
		if (!$this->allowsDescendant($child))
		{
			return false;
		}

		// Every root of the child has to be accepted by every branch of this template
		foreach ($this->allowChildBitfields as $allowedCategories)
		{
			foreach ($child->rootBitfields as $rootCategories)
			{
				if (!self::match($rootCategories, $allowedCategories))
				{
					return false;
				}
			}
		}

		// Text rendered at the root of the child is only a problem if we disallow text
		return ($this->allowsText || !$child->hasRootText);
	}
176
177
	/**
	* Test whether given template can be used as a descendant of this template
	*
	* @param  TemplateInspector $descendant
	* @return bool
	*/
	public function allowsDescendant(TemplateInspector $descendant)
	{
		// The descendant is denied if any of its categories is in our denyDescendant bitfield
		$isDenied = self::match($descendant->contentBitfield, $this->denyDescendantBitfield);
		if ($isDenied)
		{
			return false;
		}

		// Otherwise it's allowed unless it contains elements while we disallow elements
		return ($this->allowsChildElements || !$descendant->hasElements);
	}
199
200
	/**
	* Test whether elements are allowed as children of this template
	*
	* @return bool Value of $this->allowsChildElements
	*/
	public function allowsChildElements()
	{
		return $this->allowsChildElements;
	}
209
210
	/**
	* Test whether text nodes are allowed as children of this template
	*
	* @return bool Value of $this->allowsText
	*/
	public function allowsText()
	{
		return $this->allowsText;
	}
219
220
	/**
	* Test whether this template automatically closes given parent template
	*
	* Uses the "cp" lists from self::$htmlElements. The parent is considered closed as soon as
	* one of this template's root nodes closes one of the parent's leaf nodes.
	*
	* @param  TemplateInspector $parent
	* @return bool
	*/
	public function closesParent(TemplateInspector $parent)
	{
		foreach ($this->rootNodes as $rootName)
		{
			// Only root nodes that have a non-empty list of elements they close are relevant
			if (!empty(self::$htmlElements[$rootName]['cp']))
			{
				$closedNames = self::$htmlElements[$rootName]['cp'];
				foreach ($parent->leafNodes as $leafName)
				{
					if (in_array($leafName, $closedNames, true))
					{
						return true;
					}
				}
			}
		}

		return false;
	}
248
249
	/**
	* Evaluate an XPath expression against this template's document
	*
	* Delegates to the DOMXPath instance created by the constructor.
	*
	* @param  string     $expr XPath expression
	* @param  DOMElement $node Context node, or NULL for none
	* @return mixed            Whatever DOMXPath::evaluate() returns for this expression
	*/
	public function evaluate($expr, DOMElement $node = null)
	{
		$result = $this->xpath->evaluate($expr, $node);

		return $result;
	}
260
261
	/**
	* Test whether this template should be considered a block-level element
	*
	* @return bool Value of $this->isBlock
	*/
	public function isBlock()
	{
		return $this->isBlock;
	}
270
271
	/**
	* Test whether this template adds to the list of active formatting elements
	*
	* @return bool Value of $this->isFormattingElement
	*/
	public function isFormattingElement()
	{
		return $this->isFormattingElement;
	}
280
281
	/**
	* Test whether this template uses the "empty" content model
	*
	* @return bool Value of $this->isEmpty
	*/
	public function isEmpty()
	{
		return $this->isEmpty;
	}
290
291
	/**
	* Test whether this template lets content through via an xsl:apply-templates element
	*
	* @return bool Value of $this->isPassthrough
	*/
	public function isPassthrough()
	{
		return $this->isPassthrough;
	}
300
301
	/**
	* Test whether this template uses the "transparent" content model
	*
	* @return bool Value of $this->isTransparent
	*/
	public function isTransparent()
	{
		return $this->isTransparent;
	}
310
311
	/**
	* Test whether all branches have an ancestor that is a void element
	*
	* @return bool Value of $this->isVoid
	*/
	public function isVoid()
	{
		return $this->isVoid;
	}
320
321
	/**
	* Test whether any branch of this template preserves new lines
	*
	* @return bool Value of $this->preservesNewLines
	*/
	public function preservesNewLines()
	{
		return $this->preservesNewLines;
	}
330
331
	/**
	* Analyse the whole template's content
	*
	* Sets $this->contentBitfield to the OR-ed categories of every non-XSL element, sets
	* $this->hasElements if there is at least one of them, and sets $this->isPassthrough
	* depending on whether the template contains any xsl:apply-templates element.
	*/
	protected function analyseContent()
	{
		// Every element that is not in the XSL namespace counts as content
		$htmlNodes = $this->xpath->query(
			'//*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]'
		);
		foreach ($htmlNodes as $htmlNode)
		{
			$this->hasElements      = true;
			$this->contentBitfield |= $this->getBitfield($htmlNode->localName, 'c', $htmlNode);
		}

		// A template is passthrough if it contains at least one xsl:apply-templates element
		$applyTemplatesCount = $this->evaluate('count(//xsl:apply-templates)');
		$this->isPassthrough = (bool) $applyTemplatesCount;
	}
348
349
	/**
	* Record the HTML elements (and their bitfield) rendered at the root of the template
	*
	* Fills $this->rootNodes and $this->rootBitfields, sets $this->isBlock if any root element
	* is a block-level element, and sets $this->hasRootText if the template renders text
	* outside of any HTML element.
	*/
	protected function analyseRootNodes()
	{
		// Predicate that filters out nodes with a non-XSL ancestor
		$noHtmlAncestor = '[not(ancestor::*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"])]';

		// Non-XSL elements with no non-XSL ancestor are the first HTML element of every branch
		$rootQuery = '//*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]' . $noHtmlAncestor;
		foreach ($this->xpath->query($rootQuery) as $rootNode)
		{
			// The actual name is recorded even if the element is unknown
			$actualName        = $rootNode->localName;
			$this->rootNodes[] = $actualName;

			// Unknown elements are treated as if they were a <span> element
			$elName = (isset(self::$htmlElements[$actualName])) ? $actualName : 'span';

			// One block-level root element is enough to mark the whole template as block
			if ($this->elementIsBlock($elName, $rootNode))
			{
				$this->isBlock = true;
			}

			$this->rootBitfields[] = $this->getBitfield($elName, 'c', $rootNode);
		}

		// Root text must have no non-XSL ancestor, and must not be located inside of an
		// <xsl:attribute/>, <xsl:comment/> or <xsl:variable/> element
		$predicate = $noHtmlAncestor
		           . '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

		// Look for non-whitespace text nodes, non-whitespace <xsl:text/> and any <xsl:value-of/>
		$textQuery = implode(
			'|',
			[
				'//text()[normalize-space() != ""]' . $predicate,
				'//xsl:text[normalize-space() != ""]' . $predicate,
				'//xsl:value-of' . $predicate
			]
		);

		$rootTextCount = $this->evaluate('count(' . $textQuery . ')');
		if ($rootTextCount)
		{
			$this->hasRootText = true;
		}
	}
399
400
	/**
	* Analyse each branch that leads to an <xsl:apply-templates/> element
	*
	* A branch is made of the non-XSL ancestors of one <xsl:apply-templates/> element. Each
	* branch adds one entry to $this->allowChildBitfields and, if it contains any element, one
	* entry to $this->leafNodes. The template-wide flags are then updated from the branch:
	*
	*  - allowsChildElements and allowsText are turned off if the last element of any branch
	*    is text-only or does not accept text, respectively
	*  - isEmpty and isVoid are turned off if any branch has no empty/void element
	*  - preservesNewLines is turned on if any branch preserves new lines
	*  - isTransparent is turned off if any element of any branch is not transparent
	*  - denyDescendantBitfield accumulates the "dd" bitfield of every element
	*
	* A template with no branches is marked as not transparent and not allowing child elements.
	*/
	protected function analyseBranches()
	{
		// Remains true as long as every element of every branch is a formatting element
		$everyElementFormats = true;

		// Consider this template transparent unless we find out there are no branches or that one
		// of the branches is not transparent
		$this->isTransparent = true;

		foreach ($this->getXSLElements('apply-templates') as $applyTemplates)
		{
			// This branch is made of all the non-XSL ancestors of the <xsl:apply-templates/>
			$ancestors = $this->xpath->query(
				'ancestor::*[namespace-uri() != "http://www.w3.org/1999/XSL/Transform"]',
				$applyTemplates
			);

			// State of the current branch. Its allowChild bitfield starts with the value
			// associated with <div> in order to approximate a value if the whole branch uses the
			// transparent content model
			$branchBitfield  = self::$htmlElements['div']['ac'];
			$acceptsElements = true;
			$acceptsText     = true;
			$branchIsEmpty   = false;
			$branchIsVoid    = false;
			$keepsNewLines   = false;
			$lastName        = null;

			foreach ($ancestors as $ancestor)
			{
				// The name of the last element processed is the name of the branch's leaf
				$lastName = $ancestor->localName;

				// Unknown elements are treated as if they were a <span> element
				$elName = (isset(self::$htmlElements[$lastName])) ? $lastName : 'span';

				// One void element makes the branch void
				if ($this->hasProperty($elName, 'v', $ancestor))
				{
					$branchIsVoid = true;
				}

				// One element that uses the "empty" content model makes the branch empty
				if ($this->hasProperty($elName, 'e', $ancestor))
				{
					$branchIsEmpty = true;
				}

				// A non-transparent element resets the branch's bitfield and makes the whole
				// template non-transparent
				if (!$this->hasProperty($elName, 't', $ancestor))
				{
					$branchBitfield      = "\0";
					$this->isTransparent = false;
				}

				// Any element that is neither a formatting element nor a formatting span
				// disqualifies the template as a formatting element
				if (!$this->hasProperty($elName, 'fe', $ancestor)
				 && !$this->isFormattingSpan($ancestor))
				{
					$everyElementFormats = false;
				}

				// Those two are overwritten at every element, only the last one counts
				$acceptsElements = !$this->hasProperty($elName, 'to', $ancestor);
				$acceptsText     = !$this->hasProperty($elName, 'nt', $ancestor);

				// allowChild rules are cumulative if transparent, and reset above otherwise
				$branchBitfield |= $this->getBitfield($elName, 'ac', $ancestor);

				// denyDescendant rules are cumulative
				$this->denyDescendantBitfield |= $this->getBitfield($elName, 'dd', $ancestor);

				// Collect the CSS that applies to this element: an implied white-space:pre for
				// elements that have the "pre" property, its style attribute, and the content of
				// any <xsl:attribute name="style"/> found in this element or its descendants.
				// Descendants' styles get tested early but the result is the same as every
				// element of the branch is checked in order
				$css = '';
				if ($this->hasProperty($elName, 'pre', $ancestor))
				{
					$css .= 'white-space:pre;';
				}
				if ($ancestor->hasAttribute('style'))
				{
					$css .= $ancestor->getAttribute('style') . ';';
				}
				$styleAttributes = $this->xpath->query('.//xsl:attribute[@name="style"]', $ancestor);
				foreach ($styleAttributes as $styleAttribute)
				{
					$css .= $styleAttribute->textContent;
				}

				preg_match_all(
					'/white-space\\s*:\\s*(no|pre)/i',
					strtolower($css),
					$matches
				);

				// The last white-space declaration wins
				// TRUE:  "pre", "pre-line" and "pre-wrap"
				// FALSE: "normal", "nowrap"
				$keywords = $matches[1];
				if (!empty($keywords))
				{
					$keepsNewLines = (end($keywords) === 'pre');
				}
			}

			// Record this branch's bitfield and, if it had any element, the name of its leaf
			$this->allowChildBitfields[] = $branchBitfield;
			if (isset($lastName))
			{
				$this->leafNodes[] = $lastName;
			}

			// Restrictions from any branch apply to the whole template
			if (!$acceptsElements)
			{
				$this->allowsChildElements = false;
			}
			if (!$acceptsText)
			{
				$this->allowsText = false;
			}

			// The template is only empty/void if every branch is
			if (!$branchIsEmpty)
			{
				$this->isEmpty = false;
			}
			if (!$branchIsVoid)
			{
				$this->isVoid = false;
			}

			// One branch that preserves new lines is enough for the whole template
			if ($keepsNewLines)
			{
				$this->preservesNewLines = true;
			}
		}

		if (empty($this->allowChildBitfields))
		{
			// No branches => not transparent and no child elements
			$this->allowChildBitfields = ["\0"];
			$this->allowsChildElements = false;
			$this->isTransparent       = false;
		}
		elseif (!empty($this->leafNodes))
		{
			// The isFormattingElement property only gets its final value if at least one branch
			// contained an element
			$this->isFormattingElement = $everyElementFormats;
		}
	}
599
600
	/**
	* Test whether given element is a block-level element
	*
	* The element's inline style takes precedence: a display value of "block" makes it a block,
	* a display value that starts with "inline" or "none" makes it a non-block. Without either,
	* the "b" property of the element decides.
	*
	* @param  string     $elName Element name
	* @param  DOMElement $node   Context node
	* @return bool
	*/
	protected function elementIsBlock($elName, DOMElement $node)
	{
		$css = $this->getStyle($node);

		// Patterns are tested in order, the first one that matches decides the outcome
		$verdicts = [
			'(\\bdisplay\\s*:\\s*block)i'             => true,
			'(\\bdisplay\\s*:\\s*(?:inli|no)ne)i' => false
		];
		foreach ($verdicts as $regexp => $isBlock)
		{
			if (preg_match($regexp, $css))
			{
				return $isBlock;
			}
		}

		return $this->hasProperty($elName, 'b', $node);
	}
621
622
	/**
	* Retrieve and return the inline style assigned to given element
	*
	* The value of the element's style attribute comes first, followed by the text content of
	* each xsl:attribute child named "style", each preceded by a semicolon. Only xsl:attribute
	* elements that are direct children of the node are collected, which means that optional
	* attributes (those nested in another XSL element) are missed.
	*
	* @param  DOMElement $node Context node
	* @return string
	*/
	protected function getStyle(DOMElement $node)
	{
		// Start with the inline attribute
		$style = $node->getAttribute('style');

		// Reuse the XPath engine created by the constructor instead of creating a new one at
		// every call. A new engine is only needed for a node that belongs to another document
		$xpath = ($node->ownerDocument === $this->dom)
		       ? $this->xpath
		       : new DOMXPath($node->ownerDocument);

		// Add the content of any xsl:attribute named "style". This will miss optional attributes
		foreach ($xpath->query('xsl:attribute[@name="style"]', $node) as $attribute)
		{
			$style .= ';' . $attribute->textContent;
		}

		return $style;
	}
643
644
	/**
	* Get all the elements of given name that belong to the XSL namespace
	*
	* @param  string      $elName XSL element's name, e.g. "apply-templates"
	* @return \DOMNodeList
	*/
	protected function getXSLElements($elName)
	{
		$xslNamespace = 'http://www.w3.org/1999/XSL/Transform';

		return $this->dom->getElementsByTagNameNS($xslNamespace, $elName);
	}
654
655
	/**
	* Test whether given node is a span element used for formatting
	*
	* Will return TRUE if the node is a span element that has a non-empty class attribute and/or
	* a non-empty style attribute, and no attributes other than those two
	*
	* @param  DOMElement $node
	* @return boolean
	*/
	protected function isFormattingSpan(DOMElement $node)
	{
		if ($node->nodeName !== 'span')
		{
			return false;
		}

		// At least one of the two formatting attributes must have a value
		$hasClass = ($node->getAttribute('class') !== '');
		$hasStyle = ($node->getAttribute('style') !== '');
		if (!$hasClass && !$hasStyle)
		{
			return false;
		}

		// Any other attribute disqualifies the element
		foreach ($node->attributes as $attrName => $attribute)
		{
			if (!in_array($attrName, ['class', 'style'], true))
			{
				return false;
			}
		}

		return true;
	}
686
687
	/**
688
	* "What is this?" you might ask. This is basically a compressed version of the HTML5 content
689
	* models and rules, with some liberties taken.
690
	*
691
	* For each element, up to three bitfields are defined: "c", "ac" and "dd". Bitfields are stored
692
	* as raw bytes, formatted using the octal notation to keep the sources ASCII.
693
	*
694
	*   "c" represents the categories the element belongs to. The categories are comprised of HTML5
695
	*   content models (such as "phrasing content" or "interactive content") plus a few special
696
	*   categories created to cover the parts of the specs that refer to "a group of X and Y
697
	*   elements" rather than a specific content model.
698
	*
699
	*   "ac" represents the categories that are allowed as children of given element.
700
	*
701
	*   "dd" represents the categories that must not appear as a descendant of given element.
702
	*
703
	* Sometimes, HTML5 specifies some restrictions on when an element can accept certain children,
704
	* or what categories the element belongs to. For example, an <img> element is only part of the
705
	* "interactive content" category if it has a "usemap" attribute. Those restrictions are
706
	* expressed as an XPath expression and stored using the concatenation of the key of the bitfield
707
	* plus the bit number of the category. For instance, if "interactive content" got assigned to
708
	* bit 2, the definition of the <img> element will contain a key "c2" with value "@usemap".
709
	*
710
	* Additionally, other flags are set:
711
	*
712
	*   "t" indicates that the element uses the "transparent" content model.
713
	*   "e" indicates that the element uses the "empty" content model.
714
	*   "v" indicates that the element is a void element.
715
	*   "nt" indicates that the element does not accept text nodes. (no text)
716
	*   "to" indicates that the element should only contain text. (text-only)
717
	*   "fe" indicates that the element is a formatting element. It will automatically be reopened
718
	*   when closed by an end tag of a different name.
719
	*   "b" indicates that the element is not phrasing content, which makes it likely to act like
720
	*   a block element.
721
	*
722
	* Finally, HTML5 defines "optional end tag" rules, where one element automatically closes its
723
	* predecessor. Those are used to generate closeParent rules and are stored in the "cp" key.
724
	*
725
	* @var array
726
	* @see /scripts/patchTemplateInspector.php
727
	*/
728
	protected static $htmlElements = [
729
		'a'=>['c'=>"\17\0\0\0\0\1",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\0\1",'t'=>1,'fe'=>1],
730
		'abbr'=>['c'=>"\7",'ac'=>"\4"],
731
		'address'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\45",'b'=>1,'cp'=>['p']],
732
		'article'=>['c'=>"\3\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
733
		'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
734
		'audio'=>['c'=>"\57",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
735
		'b'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
736
		'base'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
737
		'bdi'=>['c'=>"\7",'ac'=>"\4"],
738
		'bdo'=>['c'=>"\7",'ac'=>"\4"],
739
		'blockquote'=>['c'=>"\203",'ac'=>"\1",'b'=>1,'cp'=>['p']],
740
		'body'=>['c'=>"\200\0\4",'ac'=>"\1",'b'=>1],
741
		'br'=>['c'=>"\5",'nt'=>1,'e'=>1,'v'=>1],
742
		'button'=>['c'=>"\117",'ac'=>"\4",'dd'=>"\10"],
743
		'canvas'=>['c'=>"\47",'ac'=>"\0",'t'=>1],
744
		'caption'=>['c'=>"\0\2",'ac'=>"\1",'dd'=>"\0\0\0\200",'b'=>1],
745
		'cite'=>['c'=>"\7",'ac'=>"\4"],
746
		'code'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
747
		'col'=>['c'=>"\0\0\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
748
		'colgroup'=>['c'=>"\0\2",'ac'=>"\0\0\20",'ac20'=>'not(@span)','nt'=>1,'e'=>1,'e0'=>'@span','b'=>1],
749
		'data'=>['c'=>"\7",'ac'=>"\4"],
750
		'datalist'=>['c'=>"\5",'ac'=>"\4\200\0\10"],
751
		'dd'=>['c'=>"\0\0\200",'ac'=>"\1",'b'=>1,'cp'=>['dd','dt']],
752
		'del'=>['c'=>"\5",'ac'=>"\0",'t'=>1],
753
		'details'=>['c'=>"\213",'ac'=>"\1\0\0\2",'b'=>1,'cp'=>['p']],
754
		'dfn'=>['c'=>"\7\0\0\0\40",'ac'=>"\4",'dd'=>"\0\0\0\0\40"],
755
		'div'=>['c'=>"\3",'ac'=>"\1",'b'=>1,'cp'=>['p']],
756
		'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\200\200",'nt'=>1,'b'=>1,'cp'=>['p']],
757
		'dt'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['dd','dt']],
758
		'em'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
759
		'embed'=>['c'=>"\57",'nt'=>1,'e'=>1,'v'=>1],
760
		'fieldset'=>['c'=>"\303",'ac'=>"\1\0\0\20",'b'=>1,'cp'=>['p']],
761
		'figcaption'=>['c'=>"\0\0\0\0\0\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
762
		'figure'=>['c'=>"\203",'ac'=>"\1\0\0\0\0\4",'b'=>1,'cp'=>['p']],
763
		'footer'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
764
		'form'=>['c'=>"\3\0\0\0\20",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
765
		'h1'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
766
		'h2'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
767
		'h3'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
768
		'h4'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
769
		'h5'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
770
		'h6'=>['c'=>"\3\1",'ac'=>"\4",'b'=>1,'cp'=>['p']],
771
		'head'=>['c'=>"\0\0\4",'ac'=>"\20",'nt'=>1,'b'=>1],
772
		'header'=>['c'=>"\3\40\0\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
773
		'hr'=>['c'=>"\1\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
774
		'html'=>['c'=>"\0",'ac'=>"\0\0\4",'nt'=>1,'b'=>1],
775
		'i'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
776
		'iframe'=>['c'=>"\57",'ac'=>"\4"],
777
		'img'=>['c'=>"\57\20\10",'c3'=>'@usemap','nt'=>1,'e'=>1,'v'=>1],
778
		'input'=>['c'=>"\17\20",'c3'=>'@type!="hidden"','c12'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','nt'=>1,'e'=>1,'v'=>1],
779
		'ins'=>['c'=>"\7",'ac'=>"\0",'t'=>1],
780
		'kbd'=>['c'=>"\7",'ac'=>"\4"],
781
		'keygen'=>['c'=>"\117",'nt'=>1,'e'=>1,'v'=>1],
782
		'label'=>['c'=>"\17\20\0\0\4",'ac'=>"\4",'dd'=>"\0\0\1\0\4"],
783
		'legend'=>['c'=>"\0\0\0\20",'ac'=>"\4",'b'=>1],
784
		'li'=>['c'=>"\0\0\0\0\200",'ac'=>"\1",'b'=>1,'cp'=>['li']],
785
		'link'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
786
		'main'=>['c'=>"\3\0\0\0\10",'ac'=>"\1",'b'=>1,'cp'=>['p']],
787
		'mark'=>['c'=>"\7",'ac'=>"\4"],
788
		'media element'=>['c'=>"\0\0\0\0\0\2",'nt'=>1,'b'=>1],
789
		'menu'=>['c'=>"\1\100",'ac'=>"\0\300",'nt'=>1,'b'=>1,'cp'=>['p']],
790
		'menuitem'=>['c'=>"\0\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
791
		'meta'=>['c'=>"\20",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
792
		'meter'=>['c'=>"\7\0\1\0\2",'ac'=>"\4",'dd'=>"\0\0\0\0\2"],
793
		'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
794
		'noscript'=>['c'=>"\25",'nt'=>1],
795
		'object'=>['c'=>"\147",'ac'=>"\0\0\0\0\1",'t'=>1],
796
		'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'nt'=>1,'b'=>1,'cp'=>['p']],
797
		'optgroup'=>['c'=>"\0\0\2",'ac'=>"\0\200\0\10",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
798
		'option'=>['c'=>"\0\0\2\10",'b'=>1,'cp'=>['option']],
799
		'output'=>['c'=>"\107",'ac'=>"\4"],
800
		'p'=>['c'=>"\3",'ac'=>"\4",'b'=>1,'cp'=>['p']],
801
		'param'=>['c'=>"\0\0\0\0\1",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
802
		'picture'=>['c'=>"\45",'ac'=>"\0\200\10",'nt'=>1],
803
		'pre'=>['c'=>"\3",'ac'=>"\4",'pre'=>1,'b'=>1,'cp'=>['p']],
804
		'progress'=>['c'=>"\7\0\1\1",'ac'=>"\4",'dd'=>"\0\0\0\1"],
805
		'q'=>['c'=>"\7",'ac'=>"\4"],
806
		'rb'=>['c'=>"\0\10",'ac'=>"\4",'b'=>1],
807
		'rp'=>['c'=>"\0\10\100",'ac'=>"\4",'b'=>1,'cp'=>['rp','rt']],
808
		'rt'=>['c'=>"\0\10\100",'ac'=>"\4",'b'=>1,'cp'=>['rp','rt']],
809
		'rtc'=>['c'=>"\0\10",'ac'=>"\4\0\100",'b'=>1],
810
		'ruby'=>['c'=>"\7",'ac'=>"\4\10"],
811
		's'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
812
		'samp'=>['c'=>"\7",'ac'=>"\4"],
813
		'script'=>['c'=>"\25\200",'to'=>1],
814
		'section'=>['c'=>"\3\4",'ac'=>"\1",'b'=>1,'cp'=>['p']],
815
		'select'=>['c'=>"\117",'ac'=>"\0\200\2",'nt'=>1],
816
		'small'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
817
		'source'=>['c'=>"\0\0\10\4",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
818
		'span'=>['c'=>"\7",'ac'=>"\4"],
819
		'strong'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
820
		'style'=>['c'=>"\20",'to'=>1,'b'=>1],
821
		'sub'=>['c'=>"\7",'ac'=>"\4"],
822
		'summary'=>['c'=>"\0\0\0\2",'ac'=>"\4\1",'b'=>1],
823
		'sup'=>['c'=>"\7",'ac'=>"\4"],
824
		'table'=>['c'=>"\3\0\0\200",'ac'=>"\0\202",'nt'=>1,'b'=>1,'cp'=>['p']],
825
		'tbody'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1,'cp'=>['tbody','td','tfoot','th','thead','tr']],
826
		'td'=>['c'=>"\200\0\40",'ac'=>"\1",'b'=>1,'cp'=>['td','th']],
827
		'template'=>['c'=>"\25\200\20",'nt'=>1],
828
		'textarea'=>['c'=>"\117",'pre'=>1,'to'=>1],
829
		'tfoot'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
830
		'th'=>['c'=>"\0\0\40",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['td','th']],
831
		'thead'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'nt'=>1,'b'=>1],
832
		'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime'],
833
		'title'=>['c'=>"\20",'to'=>1,'b'=>1],
834
		'tr'=>['c'=>"\0\2\0\0\100",'ac'=>"\0\200\40",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
835
		'track'=>['c'=>"\0\0\0\100",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
836
		'u'=>['c'=>"\7",'ac'=>"\4",'fe'=>1],
837
		'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'nt'=>1,'b'=>1,'cp'=>['p']],
838
		'var'=>['c'=>"\7",'ac'=>"\4"],
839
		'video'=>['c'=>"\57",'c3'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
840
		'wbr'=>['c'=>"\5",'nt'=>1,'e'=>1,'v'=>1]
841
	];
842
843
	/**
	* Get the bitfield value for a given element name in a given context
	*
	* Starts with the bitfield stored in self::$htmlElements then, for each bit that is set and
	* has an XPath condition attached (stored under the bitfield's name followed by the bit
	* number), turns the bit off if the condition is not fulfilled by the context node.
	*
	* @param  string     $elName Name of the HTML element
	* @param  string     $k      Bitfield name: either 'c', 'ac' or 'dd'
	* @param  DOMElement $node   Context node (not necessarily the same as $elName)
	* @return string             Bitfield as raw bytes, or "\0" if the element has no such bitfield
	*/
	protected function getBitfield($elName, $k, DOMElement $node)
	{
		if (!isset(self::$htmlElements[$elName][$k]))
		{
			return "\0";
		}

		$properties = self::$htmlElements[$elName];
		$bitfield   = $properties[$k];
		$byteCount  = strlen($bitfield);
		for ($byteNumber = 0; $byteNumber < $byteCount; ++$byteNumber)
		{
			$byteValue = ord($bitfield[$byteNumber]);
			for ($bitNumber = 0; $bitNumber < 8; ++$bitNumber)
			{
				$mask = 1 << $bitNumber;

				// Name of the key that would hold an XPath condition for this category
				$conditionKey = $k . ($byteNumber * 8 + $bitNumber);

				// Nothing to do if the bit is not set or if it's unconditional
				if (!($byteValue & $mask) || !isset($properties[$conditionKey]))
				{
					continue;
				}

				// Turn off the bit in the returned bitfield if the condition is not fulfilled
				if (!$this->evaluate('boolean(' . $properties[$conditionKey] . ')', $node))
				{
					$byteValue            &= ~$mask;
					$bitfield[$byteNumber] = chr($byteValue);
				}
			}
		}

		return $bitfield;
	}
893
894
	/**
	* Test whether given element has given property in context
	*
	* A property may come with an XPath condition, stored under the property's name followed by
	* "0". When there is one, the element only has the property if the condition is fulfilled
	* by the context node.
	*
	* @param  string     $elName   Element name
	* @param  string     $propName Property name, see self::$htmlElements
	* @param  DOMElement $node     Context node
	* @return bool
	*/
	protected function hasProperty($elName, $propName, DOMElement $node)
	{
		// The property is either missing or turned off
		if (empty(self::$htmlElements[$elName][$propName]))
		{
			return false;
		}

		// The property is unconditional
		$conditionKey = $propName . '0';
		if (!isset(self::$htmlElements[$elName][$conditionKey]))
		{
			return true;
		}

		// Test the XPath condition
		$condition = 'boolean(' . self::$htmlElements[$elName][$conditionKey] . ')';

		return (bool) $this->evaluate($condition, $node);
	}
916
917
	/**
	* Test whether two bitfields have any bits in common
	*
	* Both bitfields are AND-ed together as strings, then the result is tested for any byte
	* other than NUL.
	*
	* @param  string $bitfield1
	* @param  string $bitfield2
	* @return bool
	*/
	protected static function match($bitfield1, $bitfield2)
	{
		$commonBits = $bitfield1 & $bitfield2;

		return (rtrim($commonBits, "\0") !== '');
	}
928
}