Completed
Branch TemplateInspector (ff8a27)
by Josh
05:46
created

TemplateInspector::anyBranchHasProperty()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 15
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 15
rs 9.2
c 0
b 0
f 0
cc 4
eloc 6
nc 4
nop 1
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use DOMDocument;
11
use DOMElement;
12
use DOMXPath;
13
14
/**
15
* This class helps the RulesGenerator by analyzing a given template in order to answer questions
16
* such as "can this tag be a child/descendant of that other tag?" and others related to the HTML5
17
* content model.
18
*
19
* We use the HTML5 specs to determine which children or descendants should be allowed or denied
20
* based on HTML5 content models. While it does not exactly match HTML5 content models, it gets
21
* pretty close. We also use HTML5 "optional end tag" rules to create closeParent rules.
22
*
23
* Currently, this class does not evaluate elements created with <xsl:element> correctly, or
24
* attributes created with <xsl:attribute> and may never do so due to the increased complexity it
25
* would entail. Additionally, it does not evaluate the scope of <xsl:apply-templates/>. For
26
* instance, it will treat <xsl:apply-templates select="LI"/> as if it was <xsl:apply-templates/>
27
*
28
* @link http://dev.w3.org/html5/spec/content-models.html#content-models
29
* @link http://dev.w3.org/html5/spec/syntax.html#optional-tags
30
* @see  /scripts/patchTemplateInspector.php
31
*/
32
class TemplateInspector
33
{
34
	/**
35
	* XSL namespace
36
	*/
37
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';
38
39
	/**
40
	* @var string[] allowChild bitfield for each branch
41
	*/
42
	protected $allowChildBitfields = [];
43
44
	/**
45
	* @var bool Whether elements are allowed as children
46
	*/
47
	protected $allowsChildElements;
48
49
	/**
50
	* @var bool Whether text nodes are allowed as children
51
	*/
52
	protected $allowsText;
53
54
	/**
55
	* @var array[] Array of array of DOMElement instances
56
	*/
57
	protected $branches;
58
59
	/**
60
	* @var string OR-ed bitfield representing all of the categories used by this template
61
	*/
62
	protected $contentBitfield = "\0";
63
64
	/**
65
	* @var string denyDescendant bitfield
66
	*/
67
	protected $denyDescendantBitfield = "\0";
68
69
	/**
70
	* @var DOMDocument Document containing the template
71
	*/
72
	protected $dom;
73
74
	/**
75
	* @var bool Whether this template contains any HTML elements
76
	*/
77
	protected $hasElements = false;
78
79
	/**
80
	* @var bool Whether this template renders non-whitespace text nodes at its root
81
	*/
82
	protected $hasRootText;
83
84
	/**
85
	* @var bool Whether this template should be considered a block-level element
86
	*/
87
	protected $isBlock = false;
88
89
	/**
90
	* @var bool Whether the template uses the "empty" content model
91
	*/
92
	protected $isEmpty;
93
94
	/**
95
	* @var bool Whether this template adds to the list of active formatting elements
96
	*/
97
	protected $isFormattingElement;
98
99
	/**
100
	* @var bool Whether this template lets content through via an xsl:apply-templates element
101
	*/
102
	protected $isPassthrough = false;
103
104
	/**
105
	* @var bool Whether all branches use the transparent content model
106
	*/
107
	protected $isTransparent = false;
108
109
	/**
110
	* @var bool Whether all branches have an ancestor that is a void element
111
	*/
112
	protected $isVoid;
113
114
	/**
115
	* @var array Names of every last HTML element that precedes an <xsl:apply-templates/> node
116
	*/
117
	protected $leafNodes = [];
118
119
	/**
120
	* @var bool Whether any branch has an element that preserves new lines by default (e.g. <pre>)
121
	*/
122
	protected $preservesNewLines = false;
123
124
	/**
125
	* @var array Bitfield of the first HTML element of every branch
126
	*/
127
	protected $rootBitfields = [];
128
129
	/**
130
	* @var array Names of every HTML element that have no HTML parent
131
	*/
132
	protected $rootNodes = [];
133
134
	/**
135
	* @var DOMXPath XPath engine associated with $this->dom
136
	*/
137
	protected $xpath;
138
139
	/**
	* Load the template into a DOM document and run every analysis pass on it
	*
	* The passes run in a fixed order: root nodes first, then branches, then content.
	*
	* @param  string $template Template content
	*/
	public function __construct($template)
	{
		$dom = TemplateHelper::loadTemplate($template);

		$this->dom   = $dom;
		$this->xpath = new DOMXPath($dom);

		// Each pass fills in a different subset of this object's properties
		$this->analyseRootNodes();
		$this->analyseBranches();
		$this->analyseContent();
	}
153
154
	/**
	* Test whether given template can be used as a child of this template
	*
	* The child is rejected if it is denied as a descendant, if any of its root elements is not
	* matched by the allowChild bitfield of any of this template's branches, or if it renders text
	* at its root while this template does not allow text.
	*
	* @param  TemplateInspector $child
	* @return bool
	*/
	public function allowsChild(TemplateInspector $child)
	{
		// A template that is denied as a descendant cannot be accepted as a child either
		if (!$this->allowsDescendant($child))
		{
			return false;
		}

		// Every root bitfield of the child must match the bitfield of every branch
		foreach ($this->allowChildBitfields as $allowedBitfield)
		{
			foreach ($child->rootBitfields as $childRootBitfield)
			{
				if (!self::match($childRootBitfield, $allowedBitfield))
				{
					return false;
				}
			}
		}

		// Text at the root of the child is only acceptable if this template allows text
		if ($child->hasRootText && !$this->allowsText)
		{
			return false;
		}

		return true;
	}
181
182
	/**
	* Test whether given template can be used as a descendant of this template
	*
	* @param  TemplateInspector $descendant
	* @return bool
	*/
	public function allowsDescendant(TemplateInspector $descendant)
	{
		// Reject the descendant if its content shares any bit with our denyDescendant bitfield
		$isDenied = self::match($descendant->contentBitfield, $this->denyDescendantBitfield);
		if ($isDenied)
		{
			return false;
		}

		// Reject the descendant if it contains elements while we do not allow child elements
		if ($descendant->hasElements && !$this->allowsChildElements)
		{
			return false;
		}

		return true;
	}
199
200
	/**
	* Getter for the allowsChildElements flag, as set by computeAllowsChildElements()
	*
	* @return bool Whether elements are allowed as children of this template
	*/
	public function allowsChildElements()
	{
		return $this->allowsChildElements;
	}
209
210
	/**
	* Getter for the allowsText flag, as set by computeAllowsText()
	*
	* @return bool Whether text nodes are allowed as children of this template
	*/
	public function allowsText()
	{
		return $this->allowsText;
	}
219
220
	/**
	* Test whether this template automatically closes given parent template
	*
	* The parent is considered closed as soon as the name of one of its leaf nodes appears in the
	* "cp" list of any of this template's root nodes.
	*
	* @param  TemplateInspector $parent
	* @return bool
	*/
	public function closesParent(TemplateInspector $parent)
	{
		foreach ($this->rootNodes as $rootName)
		{
			// Only root nodes that have a non-empty "cp" list can close anything
			if (!empty(self::$htmlElements[$rootName]['cp']))
			{
				$closedNames = self::$htmlElements[$rootName]['cp'];
				foreach ($parent->leafNodes as $leafName)
				{
					if (in_array($leafName, $closedNames, true))
					{
						return true;
					}
				}
			}
		}

		return false;
	}
248
249
	/**
	* Run an XPath expression against this template's document
	*
	* @param  string     $expr XPath expression
	* @param  DOMElement $node Context node, or NULL for none
	* @return mixed            Whatever DOMXPath::evaluate() returns for this expression
	*/
	public function evaluate($expr, DOMElement $node = null)
	{
		$result = $this->xpath->evaluate($expr, $node);

		return $result;
	}
260
261
	/**
	* Getter for the isBlock flag, which is set when any root node is a block-level element
	*
	* @return bool Whether this template should be considered a block-level element
	*/
	public function isBlock()
	{
		return $this->isBlock;
	}
270
271
	/**
	* Getter for the isFormattingElement flag, as set by computeFormattingElement()
	*
	* @return bool Whether this template adds to the list of active formatting elements
	*/
	public function isFormattingElement()
	{
		return $this->isFormattingElement;
	}
280
281
	/**
	* Getter for the isEmpty flag, as set by computeIsEmpty()
	*
	* @return bool Whether this template uses the "empty" content model
	*/
	public function isEmpty()
	{
		return $this->isEmpty;
	}
290
291
	/**
	* Getter for the isPassthrough flag, as set by analyseContent()
	*
	* @return bool Whether this template lets content through via an xsl:apply-templates element
	*/
	public function isPassthrough()
	{
		return $this->isPassthrough;
	}
300
301
	/**
	* Getter for the isTransparent flag, as set by computeIsTransparent()
	*
	* @return bool Whether this template uses the "transparent" content model
	*/
	public function isTransparent()
	{
		return $this->isTransparent;
	}
310
311
	/**
	* Getter for the isVoid flag, as set by computeIsVoid()
	*
	* @return bool Whether this template is considered void
	*/
	public function isVoid()
	{
		return $this->isVoid;
	}
320
321
	/**
	* Getter for the preservesNewLines flag, as set by analyseBranches()
	*
	* @return bool Whether any branch of this template preserves new lines
	*/
	public function preservesNewLines()
	{
		return $this->preservesNewLines;
	}
330
331
	/**
	* Inspect the whole template to set contentBitfield, hasElements and isPassthrough
	*
	* The "c" bitfield of every non-XSL element is OR-ed into contentBitfield.
	*/
	protected function analyseContent()
	{
		// Select every element that lives outside of the XSL namespace
		$elements = $this->xpath->query('//*[namespace-uri() != "' . self::XMLNS_XSL . '"]');
		foreach ($elements as $element)
		{
			$this->contentBitfield |= $this->getBitfield($element, 'c');
			$this->hasElements      = true;
		}

		// The template is passthrough if it contains at least one xsl:apply-templates element
		$applyTemplatesCount = $this->evaluate('count(//xsl:apply-templates)');
		$this->isPassthrough = (bool) $applyTemplatesCount;
	}
347
348
	/**
	* Inspect the root of the template to set rootNodes, rootBitfields, isBlock and hasRootText
	*
	* A root node is a non-XSL element that has no non-XSL ancestor. Root text is any
	* non-whitespace text node, non-whitespace xsl:text or any xsl:value-of found outside of
	* non-XSL elements and outside of xsl:attribute, xsl:comment and xsl:variable.
	*/
	protected function analyseRootNodes()
	{
		// Predicate that rejects any node that has a non-XSL ancestor
		$noHtmlAncestor = '[not(ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"])]';

		// Non-XSL elements that pass this predicate are the first HTML element of their branch
		$rootQuery = '//*[namespace-uri() != "' . self::XMLNS_XSL . '"]' . $noHtmlAncestor;
		foreach ($this->xpath->query($rootQuery) as $element)
		{
			$this->rootNodes[] = $element->localName;

			// One block-level root node is enough to mark the whole template as a block
			if ($this->elementIsBlock($element))
			{
				$this->isBlock = true;
			}

			$this->rootBitfields[] = $this->getBitfield($element, 'c');
		}

		// For text, also reject anything inside of xsl:attribute, xsl:comment or xsl:variable
		$textPredicate = $noHtmlAncestor
		               . '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

		$textQueries = [];
		foreach (['//text()[normalize-space() != ""]', '//xsl:text[normalize-space() != ""]', '//xsl:value-of'] as $path)
		{
			$textQueries[] = $path . $textPredicate;
		}

		$this->hasRootText = (bool) $this->evaluate('count(' . implode('|', $textQueries) . ')');
	}
386
387
	/**
	* Collect the branches of this template and compute every property that depends on them
	*
	* A branch is the list of non-XSL ancestors of one xsl:apply-templates element. Once the
	* branches are collected, the compute*() methods and storeLeafNodes() are run, then each
	* branch is inspected to determine whether the template preserves new lines.
	*/
	protected function analyseBranches()
	{
		$ancestorsQuery = 'ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"]';

		$this->branches = [];
		foreach ($this->xpath->query('//xsl:apply-templates') as $applyTemplates)
		{
			$ancestors        = $this->xpath->query($ancestorsQuery, $applyTemplates);
			$this->branches[] = iterator_to_array($ancestors);
		}

		$this->computeAllowsChildElements();
		$this->computeAllowsText();
		$this->computeBitfields();
		$this->computeFormattingElement();
		$this->computeIsEmpty();
		$this->computeIsTransparent();
		$this->computeIsVoid();
		$this->storeLeafNodes();

		foreach ($this->branches as $branch)
		{
			// Whether the current branch preserves new lines. Elements are visited in order and
			// the last white-space declaration that is found decides the outcome
			$branchPreserves = false;

			foreach ($branch as $element)
			{
				// Collect this element's CSS, starting with its default if it has the "pre"
				// property, then its style attribute, then any xsl:attribute named "style"
				// found among its descendants
				$css = ($this->hasProperty($element->localName, 'pre', $element)) ? 'white-space:pre;' : '';

				if ($element->hasAttribute('style'))
				{
					$css .= $element->getAttribute('style') . ';';
				}

				$styleAttributes = $this->xpath->query('.//xsl:attribute[@name="style"]', $element);
				foreach ($styleAttributes as $styleAttribute)
				{
					$css .= $styleAttribute->textContent;
				}

				preg_match_all(
					'/white-space\\s*:\\s*(no|pre)/i',
					strtolower($css),
					$declarations
				);
				foreach ($declarations[1] as $keyword)
				{
					// "pre" covers "pre", "pre-line" and "pre-wrap"
					// "no"  covers "normal" and "nowrap"
					$branchPreserves = ($keyword === 'pre');
				}
			}

			// One branch that preserves new lines is enough for the whole template
			if ($branchPreserves)
			{
				$this->preservesNewLines = true;
			}
		}
	}
461
462
	/**
	* Test whether at least one element of at least one branch has given property
	*
	* @param  string $propName Property name, see self::$htmlElements
	* @return bool
	*/
	protected function anyBranchHasProperty($propName)
	{
		$found = false;
		foreach ($this->branches as $elements)
		{
			foreach ($elements as $el)
			{
				if ($this->hasProperty($el->nodeName, $propName, $el))
				{
					// No need to look any further
					$found = true;
					break 2;
				}
			}
		}

		return $found;
	}
483
484
	/**
	* Populate allowChildBitfields (one entry per branch) and denyDescendantBitfield
	*
	* With no branches, allowChildBitfields is set to a single empty bitfield and
	* denyDescendantBitfield is left untouched.
	*
	* @return void
	*/
	protected function computeBitfields()
	{
		if (empty($this->branches))
		{
			$this->allowChildBitfields = ["\0"];

			return;
		}

		foreach ($this->branches as $branch)
		{
			// Start with the value associated with <div>, which serves as an approximation if
			// every element of the branch uses the transparent content model
			$allowChild = self::$htmlElements['div']['ac'];

			foreach ($branch as $element)
			{
				// A transparent element adds to what was accumulated so far, whereas any other
				// element starts over from an empty bitfield
				$isTransparent = $this->hasProperty($element->localName, 't', $element);
				$inherited     = ($isTransparent) ? $allowChild : "\0";
				$allowChild    = $inherited | $this->getBitfield($element, 'ac');

				// denyDescendant rules always accumulate, across elements and branches
				$this->denyDescendantBitfield |= $this->getBitfield($element, 'dd');
			}

			$this->allowChildBitfields[] = $allowChild;
		}
	}
526
527
	/**
	* Set the allowsChildElements property
	*
	* Child elements are allowed if the template has at least one branch (one per
	* xsl:apply-templates element) and no element in any branch has the text-only ("to") property
	*
	* @return void
	*/
	protected function computeAllowsChildElements()
	{
		$hasTextOnlyElement = $this->anyBranchHasProperty('to');

		$this->allowsChildElements = (!$hasTextOnlyElement && !empty($this->branches));
	}
539
540
	/**
	* Set the allowsText property
	*
	* Text is allowed unless the last element of any non-empty branch has the no-text ("nt")
	* property
	*
	* @return void
	*/
	protected function computeAllowsText()
	{
		$allowsText = true;
		foreach ($this->branches as $branch)
		{
			// Branches with no elements have no leaf element to test
			if (empty($branch))
			{
				continue;
			}

			$leaf = end($branch);
			if ($this->hasProperty($leaf->nodeName, 'nt', $leaf))
			{
				$allowsText = false;
				break;
			}
		}

		$this->allowsText = $allowsText;
	}
561
562
	/**
	* Set the isFormattingElement property
	*
	* The template is a formatting element if it has at least one non-empty branch and every
	* element of every branch either has the "fe" property or is a formatting span
	*
	* @return void
	*/
	protected function computeFormattingElement()
	{
		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				$isFormatting = $this->hasProperty($element->nodeName, 'fe', $element)
				             || $this->isFormattingSpan($element);
				if (!$isFormatting)
				{
					// One non-formatting element disqualifies the whole template
					$this->isFormattingElement = false;

					return;
				}
			}
		}

		$nonEmptyBranches          = array_filter($this->branches);
		$this->isFormattingElement = (count($nonEmptyBranches) > 0);
	}
586
587
	/**
	* Set the isEmpty property
	*
	* The template is empty if it has no branches (no xsl:apply-templates element) or if any
	* element in any branch has the "e" (empty content model) property
	*
	* @return void
	*/
	protected function computeIsEmpty()
	{
		if (empty($this->branches))
		{
			$this->isEmpty = true;
		}
		else
		{
			$this->isEmpty = $this->anyBranchHasProperty('e');
		}
	}
599
600
	/**
	* Set the isTransparent property
	*
	* The template is transparent if it has at least one branch and every element of every branch
	* has the "t" (transparent content model) property
	*
	* @return void
	*/
	protected function computeIsTransparent()
	{
		$isTransparent = !empty($this->branches);
		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				if (!$this->hasProperty($element->nodeName, 't', $element))
				{
					// One non-transparent element is enough to settle the matter
					$isTransparent = false;
					break 2;
				}
			}
		}

		$this->isTransparent = $isTransparent;
	}
624
625
	/**
	* Set the isVoid property
	*
	* The template is void if it has no branches (no xsl:apply-templates element) or if any
	* element in any branch has the "v" (void element) property
	*
	* @return void
	*/
	protected function computeIsVoid()
	{
		if (empty($this->branches))
		{
			$this->isVoid = true;
		}
		else
		{
			$this->isVoid = $this->anyBranchHasProperty('v');
		}
	}
637
638
	/**
	* Test whether given element should be treated as a block-level element
	*
	* The element's style is tested first: a "display:block" declaration makes it a block, and
	* otherwise a "display:inline..." or "display:none" declaration makes it a non-block. Without
	* any of those, the element's "b" property decides.
	*
	* @param  DOMElement $element
	* @return bool
	*/
	protected function elementIsBlock(DOMElement $element)
	{
		$css = $this->getStyle($element);

		$isDisplayBlock = preg_match('(\\bdisplay\\s*:\\s*block)i', $css);
		if ($isDisplayBlock)
		{
			return true;
		}

		$isDisplayInlineOrNone = preg_match('(\\bdisplay\\s*:\\s*(?:inli|no)ne)i', $css);
		if ($isDisplayInlineOrNone)
		{
			return false;
		}

		// No relevant display declaration, fall back to the element's own definition
		return $this->hasProperty($element->nodeName, 'b', $element);
	}
658
659
	/**
	* Collect the inline style of given element into a single string
	*
	* The result is the value of the element's style attribute followed by the text content of
	* each child xsl:attribute named "style", joined with semicolons.
	*
	* @param  DOMElement $node Context node
	* @return string
	*/
	protected function getStyle(DOMElement $node)
	{
		// The style attribute always comes first, even if it's empty
		$declarations = [$node->getAttribute('style')];

		// Only direct xsl:attribute children are considered, which will miss optional attributes
		foreach ($this->xpath->query('xsl:attribute[@name="style"]', $node) as $xslAttribute)
		{
			$declarations[] = $xslAttribute->textContent;
		}

		return implode(';', $declarations);
	}
679
680
	/**
	* Test whether given node is a span element that is only used for formatting
	*
	* Returns TRUE for a span element that has a non-empty class attribute and/or a non-empty
	* style attribute, and no attribute other than class and style
	*
	* @param  DOMElement $node
	* @return boolean
	*/
	protected function isFormattingSpan(DOMElement $node)
	{
		if ($node->nodeName !== 'span')
		{
			return false;
		}

		// At least one of class or style must have a value
		$hasClassOrStyle = ($node->getAttribute('class') !== '' || $node->getAttribute('style') !== '');
		if (!$hasClassOrStyle)
		{
			return false;
		}

		// Any other attribute disqualifies the span
		foreach ($node->attributes as $attrName => $attribute)
		{
			if (!in_array($attrName, ['class', 'style'], true))
			{
				return false;
			}
		}

		return true;
	}
711
712
	/**
	* Record the name of the leaf node of every branch in leafNodes
	*
	* The leaf node of a branch is its last element. Branches with no elements are skipped.
	*
	* @return void
	*/
	protected function storeLeafNodes()
	{
		foreach ($this->branches as $branch)
		{
			if (!empty($branch))
			{
				$leaf              = end($branch);
				$this->leafNodes[] = $leaf->nodeName;
			}
		}
	}
726
727
	/**
728
	* "What is this?" you might ask. This is basically a compressed version of the HTML5 content
729
	* models and rules, with some liberties taken.
730
	*
731
	* For each element, up to three bitfields are defined: "c", "ac" and "dd". Bitfields are stored
732
	* as raw bytes, formatted using the octal notation to keep the sources ASCII.
733
	*
734
	*   "c" represents the categories the element belongs to. The categories are comprised of HTML5
735
	*   content models (such as "phrasing content" or "interactive content") plus a few special
736
	*   categories created to cover the parts of the specs that refer to "a group of X and Y
737
	*   elements" rather than a specific content model.
738
	*
739
	*   "ac" represents the categories that are allowed as children of given element.
740
	*
741
	*   "dd" represents the categories that must not appear as a descendant of given element.
742
	*
743
	* Sometimes, HTML5 specifies some restrictions on when an element can accept certain children,
744
	* or what categories the element belongs to. For example, an <img> element is only part of the
745
	* "interactive content" category if it has a "usemap" attribute. Those restrictions are
746
	* expressed as an XPath expression and stored using the concatenation of the key of the bitfield
747
	* plus the bit number of the category. For instance, if "interactive content" got assigned to
748
	* bit 2, the definition of the <img> element will contain a key "c2" with value "@usemap".
749
	*
750
	* Additionally, other flags are set:
751
	*
752
	*   "t" indicates that the element uses the "transparent" content model.
753
	*   "e" indicates that the element uses the "empty" content model.
754
	*   "v" indicates that the element is a void element.
755
	*   "nt" indicates that the element does not accept text nodes. (no text)
756
	*   "to" indicates that the element should only contain text. (text-only)
757
	*   "fe" indicates that the element is a formatting element. It will automatically be reopened
758
	*   when closed by an end tag of a different name.
759
	*   "b" indicates that the element is not phrasing content, which makes it likely to act like
760
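	*   a block element.
	*   "pre" indicates that the element preserves new lines by default, as if it was styled
	*   with "white-space:pre".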
761
	*
762
	* Finally, HTML5 defines "optional end tag" rules, where one element automatically closes its
763
	* predecessor. Those are used to generate closeParent rules and are stored in the "cp" key.
764
	*
765
	* @var array
766
	* @see /scripts/patchTemplateInspector.php
767
	*/
768
	protected static $htmlElements = [
769
		'a'=>['c'=>"\17\0\0\0\0\1",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\0\1",'t'=>1,'fe'=>1],
770
		'abbr'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
771
		'address'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\45",'b'=>1,'cp'=>['p']],
772
		'article'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
773
		'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
774
		'audio'=>['c'=>"\57",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
775
		'b'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
776
		'base'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
777
		'bdi'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
778
		'bdo'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
779
		'blockquote'=>['c'=>"\203",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
780
		'body'=>['c'=>"\200\0\4",'ac'=>"\1",'dd'=>"\0",'b'=>1],
781
		'br'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
782
		'button'=>['c'=>"\117",'ac'=>"\4",'dd'=>"\10"],
783
		'canvas'=>['c'=>"\47",'ac'=>"\0",'dd'=>"\0",'t'=>1],
784
		'caption'=>['c'=>"\0\2",'ac'=>"\1",'dd'=>"\0\0\0\200",'b'=>1],
785
		'cite'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
786
		'code'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
787
		'col'=>['c'=>"\0\0\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
788
		'colgroup'=>['c'=>"\0\2",'ac'=>"\0\0\20",'ac20'=>'not(@span)','dd'=>"\0",'nt'=>1,'e'=>1,'e0'=>'@span','b'=>1],
789
		'data'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
790
		'datalist'=>['c'=>"\5",'ac'=>"\4\200\0\10",'dd'=>"\0"],
791
		'dd'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['dd','dt']],
792
		'del'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
793
		'details'=>['c'=>"\213",'ac'=>"\1\0\0\2",'dd'=>"\0",'b'=>1,'cp'=>['p']],
794
		'dfn'=>['c'=>"\7\0\0\0\40",'ac'=>"\4",'dd'=>"\0\0\0\0\40"],
795
		'div'=>['c'=>"\3",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
796
		'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\200\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
797
		'dt'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['dd','dt']],
798
		'em'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
799
		'embed'=>['c'=>"\57",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
800
		'fieldset'=>['c'=>"\303",'ac'=>"\1\0\0\20",'dd'=>"\0",'b'=>1,'cp'=>['p']],
801
		'figcaption'=>['c'=>"\0\0\0\0\0\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
802
		'figure'=>['c'=>"\203",'ac'=>"\1\0\0\0\0\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
803
		'footer'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
804
		'form'=>['c'=>"\3\0\0\0\20",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
805
		'h1'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
806
		'h2'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
807
		'h3'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
808
		'h4'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
809
		'h5'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
810
		'h6'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
811
		'head'=>['c'=>"\0\0\4",'ac'=>"\20",'dd'=>"\0",'nt'=>1,'b'=>1],
812
		'header'=>['c'=>"\3\40\0\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
813
		'hr'=>['c'=>"\1\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
814
		'html'=>['c'=>"\0",'ac'=>"\0\0\4",'dd'=>"\0",'nt'=>1,'b'=>1],
815
		'i'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
816
		'iframe'=>['c'=>"\57",'ac'=>"\4",'dd'=>"\0"],
817
		'img'=>['c'=>"\57\20\10",'c3'=>'@usemap','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
818
		'input'=>['c'=>"\17\20",'c3'=>'@type!="hidden"','c12'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
819
		'ins'=>['c'=>"\7",'ac'=>"\0",'dd'=>"\0",'t'=>1],
820
		'kbd'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
821
		'keygen'=>['c'=>"\117",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
822
		'label'=>['c'=>"\17\20\0\0\4",'ac'=>"\4",'dd'=>"\0\0\1\0\4"],
823
		'legend'=>['c'=>"\0\0\0\20",'ac'=>"\4",'dd'=>"\0",'b'=>1],
824
		'li'=>['c'=>"\0\0\0\0\200",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['li']],
825
		'link'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
826
		'main'=>['c'=>"\3\0\0\0\10",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
827
		'mark'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
828
		'media element'=>['c'=>"\0\0\0\0\0\2",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
829
		'menu'=>['c'=>"\1\100",'ac'=>"\0\300",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
830
		'menuitem'=>['c'=>"\0\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
831
		'meta'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
832
		'meter'=>['c'=>"\7\0\1\0\2",'ac'=>"\4",'dd'=>"\0\0\0\0\2"],
833
		'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
834
		'noscript'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
835
		'object'=>['c'=>"\147",'ac'=>"\0\0\0\0\1",'dd'=>"\0",'t'=>1],
836
		'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
837
		'optgroup'=>['c'=>"\0\0\2",'ac'=>"\0\200\0\10",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
838
		'option'=>['c'=>"\0\0\2\10",'ac'=>"\0",'dd'=>"\0",'b'=>1,'cp'=>['option']],
839
		'output'=>['c'=>"\107",'ac'=>"\4",'dd'=>"\0"],
840
		'p'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
841
		'param'=>['c'=>"\0\0\0\0\1",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
842
		'picture'=>['c'=>"\45",'ac'=>"\0\200\10",'dd'=>"\0",'nt'=>1],
843
		'pre'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'pre'=>1,'b'=>1,'cp'=>['p']],
844
		'progress'=>['c'=>"\7\0\1\1",'ac'=>"\4",'dd'=>"\0\0\0\1"],
845
		'q'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
846
		'rb'=>['c'=>"\0\10",'ac'=>"\4",'dd'=>"\0",'b'=>1],
847
		'rp'=>['c'=>"\0\10\100",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rp','rt']],
848
		'rt'=>['c'=>"\0\10\100",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rp','rt']],
849
		'rtc'=>['c'=>"\0\10",'ac'=>"\4\0\100",'dd'=>"\0",'b'=>1],
850
		'ruby'=>['c'=>"\7",'ac'=>"\4\10",'dd'=>"\0"],
851
		's'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
852
		'samp'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
853
		'script'=>['c'=>"\25\200",'ac'=>"\0",'dd'=>"\0",'to'=>1],
854
		'section'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
855
		'select'=>['c'=>"\117",'ac'=>"\0\200\2",'dd'=>"\0",'nt'=>1],
856
		'small'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
857
		'source'=>['c'=>"\0\0\10\4",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
858
		'span'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
859
		'strong'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
860
		'style'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
861
		'sub'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
862
		'summary'=>['c'=>"\0\0\0\2",'ac'=>"\4\1",'dd'=>"\0",'b'=>1],
863
		'sup'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
864
		'table'=>['c'=>"\3\0\0\200",'ac'=>"\0\202",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
865
		'tbody'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','tfoot','th','thead','tr']],
866
		'td'=>['c'=>"\200\0\40",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['td','th']],
867
		'template'=>['c'=>"\25\200\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
868
		'textarea'=>['c'=>"\117",'ac'=>"\0",'dd'=>"\0",'pre'=>1,'to'=>1],
869
		'tfoot'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
870
		'th'=>['c'=>"\0\0\40",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['td','th']],
871
		'thead'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1],
872
		'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime','dd'=>"\0"],
873
		'title'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
874
		'tr'=>['c'=>"\0\2\0\0\100",'ac'=>"\0\200\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
875
		'track'=>['c'=>"\0\0\0\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
876
		'u'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
877
		'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
878
		'var'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
879
		'video'=>['c'=>"\57",'c3'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
880
		'wbr'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1]
881
	];
882
883
	/**
	* Return the value of a bitfield for given element, adjusted to the element's context
	*
	* Elements that are not defined in self::$htmlElements use the definition of <span>. Any bit
	* that is set in the bitfield and has an XPath condition (stored under the bitfield name
	* followed by the bit number) is turned off if the condition is not fulfilled by the element.
	*
	* @param  DOMElement $element Context node
	* @param  string     $k       Bitfield name: either 'c', 'ac' or 'dd'
	* @return string
	*/
	protected function getBitfield(DOMElement $element, $k)
	{
		$elName = (isset(self::$htmlElements[$element->nodeName])) ? $element->nodeName : 'span';
		$props  = self::$htmlElements[$elName];

		$bitfield  = $props[$k];
		$byteCount = strlen($bitfield);
		for ($byteNumber = 0; $byteNumber < $byteCount; ++$byteNumber)
		{
			$byteValue = ord($bitfield[$byteNumber]);
			for ($bitNumber = 0; $bitNumber < 8; ++$bitNumber)
			{
				$mask = 1 << $bitNumber;

				// Conditions are keyed by the bitfield name followed by the absolute bit number
				$conditionKey = $k . ($byteNumber * 8 + $bitNumber);

				// Skip bits that are not set, and bits that are set unconditionally
				if (!($byteValue & $mask) || !isset($props[$conditionKey]))
				{
					continue;
				}

				if (!$this->evaluate('boolean(' . $props[$conditionKey] . ')', $element))
				{
					// The condition is not fulfilled: clear the bit and write the byte back
					$byteValue &= ~$mask;
					$bitfield[$byteNumber] = chr($byteValue);
				}
			}
		}

		return $bitfield;
	}
933
934
	/**
	* Test whether given element has given property in given context
	*
	* A property applies if it has a non-empty value in self::$htmlElements and, if an XPath
	* condition is stored under the property name followed by "0", the context node fulfils it.
	*
	* @param  string     $elName   Element name
	* @param  string     $propName Property name, see self::$htmlElements
	* @param  DOMElement $node     Context node
	* @return bool
	*/
	protected function hasProperty($elName, $propName, DOMElement $node)
	{
		// Unknown elements and unset properties never apply
		if (empty(self::$htmlElements[$elName][$propName]))
		{
			return false;
		}

		// Without a condition, the property applies unconditionally
		$conditionKey = $propName . '0';
		if (!isset(self::$htmlElements[$elName][$conditionKey]))
		{
			return true;
		}

		$condition = 'boolean(' . self::$htmlElements[$elName][$conditionKey] . ')';

		return (bool) $this->evaluate($condition, $node);
	}
956
957
	/**
	* Test whether two bitfields have at least one bit in common
	*
	* @param  string $bitfield1
	* @param  string $bitfield2
	* @return bool
	*/
	protected static function match($bitfield1, $bitfield2)
	{
		// AND the strings together, then check whether anything is left besides NUL bytes
		$commonBits = $bitfield1 & $bitfield2;

		return (trim($commonBits, "\0") !== '');
	}
968
}