Completed
Branch TemplateInspector (5726eb)
by Josh
09:25
created

TemplateInspector::__get()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 0
cts 2
cp 0
rs 10
c 0
b 0
f 0
cc 3
eloc 2
nc 4
nop 1
crap 12
1
<?php
2
3
/**
4
* @package   s9e\TextFormatter
5
* @copyright Copyright (c) 2010-2017 The s9e Authors
6
* @license   http://www.opensource.org/licenses/mit-license.php The MIT License
7
*/
8
namespace s9e\TextFormatter\Configurator\Helpers;
9
10
use DOMDocument;
11
use DOMElement;
12
use DOMXPath;
13
14
/**
15
* This class helps the RulesGenerator by analyzing a given template in order to answer questions
16
* such as "can this tag be a child/descendant of that other tag?" and others related to the HTML5
17
* content model.
18
*
19
* We use the HTML5 specs to determine which children or descendants should be allowed or denied
20
* based on HTML5 content models. While it does not exactly match HTML5 content models, it gets
21
* pretty close. We also use HTML5 "optional end tag" rules to create closeParent rules.
22
*
23
	* Currently, this class does not evaluate elements created with <xsl:element> correctly, or
24
	* attributes created with <xsl:attribute> and may never do so due to the increased complexity it
25
* would entail. Additionally, it does not evaluate the scope of <xsl:apply-templates/>. For
26
* instance, it will treat <xsl:apply-templates select="LI"/> as if it was <xsl:apply-templates/>
27
*
28
* @link http://dev.w3.org/html5/spec/content-models.html#content-models
29
* @link http://dev.w3.org/html5/spec/syntax.html#optional-tags
30
* @see  /scripts/patchTemplateInspector.php
31
*/
32
class TemplateInspector
33
{
34
	/**
35
	* XSL namespace
36
	*/
37
	const XMLNS_XSL = 'http://www.w3.org/1999/XSL/Transform';
38
39
	/**
40
	* @var string[] allowChild bitfield for each branch
41
	*/
42
	protected $allowChildBitfields = [];
43
44
	/**
45
	* @var bool Whether elements are allowed as children
46
	*/
47
	protected $allowsChildElements;
48
49
	/**
50
	* @var bool Whether text nodes are allowed as children
51
	*/
52
	protected $allowsText;
53
54
	/**
55
	* @var array[] Array of array of DOMElement instances
56
	*/
57
	protected $branches;
58
59
	/**
60
	* @var string OR-ed bitfield representing all of the categories used by this template
61
	*/
62
	protected $contentBitfield = "\0";
63
64
	/**
65
	* @var string denyDescendant bitfield
66
	*/
67
	protected $denyDescendantBitfield = "\0";
68
69
	/**
70
	* @var DOMDocument Document containing the template
71
	*/
72
	protected $dom;
73
74
	/**
75
	* @var bool Whether this template contains any HTML elements
76
	*/
77
	protected $hasElements = false;
78
79
	/**
80
	* @var bool Whether this template renders non-whitespace text nodes at its root
81
	*/
82
	protected $hasRootText;
83
84
	/**
85
	* @var bool Whether this template should be considered a block-level element
86
	*/
87
	protected $isBlock = false;
88
89
	/**
90
	* @var bool Whether the template uses the "empty" content model
91
	*/
92
	protected $isEmpty;
93
94
	/**
95
	* @var bool Whether this template adds to the list of active formatting elements
96
	*/
97
	protected $isFormattingElement;
98
99
	/**
100
	* @var bool Whether this template lets content through via an xsl:apply-templates element
101
	*/
102
	protected $isPassthrough = false;
103
104
	/**
105
	* @var bool Whether all branches use the transparent content model
106
	*/
107
	protected $isTransparent = false;
108
109
	/**
110
	* @var bool Whether all branches have an ancestor that is a void element
111
	*/
112
	protected $isVoid;
113
114
	/**
115
	* @var array Names of every last HTML element that precedes an <xsl:apply-templates/> node
116
	*/
117
	protected $leafNodes = [];
118
119
	/**
120
	* @var bool Whether any branch has an element that preserves new lines by default (e.g. <pre>)
121
	*/
122
	protected $preservesNewLines = false;
123
124
	/**
125
	* @var array Bitfield of the first HTML element of every branch
126
	*/
127
	protected $rootBitfields = [];
128
129
	/**
130
	* @var array Names of every HTML element that has no HTML parent
131
	*/
132
	protected $rootNodes = [];
133
134
	/**
135
	* @var DOMXPath XPath engine associated with $this->dom
136
	*/
137
	protected $xpath;
138
139
	/**
	* Constructor
	*
	* Loads the template through TemplateHelper::loadTemplate(), binds an XPath engine to the
	* resulting document, then runs the three analysis passes: root nodes, branches and content
	*
	* @param string $template Template content
	*/
	public function __construct($template)
	{
		$dom = TemplateHelper::loadTemplate($template);

		$this->dom   = $dom;
		$this->xpath = new DOMXPath($dom);

		// Every analysis pass queries the document through $this->xpath
		$this->analyseRootNodes();
		$this->analyseBranches();
		$this->analyseContent();
	}
153
154
	/**
	* Return the value of a boolean property
	*
	* Gives read-only access to this object's boolean flags (e.g. isBlock, isVoid) from outside
	* of the class. Any name that does not designate a property that is both set and boolean
	* yields FALSE: unknown names, properties that are still NULL, and non-boolean properties
	* such as bitfields
	*
	* @param  string $k Name of the property
	* @return bool      Value of the property if it is a boolean, FALSE otherwise
	*/
	public function __get($k)
	{
		// The property must be looked up using the name that was passed in. isset() guards against
		// unknown or NULL properties and the strict comparison rejects non-boolean values
		return (isset($this->$k) && $this->$k === true);
	}
164
165
	/**
	* Return whether this template allows a given child
	*
	* The child is allowed if it is allowed as a descendant, if each of its root bitfields matches
	* every allowChild bitfield of this template, and if it has no root text that this template
	* would disallow
	*
	* @param  TemplateInspector $child
	* @return bool
	*/
	public function allowsChild(TemplateInspector $child)
	{
		// A template that is denied as a descendant can never be a child
		if (!$this->allowsDescendant($child))
		{
			return false;
		}

		// Each root of the child must be accepted by each of our branches
		foreach ($child->rootBitfields as $childRoot)
		{
			foreach ($this->allowChildBitfields as $branchAllows)
			{
				if (!self::match($childRoot, $branchAllows))
				{
					return false;
				}
			}
		}

		// Lastly, reject the child if it renders root text where text is not allowed
		return !(!$this->allowsText && $child->hasRootText);
	}
192
193
	/**
	* Return whether this template allows a given descendant
	*
	* @param  TemplateInspector $descendant
	* @return bool
	*/
	public function allowsDescendant(TemplateInspector $descendant)
	{
		// The descendant is explicitly disallowed if its content shares a bit with our
		// denyDescendant bitfield
		$isDenied = self::match($descendant->contentBitfield, $this->denyDescendantBitfield);
		if ($isDenied)
		{
			return false;
		}

		// Anything goes if we accept child elements...
		if ($this->allowsChildElements)
		{
			return true;
		}

		// ...otherwise the descendant must not contain any elements
		return !$descendant->hasElements;
	}
210
211
	/**
	* Return whether this template automatically closes given parent template
	*
	* This template closes the parent as soon as any of its root nodes lists one of the parent's
	* leaf nodes in its "cp" definition
	*
	* @param  TemplateInspector $parent
	* @return bool
	*/
	public function closesParent(TemplateInspector $parent)
	{
		foreach ($this->rootNodes as $rootName)
		{
			// Only root nodes with a non-empty "cp" list can close anything
			if (!empty(self::$htmlElements[$rootName]['cp']))
			{
				$closedNames = self::$htmlElements[$rootName]['cp'];
				foreach ($parent->leafNodes as $leafName)
				{
					if (in_array($leafName, $closedNames, true))
					{
						return true;
					}
				}
			}
		}

		return false;
	}
239
240
	/**
	* Evaluate an XPath expression
	*
	* Thin wrapper around the XPath engine associated with this template's document
	*
	* @param  string     $expr XPath expression
	* @param  DOMElement $node Context node
	* @return mixed
	*/
	public function evaluate($expr, DOMElement $node = null)
	{
		$result = $this->xpath->evaluate($expr, $node);

		return $result;
	}
251
252
	/**
	* Analyses the content of the whole template
	*
	* Sets $this->contentBitfield to the OR-ed "c" bitfield of every non-XSL element, flags the
	* template as having elements if there is at least one, and records whether the template
	* contains any xsl:apply-templates element
	*/
	protected function analyseContent()
	{
		// Collect every element that is not in the XSL namespace
		$nodes = $this->xpath->query(sprintf('//*[namespace-uri() != "%s"]', self::XMLNS_XSL));
		foreach ($nodes as $node)
		{
			$this->hasElements      = true;
			$this->contentBitfield |= $this->getBitfield($node, 'c');
		}

		// The template is passthrough if it has at least one xsl:apply-templates element
		$applyTemplatesCount = $this->evaluate('count(//xsl:apply-templates)');
		$this->isPassthrough = (bool) $applyTemplatesCount;
	}
268
269
	/**
	* Records the HTML elements (and their bitfield) rendered at the root of the template
	*
	* Also flags the template as a block if any root element is a block, and records whether
	* the template outputs text at its root
	*/
	protected function analyseRootNodes()
	{
		// XPath predicates shared by the queries below
		$isNotXsl       = '[namespace-uri() != "' . self::XMLNS_XSL . '"]';
		$noHtmlAncestor = '[not(ancestor::*' . $isNotXsl . ')]';

		// Non-XSL elements with no non-XSL ancestor are the first HTML element of every branch
		foreach ($this->xpath->query('//*' . $isNotXsl . $noHtmlAncestor) as $rootNode)
		{
			$this->rootNodes[] = $rootNode->localName;

			// One block-level root node is enough to mark the whole template as block
			if ($this->elementIsBlock($rootNode))
			{
				$this->isBlock = true;
			}

			$this->rootBitfields[] = $this->getBitfield($rootNode, 'c');
		}

		// Root text must have no non-XSL ancestor, and must not be located inside of an
		// <xsl:attribute/>, <xsl:comment/> or <xsl:variable/> element
		$isRootText = $noHtmlAncestor
		            . '[not(ancestor::xsl:attribute | ancestor::xsl:comment | ancestor::xsl:variable)]';

		// Non-whitespace text can come from literal text, <xsl:text/> or <xsl:value-of/>
		$textQueries = [
			'//text()[normalize-space() != ""]' . $isRootText,
			'//xsl:text[normalize-space() != ""]' . $isRootText,
			'//xsl:value-of' . $isRootText
		];

		$textCount         = $this->evaluate('count(' . implode('|', $textQueries) . ')');
		$this->hasRootText = (bool) $textCount;
	}
307
308
	/**
	* Analyses each branch that leads to an <xsl:apply-templates/> tag
	*
	* A branch is the list of non-XSL ancestors of one xsl:apply-templates element. Once the
	* branches are collected, every branch-related property is computed
	*/
	protected function analyseBranches()
	{
		// The same relative query is applied to every xsl:apply-templates element
		$ancestorsQuery = 'ancestor::*[namespace-uri() != "' . self::XMLNS_XSL . '"]';

		$branches = [];
		foreach ($this->xpath->query('//xsl:apply-templates') as $applyTemplates)
		{
			$ancestors  = $this->xpath->query($ancestorsQuery, $applyTemplates);
			$branches[] = iterator_to_array($ancestors);
		}
		$this->branches = $branches;

		// All of the methods below read $this->branches
		$this->computeAllowsChildElements();
		$this->computeAllowsText();
		$this->computeBitfields();
		$this->computeFormattingElement();
		$this->computeIsEmpty();
		$this->computeIsTransparent();
		$this->computeIsVoid();
		$this->computePreservesNewLines();
		$this->storeLeafNodes();
	}
330
331
	/**
	* Test whether any branch of this template has an element that has given property
	*
	* @param  string $propName Property name, see self::$htmlElements
	* @return bool
	*/
	protected function anyBranchHasProperty($propName)
	{
		$found = false;
		foreach ($this->branches as $elements)
		{
			foreach ($elements as $el)
			{
				if ($this->hasProperty($el->nodeName, $propName, $el))
				{
					// One element is enough, stop searching both loops
					$found = true;
					break 2;
				}
			}
		}

		return $found;
	}
352
353
	/**
	* Compute the allowChildBitfields and denyDescendantBitfield properties
	*
	* Without any branch, a single empty allowChild bitfield is stored. Otherwise one allowChild
	* bitfield is stored per branch, while denyDescendant bits accumulate across all branches
	*
	* @return void
	*/
	protected function computeBitfields()
	{
		if (empty($this->branches))
		{
			$this->allowChildBitfields = ["\0"];

			return;
		}

		// Every branch starts with the value associated with <div> in order to approximate a
		// value if the whole branch uses the transparent content model
		$initialBitfield = self::$htmlElements['div']['ac'];

		foreach ($this->branches as $branch)
		{
			$allowChild = $initialBitfield;
			foreach ($branch as $element)
			{
				// A transparent element inherits the current bitfield, any other element
				// restarts from an empty one
				$isTransparent = $this->hasProperty($element->localName, 't', $element);
				$inherited     = ($isTransparent) ? $allowChild : "\0";
				$allowChild    = $inherited | $this->getBitfield($element, 'ac');

				// denyDescendant rules are cumulative
				$this->denyDescendantBitfield |= $this->getBitfield($element, 'dd');
			}

			$this->allowChildBitfields[] = $allowChild;
		}
	}
395
396
	/**
	* Compute the allowsChildElements property
	*
	* A template allows child elements if it has at least one xsl:apply-templates and none of
	* the elements in its branches have the text-only ("to") property
	*
	* @return void
	*/
	protected function computeAllowsChildElements()
	{
		$hasTextOnlyElement = $this->anyBranchHasProperty('to');

		$this->allowsChildElements = (!$hasTextOnlyElement && !empty($this->branches));
	}
408
409
	/**
	* Compute the allowsText property
	*
	* A template is said to allow text if none of the leaf elements disallow text. The leaf
	* element of a branch is its last element; branches with no elements are ignored
	*
	* @return void
	*/
	protected function computeAllowsText()
	{
		$allowsText = true;
		foreach (array_filter($this->branches) as $branch)
		{
			$leaf = end($branch);
			if ($this->hasProperty($leaf->nodeName, 'nt', $leaf))
			{
				// One leaf with the no-text ("nt") property is enough to disallow text
				$allowsText = false;
				break;
			}
		}

		$this->allowsText = $allowsText;
	}
430
431
	/**
	* Compute the isFormattingElement property
	*
	* A template is said to be a formatting element if it has at least one non-empty branch and
	* all of its branches are entirely composed of formatting elements or formatting spans
	*
	* @return void
	*/
	protected function computeFormattingElement()
	{
		// Value used if no element disqualifies the template: whether any branch is non-empty
		$isFormatting = (bool) count(array_filter($this->branches));

		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				$qualifies = $this->hasProperty($element->nodeName, 'fe', $element)
				          || $this->isFormattingSpan($element);
				if (!$qualifies)
				{
					$isFormatting = false;
					break 2;
				}
			}
		}

		$this->isFormattingElement = $isFormatting;
	}
455
456
	/**
	* Compute the isEmpty property
	*
	* A template is said to be empty if it has no xsl:apply-templates elements or if there is an
	* empty element ancestor to an xsl:apply-templates element
	*
	* @return void
	*/
	protected function computeIsEmpty()
	{
		if ($this->anyBranchHasProperty('e'))
		{
			$this->isEmpty = true;
		}
		else
		{
			$this->isEmpty = empty($this->branches);
		}
	}
468
469
	/**
	* Compute the isTransparent property
	*
	* A template is said to be transparent if it has at least one branch and no non-transparent
	* elements in its path
	*
	* @return void
	*/
	protected function computeIsTransparent()
	{
		// Value used if no element disqualifies the template
		$isTransparent = !empty($this->branches);

		foreach ($this->branches as $branch)
		{
			foreach ($branch as $element)
			{
				if (!$this->hasProperty($element->nodeName, 't', $element))
				{
					// A single non-transparent element disqualifies the whole template
					$isTransparent = false;
					break 2;
				}
			}
		}

		$this->isTransparent = $isTransparent;
	}
493
494
	/**
	* Compute the isVoid property
	*
	* A template is said to be void if it has no xsl:apply-templates elements or if there is a
	* void element ancestor to an xsl:apply-templates element
	*
	* @return void
	*/
	protected function computeIsVoid()
	{
		if ($this->anyBranchHasProperty('v'))
		{
			$this->isVoid = true;
		}
		else
		{
			$this->isVoid = empty($this->branches);
		}
	}
506
507
	/**
	* Compute the preservesNewLines property
	*
	* The styles of all the elements of a branch are concatenated in order and the last
	* white-space declaration found is tested. The property is TRUE as soon as one branch ends
	* up with a white-space value that starts with "pre"
	*
	* @return void
	*/
	protected function computePreservesNewLines()
	{
		$this->preservesNewLines = false;

		foreach ($this->branches as $branch)
		{
			$styles = [];
			foreach ($branch as $element)
			{
				$styles[] = $this->getStyle($element, true);
			}
			$branchStyle = implode('', $styles);

			// The greedy .* makes the expression capture the last white-space declaration
			if (!preg_match('(.*white-space\\s*:\\s*(no|pre))is', $branchStyle, $m))
			{
				continue;
			}

			if (strtolower($m[1]) === 'pre')
			{
				$this->preservesNewLines = true;

				return;
			}
		}
	}
531
532
	/**
	* Test whether given element is a block-level element
	*
	* An explicit display value in the element's inline style takes precedence over the
	* element's "b" property: "block" forces TRUE, "inline" or "none" force FALSE
	*
	* @param  DOMElement $element
	* @return bool
	*/
	protected function elementIsBlock(DOMElement $element)
	{
		$inlineStyle = $this->getStyle($element);

		$isStyledAsBlock = preg_match('(\\bdisplay\\s*:\\s*block)i', $inlineStyle);
		if ($isStyledAsBlock)
		{
			return true;
		}

		$isStyledAsInlineOrNone = preg_match('(\\bdisplay\\s*:\\s*(?:inli|no)ne)i', $inlineStyle);
		if ($isStyledAsInlineOrNone)
		{
			return false;
		}

		// No explicit display value, fall back to the element's definition
		return $this->hasProperty($element->nodeName, 'b', $element);
	}
552
553
	/**
	* Retrieve and return the inline style assigned to given element
	*
	* The returned string is made of "white-space:pre;" if the element has the "pre" property,
	* followed by the content of its style attribute, followed by the content of every matching
	* xsl:attribute named "style", each preceded by a semicolon
	*
	* @param  DOMElement $node Context node
	* @param  bool       $deep Whether to retrieve the content of all xsl:attribute descendants
	* @return string
	*/
	protected function getStyle(DOMElement $node, $deep = false)
	{
		$isPre = $this->hasProperty($node->nodeName, 'pre', $node);

		$style  = ($isPre) ? 'white-space:pre;' : '';
		$style .= $node->getAttribute('style');

		// Look at all descendants in deep mode, or only at children otherwise
		$axis       = ($deep) ? './/' : './';
		$attributes = $this->xpath->query($axis . 'xsl:attribute[@name="style"]', $node);
		foreach ($attributes as $xslAttribute)
		{
			$style .= ';' . $xslAttribute->textContent;
		}

		return $style;
	}
578
579
	/**
	* Test whether given node is a span element used for formatting
	*
	* Will return TRUE if the node is a span element with a non-empty class attribute and/or a
	* non-empty style attribute, and no other attributes
	*
	* @param  DOMElement $node
	* @return boolean
	*/
	protected function isFormattingSpan(DOMElement $node)
	{
		if ($node->nodeName !== 'span')
		{
			return false;
		}

		// At least one of the two formatting attributes must have a value
		$hasClass = ($node->getAttribute('class') !== '');
		$hasStyle = ($node->getAttribute('style') !== '');
		if (!$hasClass && !$hasStyle)
		{
			return false;
		}

		// Any attribute other than class and style disqualifies the span
		foreach ($node->attributes as $attrName => $attribute)
		{
			if (!in_array($attrName, ['class', 'style'], true))
			{
				return false;
			}
		}

		return true;
	}
610
611
	/**
	* Store the names of every leaf node
	*
	* A leaf node is defined as the closest non-XSL ancestor to an xsl:apply-templates element,
	* which is the last element of a branch
	*
	* @return void
	*/
	protected function storeLeafNodes()
	{
		foreach ($this->branches as $branch)
		{
			// Branches with no elements have no leaf node
			if (!$branch)
			{
				continue;
			}

			$leaf              = end($branch);
			$this->leafNodes[] = $leaf->nodeName;
		}
	}
625
626
	/**
627
	* "What is this?" you might ask. This is basically a compressed version of the HTML5 content
628
	* models and rules, with some liberties taken.
629
	*
630
	* For each element, up to three bitfields are defined: "c", "ac" and "dd". Bitfields are stored
631
	* as raw bytes, formatted using the octal notation to keep the sources ASCII.
632
	*
633
	*   "c" represents the categories the element belongs to. The categories are comprised of HTML5
634
	*   content models (such as "phrasing content" or "interactive content") plus a few special
635
	*   categories created to cover the parts of the specs that refer to "a group of X and Y
636
	*   elements" rather than a specific content model.
637
	*
638
	*   "ac" represents the categories that are allowed as children of given element.
639
	*
640
	*   "dd" represents the categories that must not appear as a descendant of given element.
641
	*
642
	* Sometimes, HTML5 specifies some restrictions on when an element can accept certain children,
643
	* or what categories the element belongs to. For example, an <img> element is only part of the
644
	* "interactive content" category if it has a "usemap" attribute. Those restrictions are
645
	* expressed as an XPath expression and stored using the concatenation of the key of the bitfield
646
	* plus the bit number of the category. For instance, if "interactive content" got assigned to
647
	* bit 2, the definition of the <img> element will contain a key "c2" with value "@usemap".
648
	*
649
	* Additionally, other flags are set:
650
	*
651
	*   "t" indicates that the element uses the "transparent" content model.
652
	*   "e" indicates that the element uses the "empty" content model.
653
	*   "v" indicates that the element is a void element.
654
	*   "nt" indicates that the element does not accept text nodes. (no text)
655
	*   "to" indicates that the element should only contain text. (text-only)
656
	*   "fe" indicates that the element is a formatting element. It will automatically be reopened
657
	*   when closed by an end tag of a different name.
658
	*   "b" indicates that the element is not phrasing content, which makes it likely to act like
659
	*   a block element.
660
	*
661
	* Finally, HTML5 defines "optional end tag" rules, where one element automatically closes its
662
	* predecessor. Those are used to generate closeParent rules and are stored in the "cp" key.
663
	*
664
	* @var array
665
	* @see /scripts/patchTemplateInspector.php
666
	*/
667
	protected static $htmlElements = [
668
		'a'=>['c'=>"\17\0\0\0\0\1",'c3'=>'@href','ac'=>"\0",'dd'=>"\10\0\0\0\0\1",'t'=>1,'fe'=>1],
669
		'abbr'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
670
		'address'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\45",'b'=>1,'cp'=>['p']],
671
		'article'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
672
		'aside'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
673
		'audio'=>['c'=>"\57",'c3'=>'@controls','c1'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
674
		'b'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
675
		'base'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
676
		'bdi'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
677
		'bdo'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
678
		'blockquote'=>['c'=>"\203",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
679
		'body'=>['c'=>"\200\0\4",'ac'=>"\1",'dd'=>"\0",'b'=>1],
680
		'br'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
681
		'button'=>['c'=>"\117",'ac'=>"\4",'dd'=>"\10"],
682
		'canvas'=>['c'=>"\47",'ac'=>"\0",'dd'=>"\0",'t'=>1],
683
		'caption'=>['c'=>"\0\2",'ac'=>"\1",'dd'=>"\0\0\0\200",'b'=>1],
684
		'cite'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
685
		'code'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
686
		'col'=>['c'=>"\0\0\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
687
		'colgroup'=>['c'=>"\0\2",'ac'=>"\0\0\20",'ac20'=>'not(@span)','dd'=>"\0",'nt'=>1,'e'=>1,'e0'=>'@span','b'=>1],
688
		'data'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
689
		'datalist'=>['c'=>"\5",'ac'=>"\4\200\0\10",'dd'=>"\0"],
690
		'dd'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['dd','dt']],
691
		'del'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'t'=>1],
692
		'details'=>['c'=>"\213",'ac'=>"\1\0\0\2",'dd'=>"\0",'b'=>1,'cp'=>['p']],
693
		'dfn'=>['c'=>"\7\0\0\0\40",'ac'=>"\4",'dd'=>"\0\0\0\0\40"],
694
		'div'=>['c'=>"\3",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
695
		'dl'=>['c'=>"\3",'c1'=>'dt and dd','ac'=>"\0\200\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
696
		'dt'=>['c'=>"\0\0\200",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['dd','dt']],
697
		'em'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
698
		'embed'=>['c'=>"\57",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
699
		'fieldset'=>['c'=>"\303",'ac'=>"\1\0\0\20",'dd'=>"\0",'b'=>1,'cp'=>['p']],
700
		'figcaption'=>['c'=>"\0\0\0\0\0\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
701
		'figure'=>['c'=>"\203",'ac'=>"\1\0\0\0\0\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
702
		'footer'=>['c'=>"\3\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
703
		'form'=>['c'=>"\3\0\0\0\20",'ac'=>"\1",'dd'=>"\0\0\0\0\20",'b'=>1,'cp'=>['p']],
704
		'h1'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
705
		'h2'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
706
		'h3'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
707
		'h4'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
708
		'h5'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
709
		'h6'=>['c'=>"\3\1",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
710
		'head'=>['c'=>"\0\0\4",'ac'=>"\20",'dd'=>"\0",'nt'=>1,'b'=>1],
711
		'header'=>['c'=>"\3\40\0\40",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
712
		'hr'=>['c'=>"\1\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1,'cp'=>['p']],
713
		'html'=>['c'=>"\0",'ac'=>"\0\0\4",'dd'=>"\0",'nt'=>1,'b'=>1],
714
		'i'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
715
		'iframe'=>['c'=>"\57",'ac'=>"\4",'dd'=>"\0"],
716
		'img'=>['c'=>"\57\20\10",'c3'=>'@usemap','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
717
		'input'=>['c'=>"\17\20",'c3'=>'@type!="hidden"','c12'=>'@type!="hidden" or @type="hidden"','c1'=>'@type!="hidden"','ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
718
		'ins'=>['c'=>"\7",'ac'=>"\0",'dd'=>"\0",'t'=>1],
719
		'kbd'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
720
		'keygen'=>['c'=>"\117",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1],
721
		'label'=>['c'=>"\17\20\0\0\4",'ac'=>"\4",'dd'=>"\0\0\1\0\4"],
722
		'legend'=>['c'=>"\0\0\0\20",'ac'=>"\4",'dd'=>"\0",'b'=>1],
723
		'li'=>['c'=>"\0\0\0\0\200",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['li']],
724
		'link'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
725
		'main'=>['c'=>"\3\0\0\0\10",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
726
		'mark'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
727
		'media element'=>['c'=>"\0\0\0\0\0\2",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'b'=>1],
728
		'menu'=>['c'=>"\1\100",'ac'=>"\0\300",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
729
		'menuitem'=>['c'=>"\0\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
730
		'meta'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
731
		'meter'=>['c'=>"\7\0\1\0\2",'ac'=>"\4",'dd'=>"\0\0\0\0\2"],
732
		'nav'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0\0\0\0\10",'b'=>1,'cp'=>['p']],
733
		'noscript'=>['c'=>"\25",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
734
		'object'=>['c'=>"\147",'ac'=>"\0\0\0\0\1",'dd'=>"\0",'t'=>1],
735
		'ol'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
736
		'optgroup'=>['c'=>"\0\0\2",'ac'=>"\0\200\0\10",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['optgroup','option']],
737
		'option'=>['c'=>"\0\0\2\10",'ac'=>"\0",'dd'=>"\0",'b'=>1,'cp'=>['option']],
738
		'output'=>['c'=>"\107",'ac'=>"\4",'dd'=>"\0"],
739
		'p'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['p']],
740
		'param'=>['c'=>"\0\0\0\0\1",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
741
		'picture'=>['c'=>"\45",'ac'=>"\0\200\10",'dd'=>"\0",'nt'=>1],
742
		'pre'=>['c'=>"\3",'ac'=>"\4",'dd'=>"\0",'pre'=>1,'b'=>1,'cp'=>['p']],
743
		'progress'=>['c'=>"\7\0\1\1",'ac'=>"\4",'dd'=>"\0\0\0\1"],
744
		'q'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
745
		'rb'=>['c'=>"\0\10",'ac'=>"\4",'dd'=>"\0",'b'=>1],
746
		'rp'=>['c'=>"\0\10\100",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rp','rt']],
747
		'rt'=>['c'=>"\0\10\100",'ac'=>"\4",'dd'=>"\0",'b'=>1,'cp'=>['rp','rt']],
748
		'rtc'=>['c'=>"\0\10",'ac'=>"\4\0\100",'dd'=>"\0",'b'=>1],
749
		'ruby'=>['c'=>"\7",'ac'=>"\4\10",'dd'=>"\0"],
750
		's'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
751
		'samp'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
752
		'script'=>['c'=>"\25\200",'ac'=>"\0",'dd'=>"\0",'to'=>1],
753
		'section'=>['c'=>"\3\4",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['p']],
754
		'select'=>['c'=>"\117",'ac'=>"\0\200\2",'dd'=>"\0",'nt'=>1],
755
		'small'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
756
		'source'=>['c'=>"\0\0\10\4",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
757
		'span'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
758
		'strong'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
759
		'style'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
760
		'sub'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
761
		'summary'=>['c'=>"\0\0\0\2",'ac'=>"\4\1",'dd'=>"\0",'b'=>1],
762
		'sup'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
763
		'table'=>['c'=>"\3\0\0\200",'ac'=>"\0\202",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
764
		'tbody'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','tfoot','th','thead','tr']],
765
		'td'=>['c'=>"\200\0\40",'ac'=>"\1",'dd'=>"\0",'b'=>1,'cp'=>['td','th']],
766
		'template'=>['c'=>"\25\200\20",'ac'=>"\0",'dd'=>"\0",'nt'=>1],
767
		'textarea'=>['c'=>"\117",'ac'=>"\0",'dd'=>"\0",'pre'=>1,'to'=>1],
768
		'tfoot'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['tbody','td','th','thead','tr']],
769
		'th'=>['c'=>"\0\0\40",'ac'=>"\1",'dd'=>"\0\5\0\40",'b'=>1,'cp'=>['td','th']],
770
		'thead'=>['c'=>"\0\2",'ac'=>"\0\200\0\0\100",'dd'=>"\0",'nt'=>1,'b'=>1],
771
		'time'=>['c'=>"\7",'ac'=>"\4",'ac2'=>'@datetime','dd'=>"\0"],
772
		'title'=>['c'=>"\20",'ac'=>"\0",'dd'=>"\0",'to'=>1,'b'=>1],
773
		'tr'=>['c'=>"\0\2\0\0\100",'ac'=>"\0\200\40",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['td','th','tr']],
774
		'track'=>['c'=>"\0\0\0\100",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1,'b'=>1],
775
		'u'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0",'fe'=>1],
776
		'ul'=>['c'=>"\3",'c1'=>'li','ac'=>"\0\200\0\0\200",'dd'=>"\0",'nt'=>1,'b'=>1,'cp'=>['p']],
777
		'var'=>['c'=>"\7",'ac'=>"\4",'dd'=>"\0"],
778
		'video'=>['c'=>"\57",'c3'=>'@controls','ac'=>"\0\0\0\104",'ac26'=>'not(@src)','dd'=>"\0\0\0\0\0\2",'dd41'=>'@src','t'=>1],
779
		'wbr'=>['c'=>"\5",'ac'=>"\0",'dd'=>"\0",'nt'=>1,'e'=>1,'v'=>1]
780
	];
781
782
	/**
	* Get the bitfield value for a given element in a given context
	*
	* Starts from the bitfield stored in self::$htmlElements, using the definition of <span> for
	* unknown elements, then turns off every bit whose XPath condition is not fulfilled by the
	* element
	*
	* @param  DOMElement $element Context node
	* @param  string     $k       Bitfield name: either 'c', 'ac' or 'dd'
	* @return string
	*/
	protected function getBitfield(DOMElement $element, $k)
	{
		$elName = $element->nodeName;
		if (!isset(self::$htmlElements[$elName]))
		{
			// Unknown elements are treated as a <span>
			$elName = 'span';
		}

		$definition = self::$htmlElements[$elName];
		$bitfield   = $definition[$k];
		$byteCount  = strlen($bitfield);

		for ($byteNumber = 0; $byteNumber < $byteCount; ++$byteNumber)
		{
			$byteValue = ord($bitfield[$byteNumber]);
			for ($bitNumber = 0; $bitNumber < 8; ++$bitNumber)
			{
				$bitValue = 1 << $bitNumber;
				if ($byteValue & $bitValue)
				{
					// Conditions are stored under the bitfield's name followed by the bit's
					// position within the whole bitfield
					$conditionKey = $k . ($byteNumber * 8 + $bitNumber);
					if (isset($definition[$conditionKey]))
					{
						$xpath = 'boolean(' . $definition[$conditionKey] . ')';
						if (!$this->evaluate($xpath, $element))
						{
							// The condition is not fulfilled: turn off the bit and update
							// the bitfield
							$byteValue ^= $bitValue;
							$bitfield[$byteNumber] = chr($byteValue);
						}
					}
				}
			}
		}

		return $bitfield;
	}
832
833
	/**
	* Test whether given element has given property in context
	*
	* The property must have a non-empty value in self::$htmlElements. If an XPath condition is
	* stored under the property's name followed by "0", it must also be fulfilled by the node
	*
	* @param  string     $elName   Element name
	* @param  string     $propName Property name, see self::$htmlElements
	* @param  DOMElement $node     Context node
	* @return bool
	*/
	protected function hasProperty($elName, $propName, DOMElement $node)
	{
		if (empty(self::$htmlElements[$elName][$propName]))
		{
			return false;
		}

		// Unconditional properties are always fulfilled
		$conditionKey = $propName . '0';
		if (!isset(self::$htmlElements[$elName][$conditionKey]))
		{
			return true;
		}

		// Test the XPath condition
		$condition = 'boolean(' . self::$htmlElements[$elName][$conditionKey] . ')';

		return (bool) $this->evaluate($condition, $node);
	}
855
856
	/**
	* Test whether two bitfields have any bits in common
	*
	* Bitfields are strings of raw bytes. They have bits in common if their bitwise AND contains
	* anything other than NUL bytes
	*
	* @param  string $bitfield1
	* @param  string $bitfield2
	* @return bool
	*/
	protected static function match($bitfield1, $bitfield2)
	{
		$commonBits = $bitfield1 & $bitfield2;

		return (trim($commonBits, "\0") !== '');
	}
867
}