Completed
Push — master ( 30aab1...cadcfa )
by Asmir
07:00
created

DOMTreeBuilder::__construct()   B

Complexity

Conditions 5
Paths 12

Size

Total Lines 40

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 24
CRAP Score 5.0016

Importance

Changes 0
Metric Value
dl 0
loc 40
ccs 24
cts 25
cp 0.96
rs 8.9688
c 0
b 0
f 0
cc 5
nc 12
nop 2
crap 5.0016
1
<?php
2
namespace Masterminds\HTML5\Parser;
3
4
use Masterminds\HTML5\Elements;
5
6
/**
7
 * Create an HTML5 DOM tree from events.
8
 *
9
 * This attempts to create a DOM from events emitted by a parser. This
10
 * attempts (but does not guarantee) to up-convert older HTML documents
11
 * to HTML5. It does this by applying HTML5's rules, but it will not
12
 * change the architecture of the document itself.
13
 *
14
 * Many of the error correction and quirks features suggested in the specification
15
 * are implemented herein; however, not all of them are. Since we do not
16
 * assume a graphical user agent, no presentation-specific logic is conducted
17
 * during tree building.
18
 *
19
 * FIXME: The present tree builder does not exactly follow the state machine rules
20
 * for insert modes as outlined in the HTML5 spec. The processor needs to be
21
 * re-written to accommodate this. See, for example, the Go language HTML5
22
 * parser.
23
 */
24
class DOMTreeBuilder implements EventHandler
25
{
26
    /**
27
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0
28
     */
29
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';
30
31
    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';
32
33
    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';
34
35
    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';
36
37
    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';
38
39
    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';
40
41
    const OPT_DISABLE_HTML_NS = 'disable_html_ns';
42
43
    const OPT_TARGET_DOC = 'target_document';
44
45
    const OPT_IMPLICIT_NS = 'implicit_namespaces';
46
47
    /**
48
     * Holds the HTML5 element names that cause a namespace switch.
49
     *
50
     * @var array
51
     */
52
    protected $nsRoots = array(
53
        'html' => self::NAMESPACE_HTML,
54
        'svg' => self::NAMESPACE_SVG,
55
        'math' => self::NAMESPACE_MATHML
56
    );
57
58
    /**
59
     * Holds the always available namespaces (which do not require the XMLNS declaration).
60
     *
61
     * @var array
62
     */
63
    protected $implicitNamespaces = array(
64
        'xml' => self::NAMESPACE_XML,
65
        'xmlns' => self::NAMESPACE_XMLNS,
66
        'xlink' => self::NAMESPACE_XLINK
67
    );
68
69
    /**
70
     * Holds a stack of currently active namespaces.
71
     *
72
     * @var array
73
     */
74
    protected $nsStack = array();
75
76
    /**
77
     * Holds the number of namespaces declared by a node.
78
     *
79
     * @var array
80
     */
81
    protected $pushes = array();
82
83
    /**
84
     * Defined in 8.2.5.
85
     */
86
    const IM_INITIAL = 0;
87
88
    const IM_BEFORE_HTML = 1;
89
90
    const IM_BEFORE_HEAD = 2;
91
92
    const IM_IN_HEAD = 3;
93
94
    const IM_IN_HEAD_NOSCRIPT = 4;
95
96
    const IM_AFTER_HEAD = 5;
97
98
    const IM_IN_BODY = 6;
99
100
    const IM_TEXT = 7;
101
102
    const IM_IN_TABLE = 8;
103
104
    const IM_IN_TABLE_TEXT = 9;
105
106
    const IM_IN_CAPTION = 10;
107
108
    const IM_IN_COLUMN_GROUP = 11;
109
110
    const IM_IN_TABLE_BODY = 12;
111
112
    const IM_IN_ROW = 13;
113
114
    const IM_IN_CELL = 14;
115
116
    const IM_IN_SELECT = 15;
117
118
    const IM_IN_SELECT_IN_TABLE = 16;
119
120
    const IM_AFTER_BODY = 17;
121
122
    const IM_IN_FRAMESET = 18;
123
124
    const IM_AFTER_FRAMESET = 19;
125
126
    const IM_AFTER_AFTER_BODY = 20;
127
128
    const IM_AFTER_AFTER_FRAMESET = 21;
129
130
    const IM_IN_SVG = 22;
131
132
    const IM_IN_MATHML = 23;
133
134
    protected $options = array();
135
136
    protected $stack = array();
137
138
    protected $current; // Pointer in the tag hierarchy.
139
    protected $rules;
140
    protected $doc;
141
142
    protected $frag;
143
144
    protected $processor;
145
146
    protected $insertMode = 0;
147
148
    /**
149
     * Track if we are in an element that allows only inline child nodes
150
     * @var string|null
151
     */
152
    protected $onlyInline;
153
154
    /**
155
     * Quirks mode is enabled by default.
156
     * Any document that is missing the
157
     * DT will be considered to be in quirks mode.
158
     */
159
    protected $quirks = true;
160
161
    protected $errors = array();
162
163 110
    /**
     * Prepare a tree builder that writes into a new or a caller-supplied document.
     *
     * @param bool  $isFragment When true, a DOMDocumentFragment is created, used as the
     *                          current insertion point, and the insert mode starts at
     *                          IM_IN_BODY.
     * @param array $options    Builder options. Keys read here:
     *                          - self::OPT_TARGET_DOC: document to build into instead of
     *                            creating a fresh one;
     *                          - self::OPT_IMPLICIT_NS, or the key "implicitNamespaces"
     *                            when the former is absent: prefix => URI pairs that are
     *                            put on the namespace stack from the start.
     */
    public function __construct($isFragment = false, array $options = array())
    {
        $this->options = $options;

        if (isset($options[self::OPT_TARGET_DOC])) {
            $this->doc = $options[self::OPT_TARGET_DOC];
        } else {
            $impl = new \DOMImplementation();
            // XXX:
            // Create the doctype. For now, we are always creating HTML5
            // documents, and attempting to up-convert any older DTDs to HTML5.
            $dt = $impl->createDocumentType('html');
            // The qualified name is a non-nullable string parameter; passing null
            // raises a deprecation notice on PHP 8.1+. An empty string creates the
            // document without a root element on every PHP version.
            $this->doc = $impl->createDocument(null, '', $dt);
        }
        $this->errors = array();

        // Until an element is opened, nodes are appended to the document itself.
        $this->current = $this->doc;

        // Create a rules engine for tags.
        $this->rules = new TreeBuildingRules($this->doc);

        $implicitNS = array();
        if (isset($this->options[self::OPT_IMPLICIT_NS])) {
            $implicitNS = $this->options[self::OPT_IMPLICIT_NS];
        } elseif (isset($this->options["implicitNamespaces"])) {
            $implicitNS = $this->options["implicitNamespaces"];
        }

        // Seed $nsStack with the default HTML5 namespaces plus the implicit
        // namespaces taken from $options. With the array union operator the
        // left-hand operand wins, so caller-supplied prefixes take precedence.
        array_unshift($this->nsStack, $implicitNS + array(
            '' => self::NAMESPACE_HTML
        ) + $this->implicitNamespaces);

        if ($isFragment) {
            $this->insertMode = static::IM_IN_BODY;
            $this->frag = $this->doc->createDocumentFragment();
            $this->current = $this->frag;
        }
    }
203
204
    /**
     * Get the document this builder writes into.
     *
     * @return \DOMDocument The document created in the constructor, or the one
     *                      supplied through the OPT_TARGET_DOC option.
     */
    public function document()
    {
        return $this->doc;
    }
211
212
    /**
     * Get the DOM fragment for the body.
     *
     * A fragment is returned (rather than a single node) because it may hold
     * zero or more DOMNodes at its root. The fragment is only created when the
     * builder was constructed with $isFragment set; otherwise this returns null.
     *
     * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context
     *
     * @return \DOMDocumentFragment|null
     */
    public function fragment()
    {
        return $this->frag;
    }
226
227
    /**
     * Register the handler used for processing instructions.
     *
     * When a processor is set, processingInstruction() hands every PI to it
     * instead of inserting a PI node into the DOM tree.
     *
     * @param \Masterminds\HTML5\InstructionProcessor $proc The processor to use.
     */
    public function setInstructionProcessor(\Masterminds\HTML5\InstructionProcessor $proc)
    {
        $this->processor = $proc;
    }
237
238 94
    /**
     * Handle a DOCTYPE event.
     *
     * The inbound doctype is not preserved; only the quirks flag is recorded.
     * A doctype seen after the initial insert mode is reported as a parse
     * error and otherwise ignored (the quirks flag is still updated).
     *
     * @param string      $name   Doctype name, used only in the error message.
     * @param int         $idType Unused here.
     * @param string|null $id     Unused here.
     * @param bool        $quirks Whether the document is in quirks mode.
     */
    public function doctype($name, $idType = 0, $id = null, $quirks = false)
    {
        $this->quirks = $quirks;

        $atDocumentStart = !($this->insertMode > static::IM_INITIAL);
        if ($atDocumentStart) {
            $this->insertMode = static::IM_BEFORE_HTML;

            return;
        }

        $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
    }
252
253
    /**
     * Process the start tag.
     *
     * Creates the element (in the namespace currently on top of $nsStack),
     * copies its attributes, appends it to the tree and updates the insert
     * mode and the current insertion point.
     *
     * @todo - XMLNS namespace handling (we need to parse, even if it's not valid)
     *       - XLink, MathML and SVG namespace handling
     *       - Omission rules: 8.1.2.4 Optional tags
     *
     * @param string $name        The tag name as emitted by the parser.
     * @param array  $attributes  Attribute name => value pairs.
     * @param bool   $selfClosing Whether the tag was self-closed; only honoured
     *                            for elements that are not HTML5 elements.
     *
     * @return int|bool The result of Elements::element() for the (possibly
     *                  rewritten) tag name.
     */
    public function startTag($name, $attributes = array(), $selfClosing = false)
    {
        $lname = $this->normalizeTagName($name);

        // Make sure we have an html element.
        if (! $this->doc->documentElement && $name !== 'html' && ! $this->frag) {
            $this->startTag('html');
        }

        // Set quirks mode if we're at IM_INITIAL with no doctype.
        if ($this->insertMode == static::IM_INITIAL) {
            $this->quirks = true;
            $this->parseError("No DOCTYPE specified.");
        }

        // SPECIAL TAG HANDLING:
        // Spec says do this, and "don't ask."
        if ($name == 'image' && !($this->insertMode === static::IM_IN_SVG || $this->insertMode === static::IM_IN_MATHML)) {
            $name = 'img';
            // The element is created from $lname, so it must follow the rename;
            // otherwise an <image> element would be inserted while the rest of
            // this method treats the tag as <img>.
            $lname = $this->normalizeTagName($name);
        }

        // Autoclose p tags where appropriate.
        if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) {
            $this->autoclose('p');
        }

        // Set insert mode:
        switch ($name) {
            case 'html':
                $this->insertMode = static::IM_BEFORE_HEAD;
                break;
            case 'head':
                if ($this->insertMode > static::IM_BEFORE_HEAD) {
                    $this->parseError("Unexpected head tag outside of head context.");
                } else {
                    $this->insertMode = static::IM_IN_HEAD;
                }
                break;
            case 'body':
                $this->insertMode = static::IM_IN_BODY;
                break;
            case 'svg':
                $this->insertMode = static::IM_IN_SVG;
                break;
            case 'math':
                $this->insertMode = static::IM_IN_MATHML;
                break;
            case 'noscript':
                if ($this->insertMode == static::IM_IN_HEAD) {
                    $this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
                }
                break;
        }

        // Special case handling for SVG.
        if ($this->insertMode == static::IM_IN_SVG) {
            $lname = Elements::normalizeSvgElement($lname);
        }

        // Number of entries this tag pushes onto $nsStack.
        $pushes = 0;
        // When we find a tag that appears in $nsRoots, we have to switch the default namespace.
        if (isset($this->nsRoots[$lname]) && $this->nsStack[0][''] !== $this->nsRoots[$lname]) {
            array_unshift($this->nsStack, array(
                '' => $this->nsRoots[$lname]
            ) + $this->nsStack[0]);
            $pushes ++;
        }
        $needsWorkaround = false;
        if (isset($this->options["xmlNamespaces"]) && $this->options["xmlNamespaces"]) {
            // When xmlNamespaces is true and we find an 'xmlns' or 'xmlns:*' attribute,
            // a new item is added to $nsStack.
            foreach ($attributes as $aName => $aVal) {
                if ($aName === 'xmlns') {
                    $needsWorkaround = $aVal;
                    array_unshift($this->nsStack, array(
                        '' => $aVal
                    ) + $this->nsStack[0]);
                    $pushes ++;
                } elseif ((($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : '') === 'xmlns') {
                    array_unshift($this->nsStack, array(
                        substr($aName, $pos + 1) => $aVal
                    ) + $this->nsStack[0]);
                    $pushes ++;
                }
            }
        }

        // A block-level tag ends the element that only allows inline children.
        if ($this->onlyInline && Elements::isA($lname, Elements::BLOCK_TAG)) {
            $this->autoclose($this->onlyInline);
            $this->onlyInline = null;
        }

        try {
            $prefix = ($pos = strpos($lname, ':')) ? substr($lname, 0, $pos) : '';

            if ($needsWorkaround !== false) {
                // An element carrying its own default xmlns is built by parsing a
                // tiny XML document and importing its root. The namespace URIs
                // come from the parsed markup, so they are escaped before being
                // interpolated into the XML string.
                $prefixDecl = '';
                if (strlen($prefix) && isset($this->nsStack[0][$prefix])) {
                    $prefixDecl = "xmlns:$prefix=\"" . htmlspecialchars($this->nsStack[0][$prefix], ENT_QUOTES, 'UTF-8') . "\"";
                }
                $xml = "<$lname xmlns=\"" . htmlspecialchars($needsWorkaround, ENT_QUOTES, 'UTF-8') . "\" " . $prefixDecl . "/>";

                $frag = new \DOMDocument('1.0', 'UTF-8');
                // Route a failed parse into the catch block below instead of
                // passing a null root to importNode().
                if (! $frag->loadXML($xml) || ! $frag->documentElement) {
                    throw new \DOMException("Could not build element <$lname>.");
                }

                $ele = $this->doc->importNode($frag->documentElement, true);
            } else {
                if (!isset($this->nsStack[0][$prefix]) || ($prefix === "" && isset($this->options[self::OPT_DISABLE_HTML_NS]) && $this->options[self::OPT_DISABLE_HTML_NS])) {
                    $ele = $this->doc->createElement($lname);
                } else {
                    $ele = $this->doc->createElementNS($this->nsStack[0][$prefix], $lname);
                }
            }
        } catch (\DOMException $e) {
            $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
            $ele = $this->doc->createElement('invalid');
        }

        if (Elements::isA($lname, Elements::BLOCK_ONLY_INLINE)) {
            $this->onlyInline = $lname;
        }

        // When we add namespaces, we have to track them so that endTag() can remove them later.
        // For a void tag there is no nesting to track.
        if ($pushes > 0 && !Elements::isA($name, Elements::VOID_TAG)) {
            // PHP tends to free the memory used by DOM; to avoid spl_object_hash
            // collisions we keep $ele alive by storing it in $pushes.
            // see https://bugs.php.net/bug.php?id=67459
            $this->pushes[spl_object_hash($ele)] = array($pushes, $ele);

            // SEE https://github.com/facebook/hhvm/issues/2962
            if (defined('HHVM_VERSION')) {
                $ele->setAttribute('html5-php-fake-id-attribute', spl_object_hash($ele));
            }
        }

        foreach ($attributes as $aName => $aVal) {
            // xmlns attributes can't be set
            if ($aName === 'xmlns') {
                continue;
            }

            if ($this->insertMode == static::IM_IN_SVG) {
                $aName = Elements::normalizeSvgAttribute($aName);
            } elseif ($this->insertMode == static::IM_IN_MATHML) {
                $aName = Elements::normalizeMathMlAttribute($aName);
            }

            try {
                $prefix = ($pos = strpos($aName, ':')) ? substr($aName, 0, $pos) : false;

                if ($prefix === 'xmlns') {
                    $ele->setAttributeNs(self::NAMESPACE_XMLNS, $aName, $aVal);
                } elseif ($prefix !== false && isset($this->nsStack[0][$prefix])) {
                    $ele->setAttributeNs($this->nsStack[0][$prefix], $aName, $aVal);
                } else {
                    $ele->setAttribute($aName, $aVal);
                }
            } catch (\DOMException $e) {
                $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName");
                continue;
            }

            // This is necessary on a non-DTD schema, like HTML5.
            if ($aName == 'id') {
                $ele->setIdAttribute('id', true);
            }
        }

        // Some elements have special processing rules. Handle those separately.
        if ($this->rules->hasRules($name) && $this->frag !== $this->current) {
            $this->current = $this->rules->evaluate($ele, $this->current);
        } else {
            // Otherwise, it's a standard element.
            $this->current->appendChild($ele);

            if (! Elements::isA($name, Elements::VOID_TAG)) {
                $this->current = $ele;
            }

            // Self-closing tags should only be respected on foreign elements
            // (and are implied on void elements)
            // See: https://www.w3.org/TR/html5/syntax.html#start-tags
            if (Elements::isHtml5Element($name)) {
                $selfClosing = false;
            }
        }

        // This is sort of a last-ditch attempt to correct for cases where no head/body
        // elements are provided.
        if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
            $this->insertMode = static::IM_IN_BODY;
        }

        // A void tag never receives an end tag, so the namespaces it pushed
        // onto $nsStack are removed right away.
        if ($pushes > 0 && Elements::isA($name, Elements::VOID_TAG)) {
            for ($i = 0; $i < $pushes; $i ++) {
                array_shift($this->nsStack);
            }
        }

        if ($selfClosing) {
            $this->endTag($name);
        }

        // Return the element mask, which the tokenizer can then use to set
        // various processing rules.
        return Elements::element($name);
    }
472
473 104
    /**
     * Process an end tag.
     *
     * Pops the namespaces declared by the element being closed, moves the
     * current insertion point above the closest open element with a matching
     * name, and updates the insert mode for head, body, svg and math.
     *
     * @param string $name The tag name as emitted by the parser.
     */
    public function endTag($name)
    {
        $lname = $this->normalizeTagName($name);

        // Ignore closing tags for unary elements.
        if (Elements::isA($name, Elements::VOID_TAG)) {
            return;
        }

        if ($this->insertMode <= static::IM_BEFORE_HTML) {
            // 8.2.5.4.2
            if (in_array($name, array(
                'html',
                'br',
                'head',
                'title'
            ))) {
                $this->startTag('html');
                $this->endTag($name);
                $this->insertMode = static::IM_BEFORE_HEAD;

                return;
            }

            // Ignore the tag.
            $this->parseError("Illegal closing tag at global scope.");

            return;
        }

        // Special case handling for SVG.
        if ($this->insertMode == static::IM_IN_SVG) {
            $lname = Elements::normalizeSvgElement($lname);
        }

        // Key under which startTag() may have recorded namespace pushes for
        // the current element.
        // See https://github.com/facebook/hhvm/issues/2962
        if (defined('HHVM_VERSION') && ($cid = $this->current->getAttribute('html5-php-fake-id-attribute'))) {
            $this->current->removeAttribute('html5-php-fake-id-attribute');
        } else {
            $cid = spl_object_hash($this->current);
        }

        // XXX: HTML has no parent. What do we do, though,
        // if this element appears in the wrong place?
        if ($lname == 'html') {
            return;
        }

        // Remove the namespaces defined by the current node.
        if (isset($this->pushes[$cid])) {
            for ($i = 0; $i < $this->pushes[$cid][0]; $i ++) {
                array_shift($this->nsStack);
            }
            unset($this->pushes[$cid]);
        }

        if (! $this->autoclose($lname)) {
            $this->parseError('Could not find closing tag for ' . $lname);
        }

        switch ($lname) {
            case "head":
                $this->insertMode = static::IM_AFTER_HEAD;
                break;
            case "body":
                $this->insertMode = static::IM_AFTER_BODY;
                break;
            case "svg":
            // startTag() enters IM_IN_MATHML on the tag name "math", so that is
            // the name that has to leave it again; "mathml" is kept for
            // backward compatibility.
            case "math":
            case "mathml":
                $this->insertMode = static::IM_IN_BODY;
                break;
        }
    }
552
553 5
    /**
     * Append a comment node at the current insertion point.
     *
     * @param string $cdata The comment text.
     */
    public function comment($cdata)
    {
        // TODO: Need to handle case where comment appears outside of the HTML tag.
        $this->current->appendChild($this->doc->createComment($cdata));
    }
559
560 87
    /**
     * Append character data at the current insertion point.
     *
     * Text received before the "in head" insert mode is dropped. If the
     * dropped text contains anything other than " \t\n\r\f", a parse error is
     * recorded.
     *
     * @param string $data The character data.
     */
    public function text($data)
    {
        // XXX: Hmmm.... should we really be this strict?
        if ($this->insertMode < static::IM_IN_HEAD) {
            // Per '8.2.5.4.3 The "before head" insertion mode' the characters
            // " \t\n\r\f" should be ignored but no mention of a parse error. This is
            // practical as most documents contain these characters. Other text is not
            // expected here so recording a parse error is necessary.
            $dataTmp = trim($data, " \t\n\r\f");
            // Compare against the empty string explicitly: empty() also treats
            // the text "0" as empty, which would drop it without an error.
            if ('' !== $dataTmp) {
                $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
            }

            return;
        }

        $node = $this->doc->createTextNode($data);
        $this->current->appendChild($node);
    }
580
581 110
    /**
     * Handle the end-of-input event.
     *
     * Nothing is done here; open elements are left as they are.
     */
    public function eof()
    {
        // If the $current isn't the $root, do we need to do anything?
    }
585
586 11
    /**
     * Record a parse error.
     *
     * @param string $msg  Description of the problem.
     * @param int    $line Line number to report.
     * @param int    $col  Column number to report.
     */
    public function parseError($msg, $line = 0, $col = 0)
    {
        $formatted = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
        array_push($this->errors, $formatted);
    }
590
591 104
    /**
     * Get the parse errors recorded so far.
     *
     * @return array Formatted error messages, in the order they were recorded.
     */
    public function getErrors()
    {
        return $this->errors;
    }
595
596 3
    /**
     * Append a CDATA section at the current insertion point.
     *
     * @param string $data The CDATA content.
     */
    public function cdata($data)
    {
        $this->current->appendChild($this->doc->createCDATASection($data));
    }
601
602 5
    /**
     * Handle a processing instruction.
     *
     * An "xml" instruction seen in the initial insert mode is ignored. If an
     * instruction processor has been set, the PI is handed to it and a
     * non-empty return value becomes the new insertion point. Otherwise a PI
     * node is appended to the tree.
     *
     * NOTE(review): the static analysis report on this code flags that
     * $this->current may be any DOMNode here, while
     * InstructionProcessor::process() appears to accept only a DOMElement.
     *
     * @param string      $name The PI target.
     * @param string|null $data The PI content.
     */
    public function processingInstruction($name, $data = null)
    {
        // XXX: Ignore initial XML declaration, per the spec.
        if ($this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name)) {
            return;
        }

        // Without a processor, this is just a dumb PI element.
        if (! isset($this->processor)) {
            $this->current->appendChild($this->doc->createProcessingInstruction($name, $data));

            return;
        }

        // Important: The processor may modify the current DOM tree however
        // it sees fit.
        $replacement = $this->processor->process($this->current, $name, $data);
        if (! empty($replacement)) {
            $this->current = $replacement;
        }
    }
625
626
    // ==========================================================================
627
    // UTILITIES
628
    // ==========================================================================
629
630
    /**
     * Apply normalization rules to a tag name.
     *
     * See sections 2.9 and 8.1.2. The name is currently returned unchanged:
     * section 2.9 suggests that a namespace prefix (the part before ':')
     * should not be stripped, so no prefix handling is done here.
     *
     * @param string $name
     *            The tag name.
     * @return string The normalized tag name.
     */
    protected function normalizeTagName($name)
    {
        return $name;
    }
646
647
    /**
     * Placeholder for quirks-mode tree resolution.
     *
     * @param string $name The tag name.
     *
     * @throws \Exception Always, because this is not implemented.
     */
    protected function quirksTreeResolver($name)
    {
        throw new \Exception("Not implemented.");
    }
651
652
    /**
     * Climb from the current node and close the nearest element named $tagName.
     *
     * On a match the insertion point moves to the matched element's parent.
     * The climb stops without a match as soon as a non-element node is
     * reached or there is no parent left.
     *
     * @param string $tagName
     *
     * @return bool True when a matching element was found and closed.
     */
    protected function autoclose($tagName)
    {
        $node = $this->current;

        while (true) {
            if (XML_ELEMENT_NODE != $node->nodeType) {
                return false;
            }

            if ($tagName == $node->tagName) {
                $this->current = $node->parentNode;

                return true;
            }

            $node = $node->parentNode;
            if (! $node) {
                return false;
            }
        }
    }
674
675
    /**
     * Check whether the current node or one of its element ancestors has the given tag name.
     *
     * The walk starts at $this->current and stops at the first node that is
     * not an element.
     *
     * @param string $tagName
     *
     * @return bool
     */
    protected function isAncestor($tagName)
    {
        for ($node = $this->current; XML_ELEMENT_NODE === $node->nodeType; $node = $node->parentNode) {
            if ($tagName == $node->tagName) {
                return true;
            }
        }

        return false;
    }
697
698
    /**
     * Returns true if the current node (the element new children are appended to)
     * has the given tag name.
     *
     * @param string $tagName
     *
     * @return bool
     */
    protected function isParent($tagName)
    {
        $currentTag = $this->current->tagName;

        return $currentTag == $tagName;
    }
709
}
710