Completed
Push — master ( ed92f4...60669a )
by Daniel
03:02
created

thirdparty/html5lib/HTML5/TreeBuilder.php (193 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
/*
4
5
Copyright 2007 Jeroen van der Meer <http://jero.net/>
6
Copyright 2009 Edward Z. Yang <[email protected]>
7
8
Permission is hereby granted, free of charge, to any person obtaining a
9
copy of this software and associated documentation files (the
10
"Software"), to deal in the Software without restriction, including
11
without limitation the rights to use, copy, modify, merge, publish,
12
distribute, sublicense, and/or sell copies of the Software, and to
13
permit persons to whom the Software is furnished to do so, subject to
14
the following conditions:
15
16
The above copyright notice and this permission notice shall be included
17
in all copies or substantial portions of the Software.
18
19
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27
*/
28
29
// Tags for FIX ME!!!: (in order of priority)
30
//      XXX - should be fixed NAO!
31
//      XERROR - with regards to parse errors
32
//      XSCRIPT - with regards to scripting mode
33
//      XENCODING - with regards to encoding (for reparsing tests)
34
//      XDOM - DOM specific code (tagName is explicitly not marked).
35
//          this is not (yet) in helper functions.
36
37
class HTML5_TreeBuilder {
0 ignored issues
show
Coding Style Compatibility introduced by
PSR1 recommends that each class must be in a namespace of at least one level to avoid collisions.

You can fix this by adding a namespace to your class:

namespace YourVendor;

class YourClass { }

When choosing a vendor namespace, try to pick something that is not too generic to avoid conflicts with other libraries.

Loading history...
As per PSR2, the opening brace for this class should be on a new line.
Loading history...
38
    public $stack = array();
39
    public $context;
40
    public $content_model;
41
42
    private $mode;
43
    private $original_mode;
44
    private $secondary_mode;
45
    private $dom;
46
    // Whether or not normal insertion of nodes should actually foster
47
    // parent (used in one case in spec)
48
    private $foster_parent = false;
49
    private $a_formatting  = array();
50
51
    private $head_pointer = null;
52
    private $form_pointer = null;
53
54
    private $flag_frameset_ok = true;
55
    private $flag_force_quirks = false;
0 ignored issues
show
The property $flag_force_quirks is not used and could be removed.

This check marks private properties in classes that are never used. Those properties can be removed.

Loading history...
56
    private $ignored = false;
57
    private $quirks_mode = null;
58
    // this gets to 2 when we want to ignore the next lf character, and
59
    // is decremented at the beginning of each processed token (this way,
60
    // code can check for (bool)$ignore_lf_token, but it phases out
61
    // appropriately)
62
    private $ignore_lf_token = 0;
63
    private $fragment = false;
64
    private $root;
65
66
    private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
67
    private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
68
    // dl and ds are speculative
69
    private $special = array('address','area','article','aside','base','basefont','bgsound',
70
    'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds',
71
    'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
72
    'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
73
    'listing','menu','meta','nav','noembed','noframes','noscript','ol',
74
    'p','param','plaintext','pre','script','select','spacer','style',
75
    'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
76
77
    private $pendingTableCharacters;
78
    private $pendingTableCharactersDirty;
79
80
    // Tree construction modes
81
    const INITIAL           = 0;
82
    const BEFORE_HTML       = 1;
83
    const BEFORE_HEAD       = 2;
84
    const IN_HEAD           = 3;
85
    const IN_HEAD_NOSCRIPT  = 4;
86
    const AFTER_HEAD        = 5;
87
    const IN_BODY           = 6;
88
    const IN_CDATA_RCDATA   = 7;
89
    const IN_TABLE          = 8;
90
    const IN_TABLE_TEXT     = 9;
91
    const IN_CAPTION        = 10;
92
    const IN_COLUMN_GROUP   = 11;
93
    const IN_TABLE_BODY     = 12;
94
    const IN_ROW            = 13;
95
    const IN_CELL           = 14;
96
    const IN_SELECT         = 15;
97
    const IN_SELECT_IN_TABLE= 16;
98
    const IN_FOREIGN_CONTENT= 17;
99
    const AFTER_BODY        = 18;
100
    const IN_FRAMESET       = 19;
101
    const AFTER_FRAMESET    = 20;
102
    const AFTER_AFTER_BODY  = 21;
103
    const AFTER_AFTER_FRAMESET = 22;
104
105
    /**
     * Converts a magic number to a readable constant name. Use for debugging.
     *
     * The lookup table is built once, via reflection, from every
     * integer-valued constant of HTML5_TreeBuilder and cached in a static
     * local. Several constant groups in this class share the same integers
     * (for example INITIAL and SCOPE are both 0), so the first constant
     * encountered for a given value is kept; the tree construction modes are
     * declared before the other groups, so their names are the ones expected
     * to win (this relies on reflection listing constants in declaration
     * order).
     *
     * @param int $number Value of one of this class's integer constants.
     * @return string|null The constant's name, or null when no integer
     *                     constant of this class has that value.
     */
    private function strConst($number) {
        static $lookup;
        if (!$lookup) {
            $lookup = array();
            $r = new ReflectionClass('HTML5_TreeBuilder');
            foreach ($r->getConstants() as $const => $num) {
                // Skip non-integer constants (the NS_* namespaces are
                // strings or null), they are not magic numbers.
                if (!is_int($num)) continue;
                // Do not let a later constant with the same value (e.g.
                // SCOPE = 0) replace an earlier one (e.g. INITIAL = 0).
                if (!isset($lookup[$num])) {
                    $lookup[$num] = $const;
                }
            }
        }
        // Guard the lookup so an unknown value yields null instead of an
        // undefined-offset notice.
        return isset($lookup[$number]) ? $lookup[$number] : null;
    }
121
122
    // The different types of elements.
123
    const SPECIAL    = 100;
124
    const SCOPING    = 101;
125
    const FORMATTING = 102;
126
    const PHRASING   = 103;
127
128
    // Quirks modes in $quirks_mode
129
    const NO_QUIRKS             = 200;
130
    const QUIRKS_MODE           = 201;
131
    const LIMITED_QUIRKS_MODE   = 202;
132
133
    // Marker to be placed in $a_formatting
134
    const MARKER     = 300;
135
136
    // Namespaces for foreign content
137
    const NS_HTML   = null; // to prevent DOM from requiring NS on everything
138
    const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
139
    const NS_SVG    = 'http://www.w3.org/2000/svg';
140
    const NS_XLINK  = 'http://www.w3.org/1999/xlink';
141
    const NS_XML    = 'http://www.w3.org/XML/1998/namespace';
142
    const NS_XMLNS  = 'http://www.w3.org/2000/xmlns/';
143
144
    // Different types of scopes to test for elements
145
    const SCOPE = 0;
146
    const SCOPE_LISTITEM = 1;
147
    const SCOPE_TABLE = 2;
148
149
    /**
     * Sets up a fresh tree builder: creates the backing DOMDocument and
     * starts tree construction in the INITIAL mode.
     */
    public function __construct() {
        // Configure the document on a local first, then store it.
        $document = new DOMDocument();
        $document->encoding = 'UTF-8';
        $document->preserveWhiteSpace = true;
        $document->substituteEntities = true;
        // Strict error checking is switched off on the document.
        $document->strictErrorChecking = false;

        $this->dom = $document;
        $this->mode = self::INITIAL;
    }
158
159
    // Process tag tokens
160
    public function emitToken($token, $mode = null) {
161
        // XXX: ignore parse errors... why are we emitting them, again?
162
        if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
163
        if ($mode === null) $mode = $this->mode;
164
165
        /*
0 ignored issues
show
Unused Code Comprehensibility introduced by
64% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
166
        $backtrace = debug_backtrace();
167
        if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
168
        echo $this->strConst($mode);
169
        if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
170
        echo "\n  ";
171
        token_dump($token);
172
        $this->printStack();
173
        $this->printActiveFormattingElements();
174
        if ($this->foster_parent) echo "  -> this is a foster parent mode\n";
175
        if ($this->flag_frameset_ok) echo "  -> frameset ok\n";
176
        */
177
178
        if ($this->ignore_lf_token) $this->ignore_lf_token--;
179
        $this->ignored = false;
180
        // indenting is a little wonky, this can be changed later on
181
        switch ($mode) {
182
183
    case self::INITIAL:
184
185
        /* A character token that is one of U+0009 CHARACTER TABULATION,
186
         * U+000A LINE FEED (LF), U+000C FORM FEED (FF),  or U+0020 SPACE */
187
        if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
188
            /* Ignore the token. */
189
            $this->ignored = true;
190
        } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
191
            if (
0 ignored issues
show
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
192
                $token['name'] !== 'html' || !empty($token['public']) ||
193
                !empty($token['system']) || $token !== 'about:legacy-compat'
194
            ) {
195
                /* If the DOCTYPE token's name is not a case-sensitive match
196
                 * for the string "html", or if the token's public identifier
197
                 * is not missing, or if the token's system identifier is
198
                 * neither missing nor a case-sensitive match for the string
199
                 * "about:legacy-compat", then there is a parse error (this
200
                 * is the DOCTYPE parse error). */
201
                // DOCTYPE parse error
202
            }
203
            /* Append a DocumentType node to the Document node, with the name
204
             * attribute set to the name given in the DOCTYPE token, or the
205
             * empty string if the name was missing; the publicId attribute
206
             * set to the public identifier given in the DOCTYPE token, or
207
             * the empty string if the public identifier was missing; the
208
             * systemId attribute set to the system identifier given in the
209
             * DOCTYPE token, or the empty string if the system identifier
210
             * was missing; and the other attributes specific to
211
             * DocumentType objects set to null and empty lists as
212
             * appropriate. Associate the DocumentType node with the
213
             * Document object so that it is returned as the value of the
214
             * doctype attribute of the Document object. */
215
            if (!isset($token['public'])) $token['public'] = null;
216
            if (!isset($token['system'])) $token['system'] = null;
217
            // XDOM
218
            // Yes this is hacky. I'm kind of annoyed that I can't appendChild
219
            // a doctype to DOMDocument. Maybe I haven't chanted the right
220
            // syllables.
221
            $impl = new DOMImplementation();
222
            // This call can fail for particularly pathological cases (namely,
223
            // the qualifiedName parameter ($token['name']) could be missing.
224
            if ($token['name']) {
225
                $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
226
                $this->dom->appendChild($doctype);
227
            } else {
228
                // It looks like libxml's not actually *able* to express this case.
229
                // So... don't.
230
                $this->dom->emptyDoctype = true;
0 ignored issues
show
The property emptyDoctype does not seem to exist. Did you mean doctype?

An attempt at access to an undefined property has been detected. This may either be a typographical error or the property has been renamed but there are still references to its old name.

If you really want to allow access to undefined properties, you can define magic methods to allow access. See the php core documentation on Overloading.

Loading history...
231
            }
232
            $public = is_null($token['public']) ? false : strtolower($token['public']);
0 ignored issues
show
As per coding-style, please use === null instead of is_null.
Loading history...
233
            $system = is_null($token['system']) ? false : strtolower($token['system']);
0 ignored issues
show
As per coding-style, please use === null instead of is_null.
Loading history...
234
            $publicStartsWithForQuirks = array(
235
             "+//silmaril//dtd html pro v0r11 19970101//",
236
             "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
237
             "-//as//dtd html 3.0 aswedit + extensions//",
238
             "-//ietf//dtd html 2.0 level 1//",
239
             "-//ietf//dtd html 2.0 level 2//",
240
             "-//ietf//dtd html 2.0 strict level 1//",
241
             "-//ietf//dtd html 2.0 strict level 2//",
242
             "-//ietf//dtd html 2.0 strict//",
243
             "-//ietf//dtd html 2.0//",
244
             "-//ietf//dtd html 2.1e//",
245
             "-//ietf//dtd html 3.0//",
246
             "-//ietf//dtd html 3.2 final//",
247
             "-//ietf//dtd html 3.2//",
248
             "-//ietf//dtd html 3//",
249
             "-//ietf//dtd html level 0//",
250
             "-//ietf//dtd html level 1//",
251
             "-//ietf//dtd html level 2//",
252
             "-//ietf//dtd html level 3//",
253
             "-//ietf//dtd html strict level 0//",
254
             "-//ietf//dtd html strict level 1//",
255
             "-//ietf//dtd html strict level 2//",
256
             "-//ietf//dtd html strict level 3//",
257
             "-//ietf//dtd html strict//",
258
             "-//ietf//dtd html//",
259
             "-//metrius//dtd metrius presentational//",
260
             "-//microsoft//dtd internet explorer 2.0 html strict//",
261
             "-//microsoft//dtd internet explorer 2.0 html//",
262
             "-//microsoft//dtd internet explorer 2.0 tables//",
263
             "-//microsoft//dtd internet explorer 3.0 html strict//",
264
             "-//microsoft//dtd internet explorer 3.0 html//",
265
             "-//microsoft//dtd internet explorer 3.0 tables//",
266
             "-//netscape comm. corp.//dtd html//",
267
             "-//netscape comm. corp.//dtd strict html//",
268
             "-//o'reilly and associates//dtd html 2.0//",
269
             "-//o'reilly and associates//dtd html extended 1.0//",
270
             "-//o'reilly and associates//dtd html extended relaxed 1.0//",
271
             "-//spyglass//dtd html 2.0 extended//",
272
             "-//sq//dtd html 2.0 hotmetal + extensions//",
273
             "-//sun microsystems corp.//dtd hotjava html//",
274
             "-//sun microsystems corp.//dtd hotjava strict html//",
275
             "-//w3c//dtd html 3 1995-03-24//",
276
             "-//w3c//dtd html 3.2 draft//",
277
             "-//w3c//dtd html 3.2 final//",
278
             "-//w3c//dtd html 3.2//",
279
             "-//w3c//dtd html 3.2s draft//",
280
             "-//w3c//dtd html 4.0 frameset//",
281
             "-//w3c//dtd html 4.0 transitional//",
282
             "-//w3c//dtd html experimental 19960712//",
283
             "-//w3c//dtd html experimental 970421//",
284
             "-//w3c//dtd w3 html//",
285
             "-//w3o//dtd w3 html 3.0//",
286
             "-//webtechs//dtd mozilla html 2.0//",
287
             "-//webtechs//dtd mozilla html//",
288
            );
289
            $publicSetToForQuirks = array(
290
             "-//w3o//dtd w3 html strict 3.0//",
291
             "-/w3c/dtd html 4.0 transitional/en",
292
             "html",
293
            );
294
            $publicStartsWithAndSystemForQuirks = array(
295
             "-//w3c//dtd html 4.01 frameset//",
296
             "-//w3c//dtd html 4.01 transitional//",
297
            );
298
            $publicStartsWithForLimitedQuirks = array(
299
             "-//w3c//dtd xhtml 1.0 frameset//",
300
             "-//w3c//dtd xhtml 1.0 transitional//",
301
            );
302
            $publicStartsWithAndSystemForLimitedQuirks = array(
303
             "-//w3c//dtd html 4.01 frameset//",
304
             "-//w3c//dtd html 4.01 transitional//",
305
            );
306
            // first, do easy checks
307
            if (
308
                !empty($token['force-quirks']) ||
309
                strtolower($token['name']) !== 'html'
310
            ) {
311
                $this->quirks_mode = self::QUIRKS_MODE;
312
            } else {
313
                do {
314
                    if ($system) {
315 View Code Duplication
                        foreach ($publicStartsWithAndSystemForQuirks as $x) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
316
                            if (strncmp($public, $x, strlen($x)) === 0) {
317
                                $this->quirks_mode = self::QUIRKS_MODE;
318
                                break;
319
                            }
320
                        }
321
                        if (!is_null($this->quirks_mode)) break;
0 ignored issues
show
As per coding-style, please use === null instead of is_null.
Loading history...
322 View Code Duplication
                        foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
323
                            if (strncmp($public, $x, strlen($x)) === 0) {
324
                                $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
325
                                break;
326
                            }
327
                        }
328
                        if (!is_null($this->quirks_mode)) break;
0 ignored issues
show
As per coding-style, please use === null instead of is_null.
Loading history...
329
                    }
330
                    foreach ($publicSetToForQuirks as $x) {
331
                        if ($public === $x) {
332
                            $this->quirks_mode = self::QUIRKS_MODE;
333
                            break;
334
                        }
335
                    }
336
                    if (!is_null($this->quirks_mode)) break;
0 ignored issues
show
As per coding-style, please use === null instead of is_null.
Loading history...
337 View Code Duplication
                    foreach ($publicStartsWithForLimitedQuirks as $x) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
338
                        if (strncmp($public, $x, strlen($x)) === 0) {
339
                            $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
340
                        }
341
                    }
342
                    if (!is_null($this->quirks_mode)) break;
0 ignored issues
show
As per coding-style, please use === null instead of is_null.
Loading history...
343
                    if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
344
                        $this->quirks_mode = self::QUIRKS_MODE;
345
                        break;
346
                    }
347 View Code Duplication
                    foreach ($publicStartsWithForQuirks as $x) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
348
                        if (strncmp($public, $x, strlen($x)) === 0) {
349
                            $this->quirks_mode = self::QUIRKS_MODE;
350
                            break;
351
                        }
352
                    }
353
                    if (is_null($this->quirks_mode)) {
0 ignored issues
show
As per coding-style, please use === null instead of is_null.
Loading history...
354
                        $this->quirks_mode = self::NO_QUIRKS;
355
                    }
356
                } while (false);
357
            }
358
            $this->mode = self::BEFORE_HTML;
359
        } else {
360
            // parse error
361
            /* Switch the insertion mode to "before html", then reprocess the
362
             * current token. */
363
            $this->mode = self::BEFORE_HTML;
364
            $this->quirks_mode = self::QUIRKS_MODE;
365
            $this->emitToken($token);
366
        }
367
        break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
368
369
    case self::BEFORE_HTML:
370
371
        /* A DOCTYPE token */
372
        if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
373
            // Parse error. Ignore the token.
374
            $this->ignored = true;
375
376
        /* A comment token */
377 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
378
            /* Append a Comment node to the Document object with the data
379
            attribute set to the data given in the comment token. */
380
            // XDOM
381
            $comment = $this->dom->createComment($token['data']);
382
            $this->dom->appendChild($comment);
383
384
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
385
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
386
        or U+0020 SPACE */
387
        } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
388
            /* Ignore the token. */
389
            $this->ignored = true;
390
391
        /* A start tag whose tag name is "html" */
392
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
393
            /* Create an element for the token in the HTML namespace. Append it 
394
             * to the Document  object. Put this element in the stack of open 
395
             * elements. */
396
            // XDOM
397
            $html = $this->insertElement($token, false);
398
            $this->dom->appendChild($html);
399
            $this->stack[] = $html;
400
401
            $this->mode = self::BEFORE_HEAD;
402
403
        } else {
404
            /* Create an html element. Append it to the Document object. Put
405
             * this element in the stack of open elements. */
406
            // XDOM
407
            $html = $this->dom->createElementNS(self::NS_HTML, 'html');
408
            $this->dom->appendChild($html);
409
            $this->stack[] = $html;
410
411
            /* Switch the insertion mode to "before head", then reprocess the
412
             * current token. */
413
            $this->mode = self::BEFORE_HEAD;
414
            $this->emitToken($token);
415
        }
416
        break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
417
418
    case self::BEFORE_HEAD:
419
420
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
421
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
422
        or U+0020 SPACE */
423
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
424
            /* Ignore the token. */
425
            $this->ignored = true;
426
427
        /* A comment token */
428
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
429
            /* Append a Comment node to the current node with the data attribute
430
            set to the data given in the comment token. */
431
            $this->insertComment($token['data']);
432
433
        /* A DOCTYPE token */
434
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
435
            /* Parse error. Ignore the token */
436
            $this->ignored = true;
437
            // parse error
438
439
        /* A start tag token with the tag name "html" */
440
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
441
            /* Process the token using the rules for the "in body"
442
             * insertion mode. */
443
            $this->processWithRulesFor($token, self::IN_BODY);
444
445
        /* A start tag token with the tag name "head" */
446
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
447
            /* Insert an HTML element for the token. */
448
            $element = $this->insertElement($token);
449
450
            /* Set the head element pointer to this new element node. */
451
            $this->head_pointer = $element;
452
453
            /* Change the insertion mode to "in head". */
454
            $this->mode = self::IN_HEAD;
455
456
        /* An end tag whose tag name is one of: "head", "body", "html", "br" */
457
        } elseif(
458
            $token['type'] === HTML5_Tokenizer::ENDTAG && (
459
                $token['name'] === 'head' || $token['name'] === 'body' ||
460
                $token['name'] === 'html' || $token['name'] === 'br'
461
        )) {
462
            /* Act as if a start tag token with the tag name "head" and no
463
             * attributes had been seen, then reprocess the current token. */
464
            $this->emitToken(array(
465
                'name' => 'head',
466
                'type' => HTML5_Tokenizer::STARTTAG,
467
                'attr' => array()
468
            ));
469
            $this->emitToken($token);
470
471
        /* Any other end tag */
472
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
473
            /* Parse error. Ignore the token. */
474
            $this->ignored = true;
475
476
        } else {
477
            /* Act as if a start tag token with the tag name "head" and no
478
             * attributes had been seen, then reprocess the current token.
479
             * Note: This will result in an empty head element being
480
             * generated, with the current token being reprocessed in the
481
             * "after head" insertion mode. */
482
            $this->emitToken(array(
483
                'name' => 'head',
484
                'type' => HTML5_Tokenizer::STARTTAG,
485
                'attr' => array()
486
            ));
487
            $this->emitToken($token);
488
        }
489
        break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
490
491
    case self::IN_HEAD:
492
493
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
494
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
495
        or U+0020 SPACE. */
496
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
497
            /* Insert the character into the current node. */
498
            $this->insertText($token['data']);
499
500
        /* A comment token */
501
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
502
            /* Append a Comment node to the current node with the data attribute
503
            set to the data given in the comment token. */
504
            $this->insertComment($token['data']);
505
506
        /* A DOCTYPE token */
507
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
508
            /* Parse error. Ignore the token. */
509
            $this->ignored = true;
510
            // parse error
511
512
        /* A start tag whose tag name is "html" */
513
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
514
        $token['name'] === 'html') {
515
            $this->processWithRulesFor($token, self::IN_BODY);
516
517
        /* A start tag whose tag name is one of: "base", "command", "link" */
518
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
519
        ($token['name'] === 'base' || $token['name'] === 'command' ||
520
        $token['name'] === 'link')) {
521
            /* Insert an HTML element for the token. Immediately pop the
522
             * current node off the stack of open elements. */
523
            $this->insertElement($token);
524
            array_pop($this->stack);
525
526
            // YYY: Acknowledge the token's self-closing flag, if it is set.
527
528
        /* A start tag whose tag name is "meta" */
529
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
530
            /* Insert an HTML element for the token. Immediately pop the
531
             * current node off the stack of open elements. */
532
            $this->insertElement($token);
533
            array_pop($this->stack);
534
535
            // XERROR: Acknowledge the token's self-closing flag, if it is set.
536
537
            // XENCODING: If the element has a charset attribute, and its value is a
538
            // supported encoding, and the confidence is currently tentative,
539
            // then change the encoding to the encoding given by the value of
540
            // the charset attribute.
541
            //
542
            // Otherwise, if the element has a content attribute, and applying
543
            // the algorithm for extracting an encoding from a Content-Type to
544
            // its value returns a supported encoding encoding, and the
545
            // confidence is currently tentative, then change the encoding to
546
            // the encoding encoding.
547
548
        /* A start tag with the tag name "title" */
549
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
550
            $this->insertRCDATAElement($token);
551
552
        /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
553
         * A start tag whose tag name is one of: "noframes", "style" */
554
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
555
        ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
556
            // XSCRIPT: Scripting flag not respected
557
            $this->insertCDATAElement($token);
558
559
        // XSCRIPT: Scripting flag disable not implemented
560
561
        /* A start tag with the tag name "script" */
562
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
563
            /* 1. Create an element for the token in the HTML namespace. */
564
            $node = $this->insertElement($token, false);
565
566
            /* 2. Mark the element as being "parser-inserted" */
567
            // Uhhh... XSCRIPT
568
569
            /* 3. If the parser was originally created for the HTML
570
             * fragment parsing algorithm, then mark the script element as 
571
             * "already executed". (fragment case) */
572
            // ditto... XSCRIPT
573
574
            /* 4. Append the new element to the current node  and push it onto 
575
             * the stack of open elements.  */
576
            end($this->stack)->appendChild($node);
577
            $this->stack[] = $node;
578
            // I guess we could squash these together
579
580
            /* 6. Let the original insertion mode be the current insertion mode. */
581
            $this->original_mode = $this->mode;
582
            /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
583
            $this->mode = self::IN_CDATA_RCDATA;
584
            /* 5. Switch the tokeniser's content model flag to the CDATA state. */
585
            $this->content_model = HTML5_Tokenizer::CDATA;
586
587
        /* An end tag with the tag name "head" */
588
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
589
            /* Pop the current node (which will be the head element) off the stack of open elements. */
590
            array_pop($this->stack);
591
592
            /* Change the insertion mode to "after head". */
593
            $this->mode = self::AFTER_HEAD;
594
595
        // Slight logic inversion here to minimize duplication
596
        /* A start tag with the tag name "head". */
597
        /* An end tag whose tag name is not one of: "body", "html", "br" */
598
        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
599
        ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
600
        $token['name'] !== 'body' && $token['name'] !== 'br')) {
601
            // Parse error. Ignore the token.
602
            $this->ignored = true;
603
604
        /* Anything else */
605
        } else {
606
            /* Act as if an end tag token with the tag name "head" had been
607
             * seen, and reprocess the current token. */
608
            $this->emitToken(array(
609
                'name' => 'head',
610
                'type' => HTML5_Tokenizer::ENDTAG
611
            ));
612
613
            /* Then, reprocess the current token. */
614
            $this->emitToken($token);
615
        }
616
        break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
617
618
    case self::IN_HEAD_NOSCRIPT:
619
        if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
620
            // parse error
621
        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
622
            $this->processWithRulesFor($token, self::IN_BODY);
623
        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
624
            /* Pop the current node (which will be a noscript element) from the
625
             * stack of open elements; the new current node will be a head
626
             * element. */
627
            array_pop($this->stack);
628
            $this->mode = self::IN_HEAD;
629
        } elseif (
630
            ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
631
            ($token['type'] === HTML5_Tokenizer::COMMENT) ||
632
            ($token['type'] === HTML5_Tokenizer::STARTTAG && (
633
                $token['name'] === 'link' || $token['name'] === 'meta' ||
634
                $token['name'] === 'noframes' || $token['name'] === 'style'))) {
635
            $this->processWithRulesFor($token, self::IN_HEAD);
636
        // inverted logic
637
        } elseif (
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
638
            ($token['type'] === HTML5_Tokenizer::STARTTAG && (
639
                $token['name'] === 'head' || $token['name'] === 'noscript')) ||
640
            ($token['type'] === HTML5_Tokenizer::ENDTAG &&
641
                $token['name'] !== 'br')) {
642
            // parse error
643
        } else {
644
            // parse error
645
            $this->emitToken(array(
646
                'type' => HTML5_Tokenizer::ENDTAG,
647
                'name' => 'noscript',
648
            ));
649
            $this->emitToken($token);
650
        }
651
        break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
652
653
    case self::AFTER_HEAD:
654
        /* Handle the token as follows: */
655
656
        /* A character token that is one of U+0009 CHARACTER TABULATION,
657
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
658
        or U+0020 SPACE */
659
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
660
            /* Append the character to the current node. */
661
            $this->insertText($token['data']);
662
663
        /* A comment token */
664
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
665
            /* Append a Comment node to the current node with the data attribute
666
            set to the data given in the comment token. */
667
            $this->insertComment($token['data']);
668
669
        } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
670
            // parse error
671
672
        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
673
            $this->processWithRulesFor($token, self::IN_BODY);
674
675
        /* A start tag token with the tag name "body" */
676
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
677
            $this->insertElement($token);
678
679
            /* Set the frameset-ok flag to "not ok". */
680
            $this->flag_frameset_ok = false;
681
682
            /* Change the insertion mode to "in body". */
683
            $this->mode = self::IN_BODY;
684
685
        /* A start tag token with the tag name "frameset" */
686
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
687
            /* Insert a frameset element for the token. */
688
            $this->insertElement($token);
689
690
            /* Change the insertion mode to "in frameset". */
691
            $this->mode = self::IN_FRAMESET;
692
693
        /* A start tag token whose tag name is one of: "base", "link", "meta",
694
        "script", "style", "title" */
695
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
696
        array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
697
            // parse error
698
            /* Push the node pointed to by the head element pointer onto the
699
             * stack of open elements. */
700
            $this->stack[] = $this->head_pointer;
701
            $this->processWithRulesFor($token, self::IN_HEAD);
702
            array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
703
704
        // inversion of specification
705
        } elseif(
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
706
        ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
707
        ($token['type'] === HTML5_Tokenizer::ENDTAG &&
708
            $token['name'] !== 'body' && $token['name'] !== 'html' &&
709
            $token['name'] !== 'br')) {
710
            // parse error
711
712
        /* Anything else */
713
        } else {
714
            $this->emitToken(array(
715
                'name' => 'body',
716
                'type' => HTML5_Tokenizer::STARTTAG,
717
                'attr' => array()
718
            ));
719
            $this->flag_frameset_ok = true;
720
            $this->emitToken($token);
721
        }
722
        break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
723
724
    case self::IN_BODY:
725
        /* Handle the token as follows: */
726
727
        switch($token['type']) {
728
            /* A character token */
729
            case HTML5_Tokenizer::CHARACTER:
730
            case HTML5_Tokenizer::SPACECHARACTER:
731
                /* Reconstruct the active formatting elements, if any. */
732
                $this->reconstructActiveFormattingElements();
733
734
                /* Append the token's character to the current node. */
735
                $this->insertText($token['data']);
736
737
                /* If the token is not one of U+0009 CHARACTER TABULATION,
738
                 * U+000A LINE FEED (LF), U+000C FORM FEED (FF),  or U+0020
739
                 * SPACE, then set the frameset-ok flag to "not ok". */
740
                // i.e., if any of the characters is not whitespace
741
                if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
742
                    $this->flag_frameset_ok = false;
743
                }
744
            break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
745
746
            /* A comment token */
747
            case HTML5_Tokenizer::COMMENT:
748
                /* Append a Comment node to the current node with the data
749
                attribute set to the data given in the comment token. */
750
                $this->insertComment($token['data']);
751
            break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
752
753
            case HTML5_Tokenizer::DOCTYPE:
754
                // parse error
755
            break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
756
757
            case HTML5_Tokenizer::EOF:
758
                // parse error
759
            break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
760
761
            case HTML5_Tokenizer::STARTTAG:
762
            switch($token['name']) {
763
                case 'html':
764
                    // parse error
765
                    /* For each attribute on the token, check to see if the
766
                     * attribute is already present on the top element of the
767
                     * stack of open elements. If it is not, add the attribute
768
                     * and its corresponding value to that element. */
769
                    foreach($token['attr'] as $attr) {
770
                        if(!$this->stack[0]->hasAttribute($attr['name'])) {
771
                            $this->stack[0]->setAttribute($attr['name'], $attr['value']);
772
                        }
773
                    }
774
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
775
776
                case 'base': case 'command': case 'link': case 'meta': case 'noframes':
777
                case 'script': case 'style': case 'title':
778
                    /* Process the token as if the insertion mode had been "in
779
                    head". */
780
                    $this->processWithRulesFor($token, self::IN_HEAD);
781
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
782
783
                /* A start tag token with the tag name "body" */
784
                case 'body':
785
                    /* Parse error. If the second element on the stack of open
786
                    elements is not a body element, or, if the stack of open
787
                    elements has only one node on it, then ignore the token.
788
                    (fragment case) */
789
                    if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
790
                        $this->ignored = true;
791
                        // Ignore
792
793
                    /* Otherwise, for each attribute on the token, check to see
794
                    if the attribute is already present on the body element (the
795
                    second element)    on the stack of open elements. If it is not,
796
                    add the attribute and its corresponding value to that
797
                    element. */
798
                    } else {
799
                        foreach($token['attr'] as $attr) {
800
                            if(!$this->stack[1]->hasAttribute($attr['name'])) {
801
                                $this->stack[1]->setAttribute($attr['name'], $attr['value']);
802
                            }
803
                        }
804
                    }
805
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
806
807
                case 'frameset':
808
                    // parse error
809
                    /* If the second element on the stack of open elements is
810
                     * not a body element, or, if the stack of open elements
811
                     * has only one node on it, then ignore the token.
812
                     * (fragment case) */
813
                    if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
814
                        $this->ignored = true;
815
                        // Ignore
816
                    } elseif (!$this->flag_frameset_ok) {
817
                        $this->ignored = true;
818
                        // Ignore
819
                    } else {
820
                        /* 1. Remove the second element on the stack of open 
821
                         * elements from its parent node, if it has one.  */
822
                        if($this->stack[1]->parentNode) {
823
                            $this->stack[1]->parentNode->removeChild($this->stack[1]);
824
                        }
825
826
                        /* 2. Pop all the nodes from the bottom of the stack of 
827
                         * open elements, from the current node up to the root 
828
                         * html element. */
829
                        array_splice($this->stack, 1);
830
831
                        $this->insertElement($token);
832
                        $this->mode = self::IN_FRAMESET;
833
                    }
834
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
835
836
                // in spec, there is a diversion here
837
838
                case 'address': case 'article': case 'aside': case 'blockquote':
839
                case 'center': case 'datagrid': case 'details': case 'dir':
840
                case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
841
                case 'header': case 'hgroup': case 'menu': case 'nav':
842
                case 'ol': case 'p': case 'section': case 'ul':
843
                    /* If the stack of open elements has a p element in scope,
844
                    then act as if an end tag with the tag name p had been
845
                    seen. */
846
                    if($this->elementInScope('p')) {
847
                        $this->emitToken(array(
848
                            'name' => 'p',
849
                            'type' => HTML5_Tokenizer::ENDTAG
850
                        ));
851
                    }
852
853
                    /* Insert an HTML element for the token. */
854
                    $this->insertElement($token);
855
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
856
857
                /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
858
                "h5", "h6" */
859
                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
860
                    /* If the stack of open elements has a p  element in scope,
861
                    then act as if an end tag with the tag name p had been seen. */
862
                    if($this->elementInScope('p')) {
863
                        $this->emitToken(array(
864
                            'name' => 'p',
865
                            'type' => HTML5_Tokenizer::ENDTAG
866
                        ));
867
                    }
868
869
                    /* If the current node is an element whose tag name is one
870
                     * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
871
                     * parse error; pop the current node off the stack of open
872
                     * elements. */
873
                    $peek = array_pop($this->stack);
874
                    if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
0 ignored issues
show
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
875
                        // parse error
876
                    } else {
877
                        $this->stack[] = $peek;
878
                    }
879
880
                    /* Insert an HTML element for the token. */
881
                    $this->insertElement($token);
882
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
883
884
                case 'pre': case 'listing':
885
                    /* If the stack of open elements has a p  element in scope,
886
                    then act as if an end tag with the tag name p had been seen. */
887
                    if($this->elementInScope('p')) {
888
                        $this->emitToken(array(
889
                            'name' => 'p',
890
                            'type' => HTML5_Tokenizer::ENDTAG
891
                        ));
892
                    }
893
                    $this->insertElement($token);
894
                    /* If the next token is a U+000A LINE FEED (LF) character
895
                     * token, then ignore that token and move on to the next
896
                     * one. (Newlines at the start of pre blocks are ignored as
897
                     * an authoring convenience.) */
898
                    $this->ignore_lf_token = 2;
899
                    $this->flag_frameset_ok = false;
900
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
901
902
                /* A start tag whose tag name is "form" */
903
                case 'form':
904
                    /* If the form element pointer is not null, ignore the
905
                    token with a parse error. */
906
                    if($this->form_pointer !== null) {
907
                        $this->ignored = true;
908
                        // Ignore.
909
910
                    /* Otherwise: */
911
                    } else {
912
                        /* If the stack of open elements has a p element in
913
                        scope, then act as if an end tag with the tag name p
914
                        had been seen. */
915
                        if($this->elementInScope('p')) {
916
                            $this->emitToken(array(
917
                                'name' => 'p',
918
                                'type' => HTML5_Tokenizer::ENDTAG
919
                            ));
920
                        }
921
922
                        /* Insert an HTML element for the token, and set the
923
                        form element pointer to point to the element created. */
924
                        $element = $this->insertElement($token);
925
                        $this->form_pointer = $element;
926
                    }
927
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
928
929
                // condensed specification
930
                case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
931
                    /* 1. Set the frameset-ok flag to "not ok". */
932
                    $this->flag_frameset_ok = false;
933
934
                    $stack_length = count($this->stack) - 1;
935
                    for($n = $stack_length; 0 <= $n; $n--) {
936
                        /* 2. Initialise node to be the current node (the
937
                        bottommost node of the stack). */
938
                        $stop = false;
0 ignored issues
show
$stop is not used, you could remove the assignment.

This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead: the first because $myVar is never used, and the second because $higher is always overwritten on every possible execution path.

Loading history...
939
                        $node = $this->stack[$n];
940
                        $cat  = $this->getElementCategory($node);
941
942
                        // for case 'li':
0 ignored issues
show
Unused Code Comprehensibility introduced by
58% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
943
                        /* 3. If node is an li element, then act as if an end
944
                         * tag with the tag name "li" had been seen, then jump
945
                         * to the last step.  */
946
                        // for case 'dc': case 'dd': case 'ds': case 'dt':
0 ignored issues
show
Unused Code Comprehensibility introduced by
60% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
947
                        /* If node is a dc, dd, ds or dt element, then act as if an end
948
                         * tag with the same tag name as node had been seen, then
949
                         * jump to the last step. */
950
                        if(($token['name'] === 'li' && $node->tagName === 'li') ||
951
                        ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional
952
                            $this->emitToken(array(
953
                                'type' => HTML5_Tokenizer::ENDTAG,
954
                                'name' => $node->tagName,
955
                            ));
956
                            break;
957
                        }
958
959
                        /* 4. If node is not in the formatting category, and is
960
                        not    in the phrasing category, and is not an address,
961
                        div or p element, then stop this algorithm. */
962
                        if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
963
                        $node->tagName !== 'address' && $node->tagName !== 'div' &&
964
                        $node->tagName !== 'p') {
965
                            break;
966
                        }
967
968
                        /* 5. Otherwise, set node to the previous entry in the
969
                         * stack of open elements and return to step 2. */
970
                    }
971
972
                    /* 6. This is the last step. */
973
974
                    /* If the stack of open elements has a p  element in scope,
975
                    then act as if an end tag with the tag name p had been
976
                    seen. */
977
                    if($this->elementInScope('p')) {
978
                        $this->emitToken(array(
979
                            'name' => 'p',
980
                            'type' => HTML5_Tokenizer::ENDTAG
981
                        ));
982
                    }
983
984
                    /* Finally, insert an HTML element with the same tag
985
                    name as the    token's. */
986
                    $this->insertElement($token);
987
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
988
989
                /* A start tag token whose tag name is "plaintext" */
990 View Code Duplication
                case 'plaintext':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
991
                    /* If the stack of open elements has a p  element in scope,
992
                    then act as if an end tag with the tag name p had been
993
                    seen. */
994
                    if($this->elementInScope('p')) {
995
                        $this->emitToken(array(
996
                            'name' => 'p',
997
                            'type' => HTML5_Tokenizer::ENDTAG
998
                        ));
999
                    }
1000
1001
                    /* Insert an HTML element for the token. */
1002
                    $this->insertElement($token);
1003
1004
                    $this->content_model = HTML5_Tokenizer::PLAINTEXT;
1005
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1006
1007
                // more diversions
1008
1009
                /* A start tag whose tag name is "a" */
1010
                case 'a':
1011
                    /* If the list of active formatting elements contains
1012
                    an element whose tag name is "a" between the end of the
1013
                    list and the last marker on the list (or the start of
1014
                    the list if there is no marker on the list), then this
1015
                    is a parse error; act as if an end tag with the tag name
1016
                    "a" had been seen, then remove that element from the list
1017
                    of active formatting elements and the stack of open
1018
                    elements if the end tag didn't already remove it (it
1019
                    might not have if the element is not in table scope). */
1020
                    $leng = count($this->a_formatting);
1021
1022
                    for($n = $leng - 1; $n >= 0; $n--) {
1023
                        if($this->a_formatting[$n] === self::MARKER) {
1024
                            break;
1025
1026
                        } elseif($this->a_formatting[$n]->tagName === 'a') {
1027
                            $a = $this->a_formatting[$n];
1028
                            $this->emitToken(array(
1029
                                'name' => 'a',
1030
                                'type' => HTML5_Tokenizer::ENDTAG
1031
                            ));
1032 View Code Duplication
                            if (in_array($a, $this->a_formatting)) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1033
                                $a_i = array_search($a, $this->a_formatting, true);
1034
                                if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
1035
                            }
1036 View Code Duplication
                            if (in_array($a, $this->stack)) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1037
                                $a_i = array_search($a, $this->stack, true);
1038
                                if ($a_i !== false) array_splice($this->stack, $a_i, 1);
1039
                            }
1040
                            break;
1041
                        }
1042
                    }
1043
1044
                    /* Reconstruct the active formatting elements, if any. */
1045
                    $this->reconstructActiveFormattingElements();
1046
1047
                    /* Insert an HTML element for the token. */
1048
                    $el = $this->insertElement($token);
1049
1050
                    /* Add that element to the list of active formatting
1051
                    elements. */
1052
                    $this->a_formatting[] = $el;
1053
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1054
1055
                case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
1056
                case 's': case 'small': case 'strike':
1057
                case 'strong': case 'tt': case 'u':
1058
                    /* Reconstruct the active formatting elements, if any. */
1059
                    $this->reconstructActiveFormattingElements();
1060
1061
                    /* Insert an HTML element for the token. */
1062
                    $el = $this->insertElement($token);
1063
1064
                    /* Add that element to the list of active formatting
1065
                    elements. */
1066
                    $this->a_formatting[] = $el;
1067
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1068
1069
                case 'nobr':
1070
                    /* Reconstruct the active formatting elements, if any. */
1071
                    $this->reconstructActiveFormattingElements();
1072
1073
                    /* If the stack of open elements has a nobr element in
1074
                     * scope, then this is a parse error; act as if an end tag
1075
                     * with the tag name "nobr" had been seen, then once again
1076
                     * reconstruct the active formatting elements, if any. */
1077
                    if ($this->elementInScope('nobr')) {
1078
                        $this->emitToken(array(
1079
                            'name' => 'nobr',
1080
                            'type' => HTML5_Tokenizer::ENDTAG,
1081
                        ));
1082
                        $this->reconstructActiveFormattingElements();
1083
                    }
1084
1085
                    /* Insert an HTML element for the token. */
1086
                    $el = $this->insertElement($token);
1087
1088
                    /* Add that element to the list of active formatting
1089
                    elements. */
1090
                    $this->a_formatting[] = $el;
1091
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1092
1093
                // another diversion
1094
1095
                /* A start tag token whose tag name is "button" */
1096 View Code Duplication
                case 'button':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1097
                    /* If the stack of open elements has a button element in scope,
1098
                    then this is a parse error; act as if an end tag with the tag
1099
                    name "button" had been seen, then reprocess the token. (We don't
1100
                    do that. Unnecessary.) (I hope you're right! -- ezyang) */
1101
                    if($this->elementInScope('button')) {
1102
                        $this->emitToken(array(
1103
                            'name' => 'button',
1104
                            'type' => HTML5_Tokenizer::ENDTAG
1105
                        ));
1106
                    }
1107
1108
                    /* Reconstruct the active formatting elements, if any. */
1109
                    $this->reconstructActiveFormattingElements();
1110
1111
                    /* Insert an HTML element for the token. */
1112
                    $this->insertElement($token);
1113
1114
                    /* Insert a marker at the end of the list of active
1115
                    formatting elements. */
1116
                    $this->a_formatting[] = self::MARKER;
1117
1118
                    $this->flag_frameset_ok = false;
1119
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1120
1121
                case 'applet': case 'marquee': case 'object':
1122
                    /* Reconstruct the active formatting elements, if any. */
1123
                    $this->reconstructActiveFormattingElements();
1124
1125
                    /* Insert an HTML element for the token. */
1126
                    $this->insertElement($token);
1127
1128
                    /* Insert a marker at the end of the list of active
1129
                    formatting elements. */
1130
                    $this->a_formatting[] = self::MARKER;
1131
1132
                    $this->flag_frameset_ok = false;
1133
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1134
1135
                // spec diversion
1136
1137
                /* A start tag whose tag name is "table" */
1138 View Code Duplication
                case 'table':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1139
                    /* If the Document is not set to quirks mode, and the 
1140
                     * stack of open elements has a p element in scope, then 
1141
                     * act as if an end tag with the tag name "p" had been 
1142
                     * seen. */
1143
                    if($this->quirks_mode !== self::QUIRKS_MODE &&
1144
                    $this->elementInScope('p')) {
1145
                        $this->emitToken(array(
1146
                            'name' => 'p',
1147
                            'type' => HTML5_Tokenizer::ENDTAG
1148
                        ));
1149
                    }
1150
1151
                    /* Insert an HTML element for the token. */
1152
                    $this->insertElement($token);
1153
1154
                    $this->flag_frameset_ok = false;
1155
1156
                    /* Change the insertion mode to "in table". */
1157
                    $this->mode = self::IN_TABLE;
1158
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1159
1160
                /* A start tag whose tag name is one of: "area", "basefont",
0 ignored issues
show
Unused Code Comprehensibility introduced by
41% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
1161
                "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
1162
                case 'area': case 'basefont': case 'bgsound': case 'br':
1163
                case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
1164
                case 'wbr':
1165
                    /* Reconstruct the active formatting elements, if any. */
1166
                    $this->reconstructActiveFormattingElements();
1167
1168
                    /* Insert an HTML element for the token. */
1169
                    $this->insertElement($token);
1170
1171
                    /* Immediately pop the current node off the stack of open elements. */
1172
                    array_pop($this->stack);
1173
1174
                    // YYY: Acknowledge the token's self-closing flag, if it is set.
1175
1176
                    $this->flag_frameset_ok = false;
1177
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1178
1179
                case 'param': case 'source':
1180
                    /* Insert an HTML element for the token. */
1181
                    $this->insertElement($token);
1182
1183
                    /* Immediately pop the current node off the stack of open elements. */
1184
                    array_pop($this->stack);
1185
1186
                    // YYY: Acknowledge the token's self-closing flag, if it is set.
1187
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1188
1189
                /* A start tag whose tag name is "hr" */
1190
                case 'hr':
1191
                    /* If the stack of open elements has a p element in scope,
1192
                    then act as if an end tag with the tag name p had been seen. */
1193
                    if($this->elementInScope('p')) {
1194
                        $this->emitToken(array(
1195
                            'name' => 'p',
1196
                            'type' => HTML5_Tokenizer::ENDTAG
1197
                        ));
1198
                    }
1199
1200
                    /* Insert an HTML element for the token. */
1201
                    $this->insertElement($token);
1202
1203
                    /* Immediately pop the current node off the stack of open elements. */
1204
                    array_pop($this->stack);
1205
1206
                    // YYY: Acknowledge the token's self-closing flag, if it is set.
1207
1208
                    $this->flag_frameset_ok = false;
1209
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1210
1211
                /* A start tag whose tag name is "image" */
1212
                case 'image':
1213
                    /* Parse error. Change the token's tag name to "img" and
1214
                    reprocess it. (Don't ask.) */
1215
                    $token['name'] = 'img';
1216
                    $this->emitToken($token);
1217
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1218
1219
                /* A start tag whose tag name is "isindex" */
1220
                case 'isindex':
1221
                    /* Parse error. */
1222
1223
                    /* If the form element pointer is not null,
1224
                    then ignore the token. */
1225
                    if($this->form_pointer === null) {
1226
                        /* Act as if a start tag token with the tag name "form" had
1227
                        been seen. */
1228
                        /* If the token has an attribute called "action", set
1229
                         * the action attribute on the resulting form
1230
                         * element to the value of the "action" attribute of
1231
                         * the token. */
1232
                        $attr = array();
1233
                        $action = $this->getAttr($token, 'action');
1234
                        if ($action !== false) {
1235
                            $attr[] = array('name' => 'action', 'value' => $action);
1236
                        }
1237
                        $this->emitToken(array(
1238
                            'name' => 'form',
1239
                            'type' => HTML5_Tokenizer::STARTTAG,
1240
                            'attr' => $attr
1241
                        ));
1242
1243
                        /* Act as if a start tag token with the tag name "hr" had
1244
                        been seen. */
1245
                        $this->emitToken(array(
1246
                            'name' => 'hr',
1247
                            'type' => HTML5_Tokenizer::STARTTAG,
1248
                            'attr' => array()
1249
                        ));
1250
1251
                        /* Act as if a start tag token with the tag name "label"
1252
                        had been seen. */
1253
                        $this->emitToken(array(
1254
                            'name' => 'label',
1255
                            'type' => HTML5_Tokenizer::STARTTAG,
1256
                            'attr' => array()
1257
                        ));
1258
1259
                        /* Act as if a stream of character tokens had been seen. */
1260
                        $prompt = $this->getAttr($token, 'prompt');
1261
                        if ($prompt === false) {
1262
                            $prompt = 'This is a searchable index. '.
1263
                            'Insert your search keywords here: ';
1264
                        }
1265
                        $this->emitToken(array(
1266
                            'data' => $prompt,
1267
                            'type' => HTML5_Tokenizer::CHARACTER,
1268
                        ));
1269
1270
                        /* Act as if a start tag token with the tag name "input"
1271
                        had been seen, with all the attributes from the "isindex"
1272
                        token, except with the "name" attribute set to the value
1273
                        "isindex" (ignoring any explicit "name" attribute). */
1274
                        $attr = array();
1275
                        foreach ($token['attr'] as $keypair) {
1276
                            if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
1277
                                $keypair['name'] === 'prompt') continue;
1278
                            $attr[] = $keypair;
1279
                        }
1280
                        $attr[] = array('name' => 'name', 'value' => 'isindex');
1281
1282
                        $this->emitToken(array(
1283
                            'name' => 'input',
1284
                            'type' => HTML5_Tokenizer::STARTTAG,
1285
                            'attr' => $attr
1286
                        ));
1287
1288
                        /* Act as if an end tag token with the tag name "label"
1289
                        had been seen. */
1290
                        $this->emitToken(array(
1291
                            'name' => 'label',
1292
                            'type' => HTML5_Tokenizer::ENDTAG
1293
                        ));
1294
1295
                        /* Act as if a start tag token with the tag name "hr" had
1296
                        been seen. */
1297
                        $this->emitToken(array(
1298
                            'name' => 'hr',
1299
                            'type' => HTML5_Tokenizer::STARTTAG
1300
                        ));
1301
1302
                        /* Act as if an end tag token with the tag name "form" had
1303
                        been seen. */
1304
                        $this->emitToken(array(
1305
                            'name' => 'form',
1306
                            'type' => HTML5_Tokenizer::ENDTAG
1307
                        ));
1308
                    } else {
1309
                        $this->ignored = true;
1310
                    }
1311
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1312
1313
                /* A start tag whose tag name is "textarea" */
1314
                case 'textarea':
1315
                    $this->insertElement($token);
1316
1317
                    /* If the next token is a U+000A LINE FEED (LF)
1318
                     * character token, then ignore that token and move on to
1319
                     * the next one. (Newlines at the start of textarea
1320
                     * elements are ignored as an authoring convenience.)
1321
                     * need flag, see also <pre> */
1322
                    $this->ignore_lf_token = 2;
1323
1324
                    $this->original_mode = $this->mode;
1325
                    $this->flag_frameset_ok = false;
1326
                    $this->mode = self::IN_CDATA_RCDATA;
1327
1328
                    /* Switch the tokeniser's content model flag to the
1329
                    RCDATA state. */
1330
                    $this->content_model = HTML5_Tokenizer::RCDATA;
1331
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1332
1333
                /* A start tag token whose tag name is "xmp" */
1334 View Code Duplication
                case 'xmp':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1335
                    /* If the stack of open elements has a p element in
1336
                    scope, then act as if an end tag with the tag name
1337
                    "p" has been seen. */
1338
                    if ($this->elementInScope('p')) {
1339
                        $this->emitToken(array(
1340
                            'name' => 'p',
1341
                            'type' => HTML5_Tokenizer::ENDTAG
1342
                        ));
1343
                    }
1344
1345
                    /* Reconstruct the active formatting elements, if any. */
1346
                    $this->reconstructActiveFormattingElements();
1347
1348
                    $this->flag_frameset_ok = false;
1349
1350
                    $this->insertCDATAElement($token);
1351
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1352
1353
                case 'iframe':
1354
                    $this->flag_frameset_ok = false;
1355
                    $this->insertCDATAElement($token);
1356
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1357
1358
                case 'noembed': case 'noscript':
1359
                    // XSCRIPT: should check scripting flag
1360
                    $this->insertCDATAElement($token);
1361
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1362
1363
                /* A start tag whose tag name is "select" */
1364
                case 'select':
1365
                    /* Reconstruct the active formatting elements, if any. */
1366
                    $this->reconstructActiveFormattingElements();
1367
1368
                    /* Insert an HTML element for the token. */
1369
                    $this->insertElement($token);
1370
1371
                    $this->flag_frameset_ok = false;
1372
1373
                    /* If the insertion mode is one of "in table", "in caption",
1374
                     * "in column group", "in table body", "in row", or "in
1375
                     * cell", then switch the insertion mode to "in select in
1376
                     * table". Otherwise, switch the insertion mode to "in
1377
                     * select". */
1378
                    if (
1379
                        $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
1380
                        $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
1381
                        $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
1382
                    ) {
1383
                        $this->mode = self::IN_SELECT_IN_TABLE;
1384
                    } else {
1385
                        $this->mode = self::IN_SELECT;
1386
                    }
1387
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1388
1389 View Code Duplication
                case 'option': case 'optgroup':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1390
                    if ($this->elementInScope('option')) {
1391
                        $this->emitToken(array(
1392
                            'name' => 'option',
1393
                            'type' => HTML5_Tokenizer::ENDTAG,
1394
                        ));
1395
                    }
1396
                    $this->reconstructActiveFormattingElements();
1397
                    $this->insertElement($token);
1398
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1399
1400
                case 'rp': case 'rt':
1401
                    /* If the stack of open elements has a ruby element in scope, then generate
1402
                     * implied end tags. If the current node is not then a ruby element, this is
1403
                     * a parse error; pop all the nodes from the current node up to the node
1404
                     * immediately before the bottommost ruby element on the stack of open elements.
1405
                     */
1406
                    if ($this->elementInScope('ruby')) {
1407
                        $this->generateImpliedEndTags();
1408
                    }
1409
                    $peek = false;
1410
                    do {
1411
                        if ($peek) {
0 ignored issues
show
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
1412
                            // parse error
1413
                        }
1414
                        $peek = array_pop($this->stack);
1415
                    } while ($peek->tagName !== 'ruby');
1416
                    $this->stack[] = $peek; // we popped one too many
1417
                    $this->insertElement($token);
1418
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1419
1420
                // spec diversion
1421
1422 View Code Duplication
                case 'math':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1423
                    $this->reconstructActiveFormattingElements();
1424
                    $token = $this->adjustMathMLAttributes($token);
1425
                    $token = $this->adjustForeignAttributes($token);
1426
                    $this->insertForeignElement($token, self::NS_MATHML);
1427
                    if (isset($token['self-closing'])) {
1428
                        // XERROR: acknowledge the token's self-closing flag
1429
                        array_pop($this->stack);
1430
                    }
1431
                    if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1432
                        $this->secondary_mode = $this->mode;
1433
                        $this->mode = self::IN_FOREIGN_CONTENT;
1434
                    }
1435
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1436
1437 View Code Duplication
                case 'svg':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1438
                    $this->reconstructActiveFormattingElements();
1439
                    $token = $this->adjustSVGAttributes($token);
1440
                    $token = $this->adjustForeignAttributes($token);
1441
                    $this->insertForeignElement($token, self::NS_SVG);
1442
                    if (isset($token['self-closing'])) {
1443
                        // XERROR: acknowledge the token's self-closing flag
1444
                        array_pop($this->stack);
1445
                    }
1446
                    if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1447
                        $this->secondary_mode = $this->mode;
1448
                        $this->mode = self::IN_FOREIGN_CONTENT;
1449
                    }
1450
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1451
1452
                case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
1453
                case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
1454
                    // parse error
1455
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1456
1457
                /* A start tag token not covered by the previous entries */
1458
                default:
1459
                    /* Reconstruct the active formatting elements, if any. */
1460
                    $this->reconstructActiveFormattingElements();
1461
1462
                    $this->insertElement($token);
1463
                    /* This element will be a phrasing element. */
1464
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1465
            }
1466
            break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1467
1468
            case HTML5_Tokenizer::ENDTAG:
1469
            switch($token['name']) {
1470
                /* An end tag with the tag name "body" */
1471
                case 'body':
1472
                    /* If the stack of open elements does not have a body 
1473
                     * element in scope, this is a parse error; ignore the 
1474
                     * token. */
1475
                    if(!$this->elementInScope('body')) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope('body') of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false, and null as possible values. It is generally a good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
1476
                        $this->ignored = true;
1477
1478
                    /* Otherwise, if there is a node in the stack of open 
1479
                     * elements that is not either a dc element, a dd element, 
1480
                     * a ds element, a dt element, an li element, an optgroup 
1481
                     * element, an option element, a p element, an rp element, 
1482
                     * an rt element, a tbody element, a td element, a tfoot 
1483
                     * element, a th element, a thead element, a tr element, 
1484
                     * the body element, or the html element, then this is a 
1485
                     * parse error.
1486
                     */
1487
                    } else {
0 ignored issues
show
This else statement is empty and can be removed.

This check looks for the else branches of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These else branches can be removed.

if (rand(1, 6) > 3) {
print "Check failed";
} else {
    //print "Check succeeded";
}

could be turned into

if (rand(1, 6) > 3) {
    print "Check failed";
}

This is much more concise to read.

Loading history...
1488
                        // XERROR: implement this check for parse error
1489
                    }
1490
1491
                    /* Change the insertion mode to "after body". */
1492
                    $this->mode = self::AFTER_BODY;
1493
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1494
1495
                /* An end tag with the tag name "html" */
1496
                case 'html':
1497
                    /* Act as if an end tag with tag name "body" had been seen,
1498
                    then, if that token wasn't ignored, reprocess the current
1499
                    token. */
1500
                    $this->emitToken(array(
1501
                        'name' => 'body',
1502
                        'type' => HTML5_Tokenizer::ENDTAG
1503
                    ));
1504
1505
                    if (!$this->ignored) $this->emitToken($token);
1506
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1507
1508
                case 'address': case 'article': case 'aside': case 'blockquote':
1509
                case 'center': case 'datagrid': case 'details': case 'dir':
1510
                case 'div': case 'dl': case 'fieldset': case 'footer':
1511
                case 'header': case 'hgroup': case 'listing': case 'menu':
1512 View Code Duplication
                case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1513
                    /* If the stack of open elements has an element in scope
1514
                    with the same tag name as that of the token, then generate
1515
                    implied end tags. */
1516
                    if($this->elementInScope($token['name'])) {
1517
                        $this->generateImpliedEndTags();
1518
1519
                        /* Now, if the current node is not an element with
1520
                        the same tag name as that of the token, then this
1521
                        is a parse error. */
1522
                        // XERROR: implement parse error logic
1523
1524
                        /* If the stack of open elements has an element in
1525
                        scope with the same tag name as that of the token,
1526
                        then pop elements from this stack until an element
1527
                        with that tag name has been popped from the stack. */
1528
                        do {
1529
                            $node = array_pop($this->stack);
1530
                        } while ($node->tagName !== $token['name']);
1531
                    } else {
0 ignored issues
show
This else statement is empty and can be removed.

This check looks for the else branches of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These else branches can be removed.

if (rand(1, 6) > 3) {
print "Check failed";
} else {
    //print "Check succeeded";
}

could be turned into

if (rand(1, 6) > 3) {
    print "Check failed";
}

This is much more concise to read.

Loading history...
1532
                        // parse error
1533
                    }
1534
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1535
1536
                /* An end tag whose tag name is "form" */
1537
                case 'form':
1538
                    /* Let node be the element that the form element pointer is set to. */
1539
                    $node = $this->form_pointer;
1540
                    /* Set the form element pointer to null. */
1541
                    $this->form_pointer = null;
1542
                    /* If node is null or the stack of open elements does not 
1543
                        * have node in scope, then this is a parse error; ignore the token. */
1544
                    if ($node === null || !in_array($node, $this->stack)) {
1545
                        // parse error
1546
                        $this->ignored = true;
1547
                    } else {
1548
                        /* 1. Generate implied end tags. */
1549
                        $this->generateImpliedEndTags();
1550
                        /* 2. If the current node is not node, then this is a parse error.  */
1551
                        if (end($this->stack) !== $node) {
0 ignored issues
show
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
1552
                            // parse error
1553
                        }
1554
                        /* 3. Remove node from the stack of open elements. */
1555
                        array_splice($this->stack, array_search($node, $this->stack, true), 1);
1556
                    }
1557
1558
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1559
1560
                /* An end tag whose tag name is "p" */
1561
                case 'p':
1562
                    /* If the stack of open elements has a p element in scope,
1563
                    then generate implied end tags, except for p elements. */
1564
                    if($this->elementInScope('p')) {
1565
                        /* Generate implied end tags, except for elements with
1566
                         * the same tag name as the token. */
1567
                        $this->generateImpliedEndTags(array('p'));
1568
1569
                        /* If the current node is not a p element, then this is
1570
                        a parse error. */
1571
                        // XERROR: implement
1572
1573
                        /* Pop elements from the stack of open elements  until
1574
                         * an element with the same tag name as the token has
1575
                         * been popped from the stack. */
1576
                        do {
1577
                            $node = array_pop($this->stack);
1578
                        } while ($node->tagName !== 'p');
1579
1580
                    } else {
1581
                        // parse error
1582
                        $this->emitToken(array(
1583
                            'name' => 'p',
1584
                            'type' => HTML5_Tokenizer::STARTTAG,
1585
                        ));
1586
                        $this->emitToken($token);
1587
                    }
1588
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1589
1590
                /* An end tag whose tag name is "li" */
1591 View Code Duplication
                case 'li':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1592
                    /* If the stack of open elements does not have an element
1593
                     * in list item scope with the same tag name as that of the
1594
                     * token, then this is a parse error; ignore the token. */
1595
                    if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
1596
                        /* Generate implied end tags, except for elements with the
1597
                         * same tag name as the token. */
1598
                        $this->generateImpliedEndTags(array($token['name']));
1599
                        /* If the current node is not an element with the same tag
1600
                         * name as that of the token, then this is a parse error. */
1601
                        // XERROR: parse error
1602
                        /* Pop elements from the stack of open elements  until an
1603
                         * element with the same tag name as the token has been
1604
                         * popped from the stack. */
1605
                        do {
1606
                            $node = array_pop($this->stack);
1607
                        } while ($node->tagName !== $token['name']);
1608
                    } else {
0 ignored issues
show
This else statement is empty and can be removed.

This check looks for the else branches of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These else branches can be removed.

if (rand(1, 6) > 3) {
print "Check failed";
} else {
    //print "Check succeeded";
}

could be turned into

if (rand(1, 6) > 3) {
    print "Check failed";
}

This is much more concise to read.

Loading history...
1609
                        // XERROR: parse error
1610
                    }
1611
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1612
1613
                /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
1614 View Code Duplication
                case 'dc': case 'dd': case 'ds': case 'dt':
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1615
                    if($this->elementInScope($token['name'])) {
1616
                        $this->generateImpliedEndTags(array($token['name']));
1617
1618
                        /* If the current node is not an element with the same
1619
                        tag name as the token, then this is a parse error. */
1620
                        // XERROR: implement parse error
1621
1622
                        /* Pop elements from the stack of open elements  until
1623
                         * an element with the same tag name as the token has
1624
                         * been popped from the stack. */
1625
                        do {
1626
                            $node = array_pop($this->stack);
1627
                        } while ($node->tagName !== $token['name']);
1628
1629
                    } else {
0 ignored issues
show
This else statement is empty and can be removed.

This check looks for the else branches of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These else branches can be removed.

if (rand(1, 6) > 3) {
print "Check failed";
} else {
    //print "Check succeeded";
}

could be turned into

if (rand(1, 6) > 3) {
    print "Check failed";
}

This is much more concise to read.

Loading history...
1630
                        // XERROR: parse error
1631
                    }
1632
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1633
1634
                /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
1635
                "h5", "h6" */
1636
                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1637
                    $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
1638
1639
                    /* If the stack of open elements has in scope an element whose
1640
                    tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1641
                    generate implied end tags. */
1642
                    if($this->elementInScope($elements)) {
1643
                        $this->generateImpliedEndTags();
1644
1645
                        /* Now, if the current node is not an element with the same
1646
                        tag name as that of the token, then this is a parse error. */
1647
                        // XERROR: implement parse error
1648
1649
                        /* If the stack of open elements has in scope an element
1650
                        whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
1651
                        "h6", then pop elements from the stack until an element
1652
                        with one of those tag names has been popped from the stack. */
1653
                        do {
1654
                            $node = array_pop($this->stack);
1655
                        } while (!in_array($node->tagName, $elements));
1656
                    } else {
0 ignored issues
show
This else statement is empty and can be removed.

This check looks for the else branches of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These else branches can be removed.

if (rand(1, 6) > 3) {
print "Check failed";
} else {
    //print "Check succeeded";
}

could be turned into

if (rand(1, 6) > 3) {
    print "Check failed";
}

This is much more concise to read.

Loading history...
1657
                        // parse error
1658
                    }
1659
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1660
1661
                /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
0 ignored issues
show
Unused Code Comprehensibility introduced by
47% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
1662
                "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1663
                case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
1664
                case 'i': case 'nobr': case 's': case 'small': case 'strike':
1665
                case 'strong': case 'tt': case 'u':
1666
                    // XERROR: generally speaking this needs parse error logic
1667
                    /* 1. Let the formatting element be the last element in
1668
                    the list of active formatting elements that:
1669
                        * is between the end of the list and the last scope
1670
                        marker in the list, if any, or the start of the list
1671
                        otherwise, and
1672
                        * has the same tag name as the token.
1673
                    */
1674
                    while(true) {
1675
                        for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
1676
                            if($this->a_formatting[$a] === self::MARKER) {
1677
                                break;
1678
1679
                            } elseif($this->a_formatting[$a]->tagName === $token['name']) {
1680
                                $formatting_element = $this->a_formatting[$a];
1681
                                $in_stack = in_array($formatting_element, $this->stack, true);
1682
                                $fe_af_pos = $a;
1683
                                break;
1684
                            }
1685
                        }
1686
1687
                        /* If there is no such node, or, if that node is
1688
                        also in the stack of open elements but the element
1689
                        is not in scope, then this is a parse error. Abort
1690
                        these steps. The token is ignored. */
1691
                        if(!isset($formatting_element) || ($in_stack &&
1692
                        !$this->elementInScope($token['name']))) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($token['name']) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
1693
                            $this->ignored = true;
1694
                            break;
1695
1696
                        /* Otherwise, if there is such a node, but that node
1697
                        is not in the stack of open elements, then this is a
1698
                        parse error; remove the element from the list, and
1699
                        abort these steps. */
1700
                        } elseif(isset($formatting_element) && !$in_stack) {
0 ignored issues
show
The variable $in_stack does not seem to be defined for all execution paths leading up to this point.

If you define a variable conditionally, it can happen that it is not defined for all execution paths.

Let’s take a look at an example:

function myFunction($a) {
    switch ($a) {
        case 'foo':
            $x = 1;
            break;

        case 'bar':
            $x = 2;
            break;
    }

    // $x is potentially undefined here.
    echo $x;
}

In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.

Available Fixes

  1. Check for existence of the variable explicitly:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        if (isset($x)) { // Make sure it's always set.
            echo $x;
        }
    }
    
  2. Define a default value for the variable:

    function myFunction($a) {
        $x = ''; // Set a default which gets overridden for certain paths.
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        echo $x;
    }
    
  3. Add a value for the missing path:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
    
            // We add support for the missing case.
            default:
                $x = '';
                break;
        }
    
        echo $x;
    }
    
Loading history...
1701
                            unset($this->a_formatting[$fe_af_pos]);
1702
                            $this->a_formatting = array_merge($this->a_formatting);
1703
                            break;
1704
                        }
1705
1706
                        /* Otherwise, there is a formatting element and that
1707
                         * element is in the stack and is in scope. If the
1708
                         * element is not the current node, this is a parse
1709
                         * error. In any case, proceed with the algorithm as
1710
                         * written in the following steps. */
1711
                        // XERROR: implement me
1712
1713
                        /* 2. Let the furthest block be the topmost node in the
1714
                        stack of open elements that is lower in the stack
1715
                        than the formatting element, and is not an element in
1716
                        the phrasing or formatting categories. There might
1717
                        not be one. */
1718
                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
1719
                        $length = count($this->stack);
1720
1721
                        for($s = $fe_s_pos + 1; $s < $length; $s++) {
1722
                            $category = $this->getElementCategory($this->stack[$s]);
1723
1724
                            if($category !== self::PHRASING && $category !== self::FORMATTING) {
1725
                                $furthest_block = $this->stack[$s];
1726
                                break;
1727
                            }
1728
                        }
1729
1730
                        /* 3. If there is no furthest block, then the UA must
1731
                        skip the subsequent steps and instead just pop all
1732
                        the nodes from the bottom of the stack of open
1733
                        elements, from the current node up to the formatting
1734
                        element, and remove the formatting element from the
1735
                        list of active formatting elements. */
1736
                        if(!isset($furthest_block)) {
1737
                            for($n = $length - 1; $n >= $fe_s_pos; $n--) {
1738
                                array_pop($this->stack);
1739
                            }
1740
1741
                            unset($this->a_formatting[$fe_af_pos]);
1742
                            $this->a_formatting = array_merge($this->a_formatting);
1743
                            break;
1744
                        }
1745
1746
                        /* 4. Let the common ancestor be the element
1747
                        immediately above the formatting element in the stack
1748
                        of open elements. */
1749
                        $common_ancestor = $this->stack[$fe_s_pos - 1];
1750
1751
                        /* 5. Let a bookmark note the position of the
1752
                        formatting element in the list of active formatting
1753
                        elements relative to the elements on either side
1754
                        of it in the list. */
1755
                        $bookmark = $fe_af_pos;
0 ignored issues
show
The variable $fe_af_pos does not seem to be defined for all execution paths leading up to this point.

If you define a variable conditionally, it can happen that it is not defined for all execution paths.

Let’s take a look at an example:

function myFunction($a) {
    switch ($a) {
        case 'foo':
            $x = 1;
            break;

        case 'bar':
            $x = 2;
            break;
    }

    // $x is potentially undefined here.
    echo $x;
}

In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.

Available Fixes

  1. Check for existence of the variable explicitly:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        if (isset($x)) { // Make sure it's always set.
            echo $x;
        }
    }
    
  2. Define a default value for the variable:

    function myFunction($a) {
        $x = ''; // Set a default which gets overridden for certain paths.
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        echo $x;
    }
    
  3. Add a value for the missing path:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
    
            // We add support for the missing case.
            default:
                $x = '';
                break;
        }
    
        echo $x;
    }
    
Loading history...
1756
1757
                        /* 6. Let node and last node  be the furthest block.
1758
                        Follow these steps: */
1759
                        $node = $furthest_block;
1760
                        $last_node = $furthest_block;
1761
1762
                        while(true) {
1763
                            for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
1764
                                /* 6.1 Let node be the element immediately
1765
                                prior to node in the stack of open elements. */
1766
                                $node = $this->stack[$n];
1767
1768
                                /* 6.2 If node is not in the list of active
1769
                                formatting elements, then remove node from
1770
                                the stack of open elements and then go back
1771
                                to step 1. */
1772
                                if(!in_array($node, $this->a_formatting, true)) {
1773
                                    array_splice($this->stack, $n, 1);
1774
1775
                                } else {
1776
                                    break;
1777
                                }
1778
                            }
1779
1780
                            /* 6.3 Otherwise, if node is the formatting
1781
                            element, then go to the next step in the overall
1782
                            algorithm. */
1783
                            if($node === $formatting_element) {
1784
                                break;
1785
1786
                            /* 6.4 Otherwise, if last node is the furthest
1787
                            block, then move the aforementioned bookmark to
1788
                            be immediately after the node in the list of
1789
                            active formatting elements. */
1790
                            } elseif($last_node === $furthest_block) {
1791
                                $bookmark = array_search($node, $this->a_formatting, true) + 1;
1792
                            }
1793
1794
                            /* 6.5 Create an element for the token for which
1795
                             * the element node was created, replace the entry
1796
                             * for node in the list of active formatting
1797
                             * elements with an entry for the new element,
1798
                             * replace the entry for node in the stack of open
1799
                             * elements with an entry for the new element, and
1800
                             * let node be the new element. */
1801
                            // we don't know what the token is anymore
1802
                            // XDOM
1803
                            $clone = $node->cloneNode();
1804
                            $a_pos = array_search($node, $this->a_formatting, true);
1805
                            $s_pos = array_search($node, $this->stack, true);
1806
                            $this->a_formatting[$a_pos] = $clone;
1807
                            $this->stack[$s_pos] = $clone;
1808
                            $node = $clone;
1809
1810
                            /* 6.6 Insert last node into node, first removing
1811
                            it from its previous parent node if any. */
1812
                            // XDOM
1813
                            if($last_node->parentNode !== null) {
1814
                                $last_node->parentNode->removeChild($last_node);
1815
                            }
1816
1817
                            // XDOM
1818
                            $node->appendChild($last_node);
1819
1820
                            /* 6.7 Let last node be node. */
1821
                            $last_node = $node;
1822
1823
                            /* 6.8 Return to step 1 of this inner set of steps. */
1824
                        }
1825
1826
                        /* 7. If the common ancestor node is a table, tbody,
1827
                         * tfoot, thead, or tr element, then, foster parent
1828
                         * whatever last node ended up being in the previous
1829
                         * step, first removing it from its previous parent
1830
                         * node if any. */
1831
                        // XDOM
1832
                        if ($last_node->parentNode) { // common step
1833
                            $last_node->parentNode->removeChild($last_node);
1834
                        }
1835
                        if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
1836
                            $this->fosterParent($last_node);
1837
                        /* Otherwise, append whatever last node  ended up being
1838
                         * in the previous step to the common ancestor node,
1839
                         * first removing it from its previous parent node if
1840
                         * any. */
1841
                        } else {
1842
                            // XDOM
1843
                            $common_ancestor->appendChild($last_node);
1844
                        }
1845
1846
                        /* 8. Create an element for the token for which the
1847
                         * formatting element was created. */
1848
                        // XDOM
1849
                        $clone = $formatting_element->cloneNode();
1850
1851
                        /* 9. Take all of the child nodes of the furthest
1852
                        block and append them to the element created in the
1853
                        last step. */
1854
                        // XDOM
1855
                        while($furthest_block->hasChildNodes()) {
1856
                            $child = $furthest_block->firstChild;
1857
                            $furthest_block->removeChild($child);
1858
                            $clone->appendChild($child);
1859
                        }
1860
1861
                        /* 10. Append that clone to the furthest block. */
1862
                        // XDOM
1863
                        $furthest_block->appendChild($clone);
1864
1865
                        /* 11. Remove the formatting element from the list
1866
                        of active formatting elements, and insert the new element
1867
                        into the list of active formatting elements at the
1868
                        position of the aforementioned bookmark. */
1869
                        $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
1870
                        array_splice($this->a_formatting, $fe_af_pos, 1);
1871
1872
                        $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
1873
                        $af_part2 = array_slice($this->a_formatting, $bookmark);
1874
                        $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
1875
1876
                        /* 12. Remove the formatting element from the stack
1877
                        of open elements, and insert the new element into the stack
1878
                        of open elements immediately below the position of the
1879
                        furthest block in that stack. */
1880
                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
1881
                        array_splice($this->stack, $fe_s_pos, 1);
1882
1883
                        $fb_s_pos = array_search($furthest_block, $this->stack, true);
1884
                        $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
1885
                        $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
1886
                        $this->stack = array_merge($s_part1, array($clone), $s_part2);
1887
1888
                        /* 13. Jump back to step 1 in this series of steps. */
1889
                        unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
1890
                    }
1891
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1892
1893
                case 'applet': case 'button': case 'marquee': case 'object':
1894
                    /* If the stack of open elements has an element in scope whose
1895
                    tag name matches the tag name of the token, then generate implied
1896
                    tags. */
1897
                    if($this->elementInScope($token['name'])) {
1898
                        $this->generateImpliedEndTags();
1899
1900
                        /* Now, if the current node is not an element with the same
1901
                        tag name as the token, then this is a parse error. */
1902
                        // XERROR: implement logic
1903
1904
                        /* Pop elements from the stack of open elements  until
1905
                         * an element with the same tag name as the token has
1906
                         * been popped from the stack. */
1907
                        do {
1908
                            $node = array_pop($this->stack);
1909
                        } while ($node->tagName !== $token['name']);
1910
1911
                        /* Clear the list of active formatting elements up to the
1912
                         * last marker. */
1913
                        $keys = array_keys($this->a_formatting, self::MARKER, true);
1914
                        $marker = end($keys);
1915
1916
                        for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
1917
                            array_pop($this->a_formatting);
1918
                        }
1919
                    } else {
0 ignored issues
show
This else statement is empty and can be removed.

This check looks for the else branches of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These else branches can be removed.

if (rand(1, 6) > 3) {
print "Check failed";
} else {
    //print "Check succeeded";
}

could be turned into

if (rand(1, 6) > 3) {
    print "Check failed";
}

This is much more concise to read.

Loading history...
1920
                        // parse error
1921
                    }
1922
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1923
1924
                case 'br':
1925
                    // Parse error
1926
                    $this->emitToken(array(
1927
                        'name' => 'br',
1928
                        'type' => HTML5_Tokenizer::STARTTAG,
1929
                    ));
1930
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1931
1932
                /* An end tag token not covered by the previous entries */
1933
                default:
1934
                    for($n = count($this->stack) - 1; $n >= 0; $n--) {
1935
                        /* Initialise node to be the current node (the bottommost
1936
                        node of the stack). */
1937
                        $node = $this->stack[$n];
1938
1939
                        /* If node has the same tag name as the end tag token,
1940
                        then: */
1941
                        if($token['name'] === $node->tagName) {
1942
                            /* Generate implied end tags. */
1943
                            $this->generateImpliedEndTags();
1944
1945
                            /* If the tag name of the end tag token does not
1946
                            match the tag name of the current node, this is a
1947
                            parse error. */
1948
                            // XERROR: implement this
1949
1950
                            /* Pop all the nodes from the current node up to
1951
                            node, including node, then stop these steps. */
1952
                            // XSKETCHY
1953
                            do {
1954
                                $pop = array_pop($this->stack);
1955
                            } while ($pop !== $node);
1956
                            break;
1957
1958
                        } else {
1959
                            $category = $this->getElementCategory($node);
1960
1961
                            if($category !== self::FORMATTING && $category !== self::PHRASING) {
1962
                                /* Otherwise, if node is in neither the formatting
1963
                                category nor the phrasing category, then this is a
1964
                                parse error. Stop this algorithm. The end tag token
1965
                                is ignored. */
1966
                                $this->ignored = true;
1967
                                break;
1968
                                // parse error
1969
                            }
1970
                        }
1971
                        /* Set node to the previous entry in the stack of open elements. Loop. */
1972
                    }
1973
                break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1974
            }
1975
            break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1976
        }
1977
        break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
1978
1979
    case self::IN_CDATA_RCDATA:
1980
        if (
1981
            $token['type'] === HTML5_Tokenizer::CHARACTER ||
1982
            $token['type'] === HTML5_Tokenizer::SPACECHARACTER
1983
        ) {
1984
            $this->insertText($token['data']);
1985 View Code Duplication
        } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1986
            // parse error
1987
            /* If the current node is a script  element, mark the script
1988
             * element as "already executed". */
1989
            // probably not necessary
1990
            array_pop($this->stack);
1991
            $this->mode = $this->original_mode;
1992
            $this->emitToken($token);
1993
        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
1994
            array_pop($this->stack);
1995
            $this->mode = $this->original_mode;
1996
            // we're ignoring all of the execution stuff
1997 View Code Duplication
        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
1998
            array_pop($this->stack);
1999
            $this->mode = $this->original_mode;
2000
        }
2001
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2002
2003
    case self::IN_TABLE:
2004
        $clear = array('html', 'table');
2005
2006
        /* A character token */
2007
        if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2008
            $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2009
            /* Let the pending table character tokens
2010
             * be an empty list of tokens. */
2011
            $this->pendingTableCharacters = "";
2012
            $this->pendingTableCharactersDirty = false;
2013
            /* Let the original insertion mode be the current
2014
             * insertion mode. */
2015
            $this->original_mode = $this->mode;
2016
            /* Switch the insertion mode to
2017
             * "in table text" and
2018
             * reprocess the token. */
2019
            $this->mode = self::IN_TABLE_TEXT;
2020
            $this->emitToken($token);
2021
2022
        /* A comment token */
2023
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2024
            /* Append a Comment node to the current node with the data
2025
            attribute set to the data given in the comment token. */
2026
            $this->insertComment($token['data']);
2027
2028
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2029
            // parse error
2030
2031
        /* A start tag whose tag name is "caption" */
2032
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2033
        $token['name'] === 'caption') {
2034
            /* Clear the stack back to a table context. */
2035
            $this->clearStackToTableContext($clear);
2036
2037
            /* Insert a marker at the end of the list of active
2038
            formatting elements. */
2039
            $this->a_formatting[] = self::MARKER;
2040
2041
            /* Insert an HTML element for the token, then switch the
2042
            insertion mode to "in caption". */
2043
            $this->insertElement($token);
2044
            $this->mode = self::IN_CAPTION;
2045
2046
        /* A start tag whose tag name is "colgroup" */
2047
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2048
        $token['name'] === 'colgroup') {
2049
            /* Clear the stack back to a table context. */
2050
            $this->clearStackToTableContext($clear);
2051
2052
            /* Insert an HTML element for the token, then switch the
2053
            insertion mode to "in column group". */
2054
            $this->insertElement($token);
2055
            $this->mode = self::IN_COLUMN_GROUP;
2056
2057
        /* A start tag whose tag name is "col" */
2058
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2059
        $token['name'] === 'col') {
2060
            $this->emitToken(array(
2061
                'name' => 'colgroup',
2062
                'type' => HTML5_Tokenizer::STARTTAG,
2063
                'attr' => array()
2064
            ));
2065
2066
            $this->emitToken($token);
2067
2068
        /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
2069
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2070
        array('tbody', 'tfoot', 'thead'))) {
2071
            /* Clear the stack back to a table context. */
2072
            $this->clearStackToTableContext($clear);
2073
2074
            /* Insert an HTML element for the token, then switch the insertion
2075
            mode to "in table body". */
2076
            $this->insertElement($token);
2077
            $this->mode = self::IN_TABLE_BODY;
2078
2079
        /* A start tag whose tag name is one of: "td", "th", "tr" */
2080
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2081
        in_array($token['name'], array('td', 'th', 'tr'))) {
2082
            /* Act as if a start tag token with the tag name "tbody" had been
2083
            seen, then reprocess the current token. */
2084
            $this->emitToken(array(
2085
                'name' => 'tbody',
2086
                'type' => HTML5_Tokenizer::STARTTAG,
2087
                'attr' => array()
2088
            ));
2089
2090
            $this->emitToken($token);
2091
2092
        /* A start tag whose tag name is "table" */
2093
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2094
        $token['name'] === 'table') {
2095
            /* Parse error. Act as if an end tag token with the tag name "table"
2096
            had been seen, then, if that token wasn't ignored, reprocess the
2097
            current token. */
2098
            $this->emitToken(array(
2099
                'name' => 'table',
2100
                'type' => HTML5_Tokenizer::ENDTAG
2101
            ));
2102
2103
            if (!$this->ignored) $this->emitToken($token);
2104
2105
        /* An end tag whose tag name is "table" */
2106 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2107
        $token['name'] === 'table') {
2108
            /* If the stack of open elements does not have an element in table
2109
            scope with the same tag name as the token, this is a parse error.
2110
            Ignore the token. (fragment case) */
2111
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($t...e'], self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2112
                $this->ignored = true;
2113
2114
            /* Otherwise: */
2115
            } else {
2116
                do {
2117
                    $node = array_pop($this->stack);
2118
                } while ($node->tagName !== 'table');
2119
2120
                /* Reset the insertion mode appropriately. */
2121
                $this->resetInsertionMode();
2122
            }
2123
2124
        /* An end tag whose tag name is one of: "body", "caption", "col",
0 ignored issues
show
Unused Code Comprehensibility introduced by
44% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2125
        "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2126
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2127
        array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
2128
        'tfoot', 'th', 'thead', 'tr'))) {
2129
            // Parse error. Ignore the token.
2130
2131
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2132
        ($token['name'] === 'style' || $token['name'] === 'script')) {
2133
            $this->processWithRulesFor($token, self::IN_HEAD);
2134
2135
        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
2136
        // assignment is intentional
2137
        /* If the token does not have an attribute with the name "type", or
2138
         * if it does, but that attribute's value is not an ASCII
2139
         * case-insensitive match for the string "hidden", then: act as
2140
         * described in the "anything else" entry below. */
2141
        ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
2142
            // I.e., if it's an input with the type attribute == 'hidden'
2143
            /* Otherwise */
2144
            // parse error
2145
            $this->insertElement($token);
2146
            array_pop($this->stack);
2147
        } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
2148
            /* If the current node is not the root html element, then this is a parse error. */
2149
            if (end($this->stack)->tagName !== 'html') {
0 ignored issues
show
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2150
                // Note: It can only be the current node in the fragment case.
2151
                // parse error
2152
            }
2153
            /* Stop parsing. */
2154
        /* Anything else */
2155
        } else {
2156
            /* Parse error. Process the token as if the insertion mode was "in
2157
            body", with the following exception: */
2158
2159
            $old = $this->foster_parent;
2160
            $this->foster_parent = true;
2161
            $this->processWithRulesFor($token, self::IN_BODY);
2162
            $this->foster_parent = $old;
2163
        }
2164
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2165
2166
    case self::IN_TABLE_TEXT:
2167
        /* A character token */
2168
        if($token['type'] === HTML5_Tokenizer::CHARACTER) {
2169
            /* Append the character token to the pending table
2170
             * character tokens list. */
2171
            $this->pendingTableCharacters .= $token['data'];
2172
            $this->pendingTableCharactersDirty = true;
2173
        } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2174
            $this->pendingTableCharacters .= $token['data'];
2175
        /* Anything else */
2176
        } else {
2177
            if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
2178
                /* If any of the tokens in the pending table character tokens list 
2179
                 * are character tokens that are not one of U+0009 CHARACTER 
2180
                 * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or 
2181
                 * U+0020 SPACE, then reprocess those character tokens using the 
2182
                 * rules given in the "anything else" entry in the "in table"
2183
                 * insertion mode.*/
2184
                if ($this->pendingTableCharactersDirty) {
2185
                    /* Parse error. Process the token using the rules for the 
2186
                     * "in body" insertion mode, except that if the current 
2187
                     * node is a table, tbody, tfoot, thead, or tr element, 
2188
                     * then, whenever a node would be inserted into the current 
2189
                     * node, it must instead be foster parented. */
2190
                    // XERROR
2191
                    $old = $this->foster_parent;
2192
                    $this->foster_parent = true;
2193
                    $text_token = array(
2194
                        'type' => HTML5_Tokenizer::CHARACTER,
2195
                        'data' => $this->pendingTableCharacters,
2196
                    );
2197
                    $this->processWithRulesFor($text_token, self::IN_BODY);
2198
                    $this->foster_parent = $old;
2199
2200
                /* Otherwise, insert the characters given by the pending table 
2201
                 * character tokens list into the current node. */
2202
                } else {
2203
                    $this->insertText($this->pendingTableCharacters);
2204
                }
2205
                $this->pendingTableCharacters = null;
2206
                $this->pendingTableCharactersNull = null;
0 ignored issues
show
The property pendingTableCharactersNull does not seem to exist. Did you mean pendingTableCharacters?

An attempt at access to an undefined property has been detected. This may either be a typographical error or the property has been renamed but there are still references to its old name.

If you really want to allow access to undefined properties, you can define magic methods to allow access. See the php core documentation on Overloading.

Loading history...
2207
            }
2208
2209
            /* Switch the insertion mode to the original insertion mode and 
2210
             * reprocess the token.
2211
             */
2212
            $this->mode = $this->original_mode;
2213
            $this->emitToken($token);
2214
        }
2215
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2216
2217
    case self::IN_CAPTION:
2218
        /* An end tag whose tag name is "caption" */
2219
        if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
2220
            /* If the stack of open elements does not have an element in table
2221
            scope with the same tag name as the token, this is a parse error.
2222
            Ignore the token. (fragment case) */
2223
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($t...e'], self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2224
                $this->ignored = true;
2225
                // Ignore
2226
2227
            /* Otherwise: */
2228
            } else {
2229
                /* Generate implied end tags. */
2230
                $this->generateImpliedEndTags();
2231
2232
                /* Now, if the current node is not a caption element, then this
2233
                is a parse error. */
2234
                // XERROR: implement
2235
2236
                /* Pop elements from this stack until a caption element has
2237
                been popped from the stack. */
2238
                do {
2239
                    $node = array_pop($this->stack);
2240
                } while ($node->tagName !== 'caption');
2241
2242
                /* Clear the list of active formatting elements up to the last
2243
                marker. */
2244
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2245
2246
                /* Switch the insertion mode to "in table". */
2247
                $this->mode = self::IN_TABLE;
2248
            }
2249
2250
        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2251
        "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
2252
        name is "table" */
2253
        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2254
        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2255
        'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2256
        $token['name'] === 'table')) {
2257
            /* Parse error. Act as if an end tag with the tag name "caption"
2258
            had been seen, then, if that token wasn't ignored, reprocess the
2259
            current token. */
2260
            $this->emitToken(array(
2261
                'name' => 'caption',
2262
                'type' => HTML5_Tokenizer::ENDTAG
2263
            ));
2264
2265
            if (!$this->ignored) $this->emitToken($token);
2266
2267
        /* An end tag whose tag name is one of: "body", "col", "colgroup",
0 ignored issues
show
Unused Code Comprehensibility introduced by
43% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2268
        "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2269
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2270
        array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
2271
        'thead', 'tr'))) {
2272
            // Parse error. Ignore the token.
2273
            $this->ignored = true;
2274
2275
        /* Anything else */
2276
        } else {
2277
            /* Process the token as if the insertion mode was "in body". */
2278
            $this->processWithRulesFor($token, self::IN_BODY);
2279
        }
2280
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2281
2282
    case self::IN_COLUMN_GROUP:
2283
        /* A character token that is one of U+0009 CHARACTER TABULATION,
2284
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2285
        or U+0020 SPACE */
2286
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2287
            /* Append the character to the current node. */
2288
            $this->insertText($token['data']);
2289
2290
        /* A comment token */
2291
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2292
            /* Append a Comment node to the current node with the data
2293
            attribute set to the data given in the comment token. */
2294
            $this->insertToken($token['data']);
0 ignored issues
show
The method insertToken() does not seem to exist on object<HTML5_TreeBuilder>.

This check looks for calls to methods that do not seem to exist on a given type. It looks for the method on the type itself as well as in inherited classes or implemented interfaces.

This is most likely a typographical error or the method has been renamed.

Loading history...
2295
2296
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2297
            // parse error
2298
2299
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2300
            $this->processWithRulesFor($token, self::IN_BODY);
2301
2302
        /* A start tag whose tag name is "col" */
2303
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
2304
            /* Insert a col element for the token. Immediately pop the current
2305
            node off the stack of open elements. */
2306
            $this->insertElement($token);
2307
            array_pop($this->stack);
2308
            // XERROR: Acknowledge the token's self-closing flag, if it is set.
2309
2310
        /* An end tag whose tag name is "colgroup" */
2311
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2312
        $token['name'] === 'colgroup') {
2313
            /* If the current node is the root html element, then this is a
2314
            parse error, ignore the token. (fragment case) */
2315
            if(end($this->stack)->tagName === 'html') {
2316
                $this->ignored = true;
2317
2318
            /* Otherwise, pop the current node (which will be a colgroup
2319
            element) from the stack of open elements. Switch the insertion
2320
            mode to "in table". */
2321
            } else {
2322
                array_pop($this->stack);
2323
                $this->mode = self::IN_TABLE;
2324
            }
2325
2326
        /* An end tag whose tag name is "col" */
2327
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
2328
            /* Parse error. Ignore the token. */
2329
            $this->ignored = true;
2330
2331
        /* An end-of-file token */
2332
        /* If the current node is the root html  element */
2333
        } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2334
            /* Stop parsing */
2335
2336
        /* Anything else */
2337
        } else {
2338
            /* Act as if an end tag with the tag name "colgroup" had been seen,
2339
            and then, if that token wasn't ignored, reprocess the current token. */
2340
            $this->emitToken(array(
2341
                'name' => 'colgroup',
2342
                'type' => HTML5_Tokenizer::ENDTAG
2343
            ));
2344
2345
            if (!$this->ignored) $this->emitToken($token);
2346
        }
2347
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2348
2349
    case self::IN_TABLE_BODY:
2350
        $clear = array('tbody', 'tfoot', 'thead', 'html');
2351
2352
        /* A start tag whose tag name is "tr" */
2353
        if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
2354
            /* Clear the stack back to a table body context. */
2355
            $this->clearStackToTableContext($clear);
2356
2357
            /* Insert a tr element for the token, then switch the insertion
2358
            mode to "in row". */
2359
            $this->insertElement($token);
2360
            $this->mode = self::IN_ROW;
2361
2362
        /* A start tag whose tag name is one of: "th", "td" */
2363 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2364
        ($token['name'] === 'th' ||    $token['name'] === 'td')) {
2365
            /* Parse error. Act as if a start tag with the tag name "tr" had
2366
            been seen, then reprocess the current token. */
2367
            $this->emitToken(array(
2368
                'name' => 'tr',
2369
                'type' => HTML5_Tokenizer::STARTTAG,
2370
                'attr' => array()
2371
            ));
2372
2373
            $this->emitToken($token);
2374
2375
        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2376
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2377
        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2378
            /* If the stack of open elements does not have an element in table
2379
            scope with the same tag name as the token, this is a parse error.
2380
            Ignore the token. */
2381
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($t...e'], self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2382
                // Parse error
2383
                $this->ignored = true;
2384
2385
            /* Otherwise: */
2386
            } else {
2387
                /* Clear the stack back to a table body context. */
2388
                $this->clearStackToTableContext($clear);
2389
2390
                /* Pop the current node from the stack of open elements. Switch
2391
                the insertion mode to "in table". */
2392
                array_pop($this->stack);
2393
                $this->mode = self::IN_TABLE;
2394
            }
2395
2396
        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2397
        "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
2398
        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2399
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
2400
        ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2401
            /* If the stack of open elements does not have a tbody, thead, or
2402
            tfoot element in table scope, this is a parse error. Ignore the
2403
            token. (fragment case) */
2404
            if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope(ar...t'), self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2405
                // parse error
2406
                $this->ignored = true;
2407
2408
            /* Otherwise: */
2409
            } else {
2410
                /* Clear the stack back to a table body context. */
2411
                $this->clearStackToTableContext($clear);
2412
2413
                /* Act as if an end tag with the same tag name as the current
2414
                node ("tbody", "tfoot", or "thead") had been seen, then
2415
                reprocess the current token. */
2416
                $this->emitToken(array(
2417
                    'name' => end($this->stack)->tagName,
2418
                    'type' => HTML5_Tokenizer::ENDTAG
2419
                ));
2420
2421
                $this->emitToken($token);
2422
            }
2423
2424
        /* An end tag whose tag name is one of: "body", "caption", "col",
0 ignored issues
show
Unused Code Comprehensibility introduced by
40% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2425
        "colgroup", "html", "td", "th", "tr" */
2426
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2427
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
2428
            /* Parse error. Ignore the token. */
2429
            $this->ignored = true;
2430
2431
        /* Anything else */
2432
        } else {
2433
            /* Process the token as if the insertion mode was "in table". */
2434
            $this->processWithRulesFor($token, self::IN_TABLE);
2435
        }
2436
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2437
2438
    case self::IN_ROW:
2439
        $clear = array('tr', 'html');
2440
2441
        /* A start tag whose tag name is one of: "th", "td" */
2442
        if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2443
        ($token['name'] === 'th' || $token['name'] === 'td')) {
2444
            /* Clear the stack back to a table row context. */
2445
            $this->clearStackToTableContext($clear);
2446
2447
            /* Insert an HTML element for the token, then switch the insertion
2448
            mode to "in cell". */
2449
            $this->insertElement($token);
2450
            $this->mode = self::IN_CELL;
2451
2452
            /* Insert a marker at the end of the list of active formatting
2453
            elements. */
2454
            $this->a_formatting[] = self::MARKER;
2455
2456
        /* An end tag whose tag name is "tr" */
2457 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2458
            /* If the stack of open elements does not have an element in table
2459
            scope with the same tag name as the token, this is a parse error.
2460
            Ignore the token. (fragment case) */
2461
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($t...e'], self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2462
                // Ignore.
2463
                $this->ignored = true;
2464
2465
            /* Otherwise: */
2466
            } else {
2467
                /* Clear the stack back to a table row context. */
2468
                $this->clearStackToTableContext($clear);
2469
2470
                /* Pop the current node (which will be a tr element) from the
2471
                stack of open elements. Switch the insertion mode to "in table
2472
                body". */
2473
                array_pop($this->stack);
2474
                $this->mode = self::IN_TABLE_BODY;
2475
            }
2476
2477
        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2478
        "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
2479
        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2480
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
2481
        ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2482
            /* Act as if an end tag with the tag name "tr" had been seen, then,
2483
            if that token wasn't ignored, reprocess the current token. */
2484
            $this->emitToken(array(
2485
                'name' => 'tr',
2486
                'type' => HTML5_Tokenizer::ENDTAG
2487
            ));
2488
            if (!$this->ignored) $this->emitToken($token);
2489
2490
        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2491 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2492
        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2493
            /* If the stack of open elements does not have an element in table
2494
            scope with the same tag name as the token, this is a parse error.
2495
            Ignore the token. */
2496
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($t...e'], self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2497
                $this->ignored = true;
2498
2499
            /* Otherwise: */
2500
            } else {
2501
                /* Otherwise, act as if an end tag with the tag name "tr" had
2502
                been seen, then reprocess the current token. */
2503
                $this->emitToken(array(
2504
                    'name' => 'tr',
2505
                    'type' => HTML5_Tokenizer::ENDTAG
2506
                ));
2507
2508
                $this->emitToken($token);
2509
            }
2510
2511
        /* An end tag whose tag name is one of: "body", "caption", "col",
0 ignored issues
show
Unused Code Comprehensibility introduced by
37% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2512
        "colgroup", "html", "td", "th" */
2513
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2514
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
2515
            /* Parse error. Ignore the token. */
2516
            $this->ignored = true;
2517
2518
        /* Anything else */
2519
        } else {
2520
            /* Process the token as if the insertion mode was "in table". */
2521
            $this->processWithRulesFor($token, self::IN_TABLE);
2522
        }
2523
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2524
2525
    case self::IN_CELL:
2526
        /* An end tag whose tag name is one of: "td", "th" */
2527
        if($token['type'] === HTML5_Tokenizer::ENDTAG &&
2528
        ($token['name'] === 'td' || $token['name'] === 'th')) {
2529
            /* If the stack of open elements does not have an element in table
2530
            scope with the same tag name as that of the token, then this is a
2531
            parse error and the token must be ignored. */
2532
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($t...e'], self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2533
                $this->ignored = true;
2534
2535
            /* Otherwise: */
2536
            } else {
2537
                /* Generate implied end tags, except for elements with the same
2538
                tag name as the token. */
2539
                $this->generateImpliedEndTags(array($token['name']));
2540
2541
                /* Now, if the current node is not an element with the same tag
2542
                name as the token, then this is a parse error. */
2543
                // XERROR: Implement parse error code
2544
2545
                /* Pop elements from this stack until an element with the same
2546
                tag name as the token has been popped from the stack. */
2547
                do {
2548
                    $node = array_pop($this->stack);
2549
                } while ($node->tagName !== $token['name']);
2550
2551
                /* Clear the list of active formatting elements up to the last
2552
                marker. */
2553
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2554
2555
                /* Switch the insertion mode to "in row". (The current node
2556
                will be a tr element at this point.) */
2557
                $this->mode = self::IN_ROW;
2558
            }
2559
2560
        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
0 ignored issues
show
Unused Code Comprehensibility introduced by
41% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2561
        "tbody", "td", "tfoot", "th", "thead", "tr" */
2562 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2563
        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2564
        'thead', 'tr'))) {
2565
            /* If the stack of open elements does not have a td or th element
2566
            in table scope, then this is a parse error; ignore the token.
2567
            (fragment case) */
2568
            if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope(ar...h'), self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2569
                // parse error
2570
                $this->ignored = true;
2571
2572
            /* Otherwise, close the cell (see below) and reprocess the current
2573
            token. */
2574
            } else {
2575
                $this->closeCell();
2576
                $this->emitToken($token);
2577
            }
2578
2579
        /* An end tag whose tag name is one of: "body", "caption", "col",
2580
        "colgroup", "html" */
2581
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2582
        array('body', 'caption', 'col', 'colgroup', 'html'))) {
2583
            /* Parse error. Ignore the token. */
2584
            $this->ignored = true;
2585
2586
        /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
2587
        "thead", "tr" */
2588 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2589
        array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
2590
            /* If the stack of open elements does not have a td or th element
2591
            in table scope, then this is a parse error; ignore the token.
2592
            (innerHTML case) */
2593
            if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope(ar...h'), self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2594
                // Parse error
2595
                $this->ignored = true;
2596
2597
            /* Otherwise, close the cell (see below) and reprocess the current
2598
            token. */
2599
            } else {
2600
                $this->closeCell();
2601
                $this->emitToken($token);
2602
            }
2603
2604
        /* Anything else */
2605
        } else {
2606
            /* Process the token as if the insertion mode was "in body". */
2607
            $this->processWithRulesFor($token, self::IN_BODY);
2608
        }
2609
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2610
2611
    case self::IN_SELECT:
2612
        /* Handle the token as follows: */
2613
2614
        /* A character token */
2615
        if(
2616
            $token['type'] === HTML5_Tokenizer::CHARACTER ||
2617
            $token['type'] === HTML5_Tokenizer::SPACECHARACTER
2618
        ) {
2619
            /* Append the token's character to the current node. */
2620
            $this->insertText($token['data']);
2621
2622
        /* A comment token */
2623
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2624
            /* Append a Comment node to the current node with the data
2625
            attribute set to the data given in the comment token. */
2626
            $this->insertComment($token['data']);
2627
2628
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2629
            // parse error
2630
2631
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2632
            $this->processWithRulesFor($token, self::INBODY);
2633
2634
        /* A start tag token whose tag name is "option" */
2635
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2636
        $token['name'] === 'option') {
2637
            /* If the current node is an option element, act as if an end tag
2638
            with the tag name "option" had been seen. */
2639 View Code Duplication
            if(end($this->stack)->tagName === 'option') {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2640
                $this->emitToken(array(
2641
                    'name' => 'option',
2642
                    'type' => HTML5_Tokenizer::ENDTAG
2643
                ));
2644
            }
2645
2646
            /* Insert an HTML element for the token. */
2647
            $this->insertElement($token);
2648
2649
        /* A start tag token whose tag name is "optgroup" */
2650
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2651
        $token['name'] === 'optgroup') {
2652
            /* If the current node is an option element, act as if an end tag
2653
            with the tag name "option" had been seen. */
2654 View Code Duplication
            if(end($this->stack)->tagName === 'option') {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2655
                $this->emitToken(array(
2656
                    'name' => 'option',
2657
                    'type' => HTML5_Tokenizer::ENDTAG
2658
                ));
2659
            }
2660
2661
            /* If the current node is an optgroup element, act as if an end tag
2662
            with the tag name "optgroup" had been seen. */
2663 View Code Duplication
            if(end($this->stack)->tagName === 'optgroup') {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2664
                $this->emitToken(array(
2665
                    'name' => 'optgroup',
2666
                    'type' => HTML5_Tokenizer::ENDTAG
2667
                ));
2668
            }
2669
2670
            /* Insert an HTML element for the token. */
2671
            $this->insertElement($token);
2672
2673
        /* An end tag token whose tag name is "optgroup" */
2674
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2675
        $token['name'] === 'optgroup') {
2676
            /* First, if the current node is an option element, and the node
2677
            immediately before it in the stack of open elements is an optgroup
2678
            element, then act as if an end tag with the tag name "option" had
2679
            been seen. */
2680
            $elements_in_stack = count($this->stack);
2681
2682
            if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
2683
            $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
2684
                $this->emitToken(array(
2685
                    'name' => 'option',
2686
                    'type' => HTML5_Tokenizer::ENDTAG
2687
                ));
2688
            }
2689
2690
            /* If the current node is an optgroup element, then pop that node
2691
            from the stack of open elements. Otherwise, this is a parse error,
2692
            ignore the token. */
2693
            if(end($this->stack)->tagName === 'optgroup') {
2694
                array_pop($this->stack);
2695
            } else {
2696
                // parse error
2697
                $this->ignored = true;
2698
            }
2699
2700
        /* An end tag token whose tag name is "option" */
2701
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2702
        $token['name'] === 'option') {
2703
            /* If the current node is an option element, then pop that node
2704
            from the stack of open elements. Otherwise, this is a parse error,
2705
            ignore the token. */
2706
            if(end($this->stack)->tagName === 'option') {
2707
                array_pop($this->stack);
2708
            } else {
2709
                // parse error
2710
                $this->ignored = true;
2711
            }
2712
2713
        /* An end tag whose tag name is "select" */
2714 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2715
        $token['name'] === 'select') {
2716
            /* If the stack of open elements does not have an element in table
2717
            scope with the same tag name as the token, this is a parse error.
2718
            Ignore the token. (fragment case) */
2719
            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($t...e'], self::SCOPE_TABLE) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
2720
                $this->ignored = true;
2721
                // parse error
2722
2723
            /* Otherwise: */
2724
            } else {
2725
                /* Pop elements from the stack of open elements until a select
2726
                element has been popped from the stack. */
2727
                do {
2728
                    $node = array_pop($this->stack);
2729
                } while ($node->tagName !== 'select');
2730
2731
                /* Reset the insertion mode appropriately. */
2732
                $this->resetInsertionMode();
2733
            }
2734
2735
        /* A start tag whose tag name is "select" */
2736
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
2737
            /* Parse error. Act as if the token had been an end tag with the
2738
            tag name "select" instead. */
2739
            $this->emitToken(array(
2740
                'name' => 'select',
2741
                'type' => HTML5_Tokenizer::ENDTAG
2742
            ));
2743
2744 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2745
        ($token['name'] === 'input' || $token['name'] === 'keygen' ||  $token['name'] === 'textarea')) {
2746
            // parse error
2747
            $this->emitToken(array(
2748
                'name' => 'select',
2749
                'type' => HTML5_Tokenizer::ENDTAG
2750
            ));
2751
            $this->emitToken($token);
2752
2753
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
2754
            $this->processWithRulesFor($token, self::IN_HEAD);
2755
2756
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2757
            // XERROR: If the current node is not the root html element, then this is a parse error.
2758
            /* Stop parsing */
2759
2760
        /* Anything else */
2761
        } else {
2762
            /* Parse error. Ignore the token. */
2763
            $this->ignored = true;
2764
        }
2765
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2766
2767
    case self::IN_SELECT_IN_TABLE:
2768
2769
        if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2770
        in_array($token['name'], array('caption', 'table', 'tbody',
2771
        'tfoot', 'thead', 'tr', 'td', 'th'))) {
2772
            // parse error
2773
            $this->emitToken(array(
2774
                'name' => 'select',
2775
                'type' => HTML5_Tokenizer::ENDTAG,
2776
            ));
2777
            $this->emitToken($token);
2778
2779
        /* An end tag whose tag name is one of: "caption", "table", "tbody",
0 ignored issues
show
Unused Code Comprehensibility introduced by
40% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
2780
        "tfoot", "thead", "tr", "td", "th" */
2781 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2782
        in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th')))  {
2783
            /* Parse error. */
2784
            // parse error
2785
2786
            /* If the stack of open elements has an element in table scope with
2787
            the same tag name as that of the token, then act as if an end tag
2788
            with the tag name "select" had been seen, and reprocess the token.
2789
            Otherwise, ignore the token. */
2790
            if($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2791
                $this->emitToken(array(
2792
                    'name' => 'select',
2793
                    'type' => HTML5_Tokenizer::ENDTAG
2794
                ));
2795
2796
                $this->emitToken($token);
2797
            } else {
2798
                $this->ignored = true;
2799
            }
2800
        } else {
2801
            $this->processWithRulesFor($token, self::IN_SELECT);
2802
        }
2803
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2804
2805
    case self::IN_FOREIGN_CONTENT:
2806
        if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2807
        $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2808
            $this->insertText($token['data']);
2809
        } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2810
            $this->insertComment($token['data']);
2811
        } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2812
            // XERROR: parse error
2813
        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2814
        $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
2815
        // XDOM
2816
        end($this->stack)->namespaceURI === self::NS_SVG) {
2817
            array_pop($this->stack);
2818
            // a bunch of script running mumbo jumbo
2819
        } elseif (
2820
            ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2821
                ((
2822
                    $token['name'] !== 'mglyph' &&
2823
                    $token['name'] !== 'malignmark' &&
2824
                    // XDOM
2825
                    end($this->stack)->namespaceURI === self::NS_MATHML &&
2826
                    in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
2827
                ) ||
2828
                (
2829
                    $token['name'] === 'svg' &&
2830
                    // XDOM
2831
                    end($this->stack)->namespaceURI === self::NS_MATHML &&
2832
                    end($this->stack)->tagName === 'annotation-xml'
2833
                ) ||
2834
                (
2835
                    // XDOM
2836
                    end($this->stack)->namespaceURI === self::NS_SVG &&
2837
                    in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
2838
                ) ||
2839
                (
2840
                    // XSKETCHY && XDOM
2841
                    end($this->stack)->namespaceURI === self::NS_HTML
2842
                ))
2843
            ) || $token['type'] === HTML5_Tokenizer::ENDTAG
2844
        ) {
2845
            $this->processWithRulesFor($token, $this->secondary_mode);
2846
            /* If, after doing so, the insertion mode is still "in foreign 
2847
             * content", but there is no element in scope that has a namespace 
2848
             * other than the HTML namespace, switch the insertion mode to the 
2849
             * secondary insertion mode. */
2850
            if ($this->mode === self::IN_FOREIGN_CONTENT) {
2851
                $found = false;
2852
                // this basically duplicates elementInScope()
2853
                for ($i = count($this->stack) - 1; $i >= 0; $i--) {
2854
                    // XDOM
2855
                    $node = $this->stack[$i];
2856
                    if ($node->namespaceURI !== self::NS_HTML) {
2857
                        $found = true;
2858
                        break;
2859
                    } elseif (in_array($node->tagName, array('table', 'html',
2860
                    'applet', 'caption', 'td', 'th', 'button', 'marquee',
2861
                    'object')) || ($node->tagName === 'foreignObject' &&
2862
                    $node->namespaceURI === self::NS_SVG)) {
2863
                        break;
2864
                    }
2865
                }
2866
                if (!$found) {
2867
                    $this->mode = $this->secondary_mode;
2868
                }
2869
            }
2870
        } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
2871
        $token['type'] === HTML5_Tokenizer::STARTTAG &&
2872
        (in_array($token['name'], array('b', "big", "blockquote", "body", "br", 
2873
        "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2", 
2874
        "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing", 
2875
        "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s",  "small", 
2876
        "span", "strong", "strike",  "sub", "sup", "table", "tt", "u", "ul", 
2877
        "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
2878
        $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
2879
            // XERROR: parse error
2880
            do {
2881
                $node = array_pop($this->stack);
2882
                // XDOM
2883
            } while ($node->namespaceURI !== self::NS_HTML);
2884
            $this->stack[] = $node;
2885
            $this->mode = $this->secondary_mode;
2886
            $this->emitToken($token);
2887
        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
2888
            static $svg_lookup = array(
2889
                'altglyph' => 'altGlyph',
2890
                'altglyphdef' => 'altGlyphDef',
2891
                'altglyphitem' => 'altGlyphItem',
2892
                'animatecolor' => 'animateColor',
2893
                'animatemotion' => 'animateMotion',
2894
                'animatetransform' => 'animateTransform',
2895
                'clippath' => 'clipPath',
2896
                'feblend' => 'feBlend',
2897
                'fecolormatrix' => 'feColorMatrix',
2898
                'fecomponenttransfer' => 'feComponentTransfer',
2899
                'fecomposite' => 'feComposite',
2900
                'feconvolvematrix' => 'feConvolveMatrix',
2901
                'fediffuselighting' => 'feDiffuseLighting',
2902
                'fedisplacementmap' => 'feDisplacementMap',
2903
                'fedistantlight' => 'feDistantLight',
2904
                'feflood' => 'feFlood',
2905
                'fefunca' => 'feFuncA',
2906
                'fefuncb' => 'feFuncB',
2907
                'fefuncg' => 'feFuncG',
2908
                'fefuncr' => 'feFuncR',
2909
                'fegaussianblur' => 'feGaussianBlur',
2910
                'feimage' => 'feImage',
2911
                'femerge' => 'feMerge',
2912
                'femergenode' => 'feMergeNode',
2913
                'femorphology' => 'feMorphology',
2914
                'feoffset' => 'feOffset',
2915
                'fepointlight' => 'fePointLight',
2916
                'fespecularlighting' => 'feSpecularLighting',
2917
                'fespotlight' => 'feSpotLight',
2918
                'fetile' => 'feTile',
2919
                'feturbulence' => 'feTurbulence',
2920
                'foreignobject' => 'foreignObject',
2921
                'glyphref' => 'glyphRef',
2922
                'lineargradient' => 'linearGradient',
2923
                'radialgradient' => 'radialGradient',
2924
                'textpath' => 'textPath',
2925
            );
2926
            // XDOM
2927
            $current = end($this->stack);
2928
            if ($current->namespaceURI === self::NS_MATHML) {
2929
                $token = $this->adjustMathMLAttributes($token);
2930
            }
2931
            if ($current->namespaceURI === self::NS_SVG &&
2932
            isset($svg_lookup[$token['name']])) {
2933
                $token['name'] = $svg_lookup[$token['name']];
2934
            }
2935
            if ($current->namespaceURI === self::NS_SVG) {
2936
                $token = $this->adjustSVGAttributes($token);
2937
            }
2938
            $token = $this->adjustForeignAttributes($token);
2939
            $this->insertForeignElement($token, $current->namespaceURI);
2940
            if (isset($token['self-closing'])) {
2941
                array_pop($this->stack);
2942
                // XERROR: acknowledge self-closing flag
2943
            }
2944
        }
2945
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2946
2947
    case self::AFTER_BODY:
2948
        /* Handle the token as follows: */
2949
2950
        /* A character token that is one of U+0009 CHARACTER TABULATION,
2951
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2952
        or U+0020 SPACE */
2953
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2954
            /* Process the token as it would be processed if the insertion mode
2955
            was "in body". */
2956
            $this->processWithRulesFor($token, self::IN_BODY);
2957
2958
        /* A comment token */
2959 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
2960
            /* Append a Comment node to the first element in the stack of open
2961
            elements (the html element), with the data attribute set to the
2962
            data given in the comment token. */
2963
            // XDOM
2964
            $comment = $this->dom->createComment($token['data']);
2965
            $this->stack[0]->appendChild($comment);
2966
2967
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2968
            // parse error
2969
2970
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2971
            $this->processWithRulesFor($token, self::IN_BODY);
2972
2973
        /* An end tag with the tag name "html" */
2974
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
2975
            /*     If the parser was originally created as part of the HTML
2976
             *     fragment parsing algorithm, this is a parse error; ignore
2977
             *     the token. (fragment case) */
2978
            $this->ignored = true;
2979
            // XERROR: implement this
2980
2981
            $this->mode = self::AFTER_AFTER_BODY;
2982
2983
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2984
            /* Stop parsing */
2985
2986
        /* Anything else */
2987
        } else {
2988
            /* Parse error. Set the insertion mode to "in body" and reprocess
2989
            the token. */
2990
            $this->mode = self::IN_BODY;
2991
            $this->emitToken($token);
2992
        }
2993
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
2994
2995
    case self::IN_FRAMESET:
2996
        /* Handle the token as follows: */
2997
2998
        /* A character token that is one of U+0009 CHARACTER TABULATION,
2999
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3000
        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3001
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
3002
            /* Append the character to the current node. */
3003
            $this->insertText($token['data']);
3004
3005
        /* A comment token */
3006
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
3007
            /* Append a Comment node to the current node with the data
3008
            attribute set to the data given in the comment token. */
3009
            $this->insertComment($token['data']);
3010
3011
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
3012
            // parse error
3013
3014
        /* A start tag with the tag name "frameset" */
3015
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3016
        $token['name'] === 'frameset') {
3017
            $this->insertElement($token);
3018
3019
        /* An end tag with the tag name "frameset" */
3020
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
3021
        $token['name'] === 'frameset') {
3022
            /* If the current node is the root html element, then this is a
3023
            parse error; ignore the token. (fragment case) */
3024
            if(end($this->stack)->tagName === 'html') {
3025
                $this->ignored = true;
3026
                // Parse error
3027
3028
            } else {
3029
                /* Otherwise, pop the current node from the stack of open
3030
                elements. */
3031
                array_pop($this->stack);
3032
3033
                /* If the parser was not originally created as part of the HTML 
3034
                 * fragment parsing algorithm  (fragment case), and the current 
3035
                 * node is no longer a frameset element, then switch the 
3036
                 * insertion mode to "after frameset". */
3037
                $this->mode = self::AFTER_FRAMESET;
3038
            }
3039
3040
        /* A start tag with the tag name "frame" */
3041
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3042
        $token['name'] === 'frame') {
3043
            /* Insert an HTML element for the token. */
3044
            $this->insertElement($token);
3045
3046
            /* Immediately pop the current node off the stack of open elements. */
3047
            array_pop($this->stack);
3048
3049
            // XERROR: Acknowledge the token's self-closing flag, if it is set.
3050
3051
        /* A start tag with the tag name "noframes" */
3052
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3053
        $token['name'] === 'noframes') {
3054
            /* Process the token using the rules for the "in head" insertion mode. */
3055
            $this->processwithRulesFor($token, self::IN_HEAD);
3056
3057
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
3058
            // XERROR: If the current node is not the root html element, then this is a parse error.
3059
            /* Stop parsing */
3060
        /* Anything else */
3061
        } else {
3062
            /* Parse error. Ignore the token. */
3063
            $this->ignored = true;
3064
        }
3065
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
3066
3067
    case self::AFTER_FRAMESET:
3068
        /* Handle the token as follows: */
3069
3070
        /* A character token that is one of U+0009 CHARACTER TABULATION,
3071
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3072
        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3073
        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
3074
            /* Append the character to the current node. */
3075
            $this->insertText($token['data']);
3076
3077
        /* A comment token */
3078
        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
3079
            /* Append a Comment node to the current node with the data
3080
            attribute set to the data given in the comment token. */
3081
            $this->insertComment($token['data']);
3082
3083
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
3084
            // parse error
3085
3086
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
3087
            $this->processWithRulesFor($token, self::IN_BODY);
3088
3089
        /* An end tag with the tag name "html" */
3090
        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
3091
        $token['name'] === 'html') {
3092
            $this->mode = self::AFTER_AFTER_FRAMESET;
3093
3094
        /* A start tag with the tag name "noframes" */
3095
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3096
        $token['name'] === 'noframes') {
3097
            $this->processWithRulesFor($token, self::IN_HEAD);
3098
3099
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
3100
            /* Stop parsing */
3101
3102
        /* Anything else */
3103
        } else {
3104
            /* Parse error. Ignore the token. */
3105
            $this->ignored = true;
3106
        }
3107
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
3108
3109
    case self::AFTER_AFTER_BODY:
3110
        /* A comment token */
3111
        if($token['type'] === HTML5_Tokenizer::COMMENT) {
3112
            /* Append a Comment node to the Document object with the data
3113
            attribute set to the data given in the comment token. */
3114
            // XDOM
3115
            $comment = $this->dom->createComment($token['data']);
3116
            $this->dom->appendChild($comment);
3117
3118 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
3119
        $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3120
        ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3121
            $this->processWithRulesFor($token, self::IN_BODY);
3122
3123
        /* An end-of-file token */
3124
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
3125
            /* OMG DONE!! */
3126
        } else {
3127
            // parse error
3128
            $this->mode = self::IN_BODY;
3129
            $this->emitToken($token);
3130
        }
3131
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
3132
3133
    case self::AFTER_AFTER_FRAMESET:
3134
        /* A comment token */
3135
        if($token['type'] === HTML5_Tokenizer::COMMENT) {
3136
            /* Append a Comment node to the Document object with the data
3137
            attribute set to the data given in the comment token. */
3138
            // XDOM
3139
            $comment = $this->dom->createComment($token['data']);
3140
            $this->dom->appendChild($comment);
3141
3142 View Code Duplication
        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
0 ignored issues
show
This code seems to be duplicated across your project.

Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.

You can also find more detailed suggestions in the “Code” section of your repository.

Loading history...
3143
        $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3144
        ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3145
            $this->processWithRulesFor($token, self::IN_BODY);
3146
3147
        /* An end-of-file token */
3148
        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
0 ignored issues
show
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
3149
            /* OMG DONE!! */
3150
        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
3151
            $this->processWithRulesFor($token, self::IN_HEAD);
3152
        } else {
0 ignored issues
show
This else statement is empty and can be removed.

This check looks for the else branches of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These else branches can be removed.

if (rand(1, 6) > 3) {
print "Check failed";
} else {
    //print "Check succeeded";
}

could be turned into

if (rand(1, 6) > 3) {
    print "Check failed";
}

This is much more concise to read.

Loading history...
3153
            // parse error
3154
        }
3155
    break;
0 ignored issues
show
Terminating statement must be indented to the same level as the CASE body
Loading history...
3156
    }
3157
        // end funky indenting
3158
        }
3159
3160
    /**
     * Creates an HTML-namespace element for a tag token and, unless told
     * otherwise, inserts it and makes it the current node.
     *
     * @param array $token  Tag token; 'name' is read, and 'attr' (when
     *                      non-empty) is iterated as a list of arrays that
     *                      each carry 'name' and 'value'.
     * @param bool  $append When true the element is passed to
     *                      appendToRealParent() and pushed onto the stack of
     *                      open elements; when false it is only created.
     * @return object The element returned by createElementNS().
     */
    private function insertElement($token, $append = true) {
        // XDOM
        $element = $this->dom->createElementNS(self::NS_HTML, $token['name']);

        $attributes = empty($token['attr']) ? array() : $token['attr'];
        foreach ($attributes as $attribute) {
            // The first occurrence of an attribute name wins; later
            // duplicates on the same token are dropped.
            if ($element->hasAttribute($attribute['name'])) {
                continue;
            }
            $element->setAttribute($attribute['name'], $attribute['value']);
        }

        if (!$append) {
            return $element;
        }

        $this->appendToRealParent($element);
        $this->stack[] = $element;

        return $element;
    }
3177
3178
    /**
     * Appends character data as a text node at the current insertion point.
     *
     * Empty data is ignored. While the ignore_lf_token flag is set, a single
     * leading line feed is stripped first, and nothing is inserted if that
     * leaves no data behind.
     *
     * @param string $data Character data taken from the token.
     * @return void
     */
    private function insertText($data) {
        if ($data === '') {
            return;
        }

        if ($this->ignore_lf_token && $data[0] === "\n") {
            $data = substr($data, 1);
            // Depending on the PHP version, substr() reports "nothing left"
            // as either false (older releases) or '' (newer releases). Both
            // mean there is no text to insert, so no empty text node is made.
            if ($data === false || $data === '') {
                return;
            }
        }

        // XDOM
        $text = $this->dom->createTextNode($data);
        $this->appendToRealParent($text);
    }
3189
3190
    /**
     * Creates a comment node holding $data and appends it through
     * appendToRealParent().
     *
     * @param string $data Comment text taken from the comment token.
     * @return void
     */
    private function insertComment($data) {
        // XDOM
        $this->appendToRealParent($this->dom->createComment($data));
    }
3194
3195
    /**
     * Appends a node to the current node, or hands it to fosterParent() when
     * foster parenting is active and applies.
     *
     * Foster parenting applies only when the foster_parent flag is set and the
     * current node (last entry of the stack of open elements) is a table,
     * tbody, tfoot, thead or tr element. In every other case the node becomes
     * a child of the current node.
     *
     * @param object $node Node to insert.
     * @return void
     */
    private function appendToRealParent($node) {
        $current = end($this->stack);

        $needsFostering = $this->foster_parent
            && in_array($current->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'));

        if ($needsFostering) {
            $this->fosterParent($node);
            return;
        }

        $current->appendChild($node);
    }
3207
3208
    /**
     * Tells whether the stack of open elements has a given element in scope.
     *
     * The stack is walked from the current node (last entry) towards the
     * root. The search succeeds on the first node whose tag name equals the
     * target and fails on the first scope-boundary node met before that.
     * Matching is by tag name only.
     *
     * @param string|array $el    Tag name to look for, or a list of tag names
     *                            (true as soon as any one of them is in scope).
     * @param mixed        $scope One of the self::SCOPE* constants; selects
     *                            which boundary elements apply besides the
     *                            common "table" and "html".
     * @return bool True when the element is in scope; false otherwise,
     *              including when the stack is exhausted without meeting
     *              either a match or a boundary.
     */
    private function elementInScope($el, $scope = self::SCOPE) {
        if (is_array($el)) {
            foreach ($el as $element) {
                if ($this->elementInScope($element, $scope)) {
                    return true;
                }
            }

            return false;
        }

        /* 1. Initialise node to be the current node (the bottommost node of
        the stack), then work towards the top of the stack. */
        for ($i = count($this->stack) - 1; $i >= 0; $i--) {
            $node = $this->stack[$i];

            /* 2. If node is the target node, terminate in a match state. */
            if ($node->tagName === $el) {
                return true;
            }

            // We've expanded the logic for these states a little differently;
            // Hixie's refactoring into "specific scope" is more general, but
            // this "gets the job done"

            // these are the common boundaries for all scopes
            if ($node->tagName === 'table' || $node->tagName === 'html') {
                return false;
            }

            // these are valid for "in scope" and "in list item scope"
            if ($scope !== self::SCOPE_TABLE && (
                in_array($node->tagName, array('applet', 'caption', 'td',
                    'th', 'button', 'marquee', 'object'), true) ||
                ($node->tagName === 'foreignObject' && $node->namespaceURI === self::NS_SVG)
            )) {
                return false;
            }

            // these are valid for "in list item scope"
            if ($scope === self::SCOPE_LISTITEM &&
                in_array($node->tagName, array('ol', 'ul'), true)) {
                return false;
            }

            /* Otherwise, move on to the previous entry in the stack of open
            elements and return to step 2. */
        }

        // The stack ran out without a match or a boundary (e.g. an empty
        // stack). Report "not in scope" explicitly so the method always
        // returns a boolean rather than falling through with null.
        return false;
    }
3257
3258
    /**
     * Reconstructs the active formatting elements: every entry after the
     * last marker / last still-open element is cloned, inserted, pushed on
     * the stack of open elements, and replaces its original in the list.
     *
     * @return false|null False when there was nothing to reconstruct,
     *                    null after entries have been reconstructed.
     */
    private function reconstructActiveFormattingElements() {
        /* 1. If there are no entries in the list of active formatting elements,
        then there is nothing to reconstruct; stop this algorithm. */
        $formatting_elements = count($this->a_formatting);

        if ($formatting_elements === 0) {
            return false;
        }

        /* 3. Let entry be the last (most recently added) element in the list
        of active formatting elements. */
        $entry = end($this->a_formatting);

        /* 2. If the last (most recently added) entry in the list of active
        formatting elements is a marker, or if it is an element that is in the
        stack of open elements, then there is nothing to reconstruct; stop this
        algorithm. */
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            return false;
        }

        // Index of entry within the list, and whether step 7 ("advance")
        // has to run before the next clone is made.
        $a = $formatting_elements - 1;
        $step_seven = false;

        /* 4. If there are no entries before entry in the list of active
        formatting elements, then jump to step 8. */
        while ($a > 0) {
            /* 5. Let entry be the entry one earlier than entry in the list of
            active formatting elements. */
            $a--;
            $entry = $this->a_formatting[$a];

            /* 6. If entry is neither a marker nor an element that is also in
            the stack of open elements, go to step 4. */
            if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
                // entry itself must not be cloned: it is a marker or is
                // still open, so cloning starts at the entry after it.
                $step_seven = true;
                break;
            }
        }

        while (true) {
            /* 7. Let entry be the element one later than entry in the list of
            active formatting elements. */
            if ($step_seven) {
                $a++;
                $entry = $this->a_formatting[$a];
            }

            /* 8. Perform a shallow clone of the element entry to obtain clone. */
            $clone = $entry->cloneNode();

            /* 9. Append clone to the current node and push it onto the stack
            of open elements so that it is the new current node. */
            $this->appendToRealParent($clone);
            $this->stack[] = $clone;

            /* 10. Replace the entry for entry in the list with an entry for
            clone. */
            $this->a_formatting[$a] = $clone;

            /* 11. If the entry for clone in the list of active formatting
            elements is not the last entry in the list, return to step 7. */
            if (end($this->a_formatting) === $clone) {
                break;
            }
            $step_seven = true;
        }
    }
3328
3329
    /**
     * Removes entries from the end of the list of active formatting
     * elements up to and including the last marker.
     *
     * If the list holds no marker it is simply emptied.
     *
     * @return void
     */
    private function clearTheActiveFormattingElementsUpToTheLastMarker() {
        /* When the steps below require the UA to clear the list of active
        formatting elements up to the last marker, the UA must perform the
        following steps: */

        // Stop once the list is empty: without this bound a list that
        // contains no marker would keep popping from an empty array forever.
        while (!empty($this->a_formatting)) {
            /* 1. Let entry be the last (most recently added) entry in the list
            of active formatting elements.
            2. Remove entry from the list of active formatting elements. */
            $entry = array_pop($this->a_formatting);

            /* 3. If entry was a marker, then stop the algorithm at this point.
            The list has been cleared up to the last marker. */
            if ($entry === self::MARKER) {
                break;
            }
        }
    }
3349
3350
    /**
     * Pops the current node off the stack of open elements for as long as
     * its tag name is one of the implied-end-tag names not listed in
     * $exclude.
     *
     * @param array $exclude Tag names that must not be closed implicitly.
     * @return void
     */
    private function generateImpliedEndTags($exclude = array()) {
        /* When the steps below require the UA to generate implied end tags,
         * then, while the current node is a dc element, a dd element, a ds
         * element, a dt element, an li element, an option element, an optgroup
         * element, a p element, an rp element, or an rt element, the UA must
         * pop the current node off the stack of open elements. */
        // NOTE(review): the list below differs from the quoted text (it has
        // td/th/tr and lacks option/optgroup/rp/rt); kept as-is, confirm intent.
        $elements = array_diff(array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

        // Guard against an exhausted stack: end() would return false there
        // and the tagName lookup would be made on a non-object.
        while (!empty($this->stack) && in_array(end($this->stack)->tagName, $elements)) {
            array_pop($this->stack);
        }
    }
3363
3364
    /**
     * Classifies a node by its tag name as special, scoping, formatting or
     * (by default) phrasing, using the builder's category lists.
     *
     * @param DOMElement $node Element to classify.
     * @return mixed One of the SPECIAL, SCOPING, FORMATTING or PHRASING
     *               class constants.
     */
    private function getElementCategory($node) {
        // Debugging aid: dump the call chain when a non-object sneaks in.
        if (!is_object($node)) debug_print_backtrace();

        $name = $node->tagName;

        // The lists are consulted in priority order; first hit wins.
        if (in_array($name, $this->special)) {
            return self::SPECIAL;
        }
        if (in_array($name, $this->scoping)) {
            return self::SCOPING;
        }
        if (in_array($name, $this->formatting)) {
            return self::FORMATTING;
        }

        return self::PHRASING;
    }
3379
3380
    /**
     * Pops elements off the stack of open elements until the current node's
     * tag name is one of $elements.
     *
     * @param array $elements Tag names at which popping stops.
     * @return void
     */
    private function clearStackToTableContext($elements) {
        /* When the steps above require the UA to clear the stack back to a
        table context, it means that the UA must, while the current node is not
        a table element or an html element, pop elements from the stack of open
        elements. */
        while (!in_array(end($this->stack)->tagName, $elements)) {
            array_pop($this->stack);
        }
    }
3395
3396
    /**
     * Resets the insertion mode by walking the stack of open elements from
     * the current node upwards and switching $this->mode according to the
     * first node that determines a mode.
     *
     * @return void
     */
    private function resetInsertionMode() {
        /* 1. Let last be false. */
        $last = false;

        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
            /* 2. Let node be the last node in the stack of open elements. */
            $node = $this->stack[$n];

            /* 3. If node is the first node in the stack of open elements, then
             * set last to true and set node to the context element. (fragment
             * case) */
            if ($this->stack[0]->isSameNode($node)) {
                $last = true;
                if (empty($this->context)) {
                    // No context element to inspect. Previously null was
                    // dereferenced here and the chain fell through to
                    // step 16; reach the same result without doing so.
                    $this->mode = self::IN_BODY;
                    break;
                }
                $node = $this->context;
            }

            $tag = $node->tagName;
            $ns = $node->namespaceURI;

            /* 4. If node is a select element, then switch the insertion mode to
            "in select" and abort these steps. (fragment case) */
            if ($tag === 'select') {
                $this->mode = self::IN_SELECT;
                break;

            /* 5. If node is a td or th element, then switch the insertion mode
            to "in cell" and abort these steps. */
            } elseif ($tag === 'td' || $tag === 'th') {
                $this->mode = self::IN_CELL;
                break;

            /* 6. If node is a tr element, then switch the insertion mode to
            "in row" and abort these steps. */
            } elseif ($tag === 'tr') {
                $this->mode = self::IN_ROW;
                break;

            /* 7. If node is a tbody, thead, or tfoot element, then switch the
            insertion mode to "in table body" and abort these steps. */
            } elseif (in_array($tag, array('tbody', 'thead', 'tfoot'))) {
                $this->mode = self::IN_TABLE_BODY;
                break;

            /* 8. If node is a caption element, then switch the insertion mode
            to "in caption" and abort these steps. */
            } elseif ($tag === 'caption') {
                $this->mode = self::IN_CAPTION;
                break;

            /* 9. If node is a colgroup element, then switch the insertion mode
            to "in column group" and abort these steps. (innerHTML case) */
            } elseif ($tag === 'colgroup') {
                $this->mode = self::IN_COLUMN_GROUP;
                break;

            /* 10. If node is a table element, then switch the insertion mode
            to "in table" and abort these steps. */
            } elseif ($tag === 'table') {
                $this->mode = self::IN_TABLE;
                break;

            /* 11. If node is an element from the MathML namespace or the SVG
             * namespace, then switch the insertion mode to "in foreign
             * content", let the secondary insertion mode be "in body", and
             * abort these steps. */
            } elseif ($ns === self::NS_SVG || $ns === self::NS_MATHML) {
                $this->mode = self::IN_FOREIGN_CONTENT;
                $this->secondary_mode = self::IN_BODY;
                break;

            /* 12. If node is a head element, then switch the insertion mode
            to "in body" ("in body"! not "in head"!) and abort these steps.
            (fragment case) */
            } elseif ($tag === 'head') {
                $this->mode = self::IN_BODY;
                break;

            /* 13. If node is a body element, then switch the insertion mode to
            "in body" and abort these steps. */
            } elseif ($tag === 'body') {
                $this->mode = self::IN_BODY;
                break;

            /* 14. If node is a frameset element, then switch the insertion
            mode to "in frameset" and abort these steps. (fragment case) */
            } elseif ($tag === 'frameset') {
                $this->mode = self::IN_FRAMESET;
                break;

            /* 15. If node is an html element, then: if the head element
            pointer is null, switch the insertion mode to "before head",
            otherwise, switch the insertion mode to "after head". In either
            case, abort these steps. (fragment case) */
            } elseif ($tag === 'html') {
                $this->mode = ($this->head_pointer === null)
                    ? self::BEFORE_HEAD
                    : self::AFTER_HEAD;

                break;

            /* 16. If last is true, then set the insertion mode to "in body"
            and abort these steps. (fragment case) */
            } elseif ($last) {
                $this->mode = self::IN_BODY;
                break;
            }
        }
    }
3503
3504
    /**
     * Closes the currently open table cell, if any, by emitting a synthetic
     * end tag token for it.
     *
     * @return void
     */
    private function closeCell() {
        /* If the stack of open elements has a td or th element in table scope,
        then act as if an end tag token with that tag name had been seen. */
        if ($this->elementInScope('td', self::SCOPE_TABLE)) {
            $cell = 'td';
        } elseif ($this->elementInScope('th', self::SCOPE_TABLE)) {
            $cell = 'th';
        } else {
            // neither kind of cell is in table scope: nothing to close
            return;
        }

        $end_tag = array(
            'name' => $cell,
            'type' => HTML5_Tokenizer::ENDTAG
        );
        $this->emitToken($end_tag);
    }
3518
3519
    /**
     * Processes $token with the rules of insertion mode $mode by passing
     * both on to emitToken().
     *
     * @param array $token Token to process.
     * @param mixed $mode  Insertion mode whose rules should be applied.
     * @return mixed Whatever emitToken() returns.
     */
    private function processWithRulesFor($token, $mode) {
        /* "using the rules for the m insertion mode", where m is one of these
         * modes, the user agent must use the rules described under the m
         * insertion mode's section, but must leave the insertion mode
         * unchanged unless the rules in m themselves switch the insertion mode
         * to a new value. */
        $result = $this->emitToken($token, $mode);
        return $result;
    }
3527
3528 View Code Duplication
    /**
     * Inserts an element for $token and switches the builder to the
     * CDATA/RCDATA insertion mode with a CDATA content model.
     *
     * @param array $token Start tag token.
     * @return void
     */
    private function insertCDATAElement($token) {
        $this->insertElement($token);

        // Content model requested from the tokenizer for this element.
        $this->content_model = HTML5_Tokenizer::CDATA;

        // Save the mode that was active before switching away from it.
        $this->original_mode = $this->mode;
        $this->mode = self::IN_CDATA_RCDATA;
    }
3534
3535 View Code Duplication
    /**
     * Inserts an element for $token and switches the builder to the
     * CDATA/RCDATA insertion mode with an RCDATA content model.
     *
     * @param array $token Start tag token.
     * @return void
     */
    private function insertRCDATAElement($token) {
        $this->insertElement($token);

        // Content model requested from the tokenizer for this element.
        $this->content_model = HTML5_Tokenizer::RCDATA;

        // Save the mode that was active before switching away from it.
        $this->original_mode = $this->mode;
        $this->mode = self::IN_CDATA_RCDATA;
    }
3541
3542
    /**
     * Looks up the value of the attribute named $key on a token.
     *
     * @param array  $token Token whose 'attr' list of name/value pairs is
     *                      searched.
     * @param string $key   Attribute name to look for.
     * @return mixed The value of the last pair whose name matches, or false
     *               when the token has no 'attr' entry or no pair matches.
     */
    private function getAttr($token, $key) {
        if (!isset($token['attr'])) {
            return false;
        }

        $found = false;
        foreach ($token['attr'] as $pair) {
            if ($pair['name'] !== $key) {
                continue;
            }
            // keep scanning: a later pair with the same name overrides
            $found = $pair['value'];
        }

        return $found;
    }
3550
3551
    /**
     * Returns the current table.
     *
     * @return DOMElement The last table element in the stack of open
     *                    elements, or the first stack entry when the stack
     *                    holds no table.
     */
    private function getCurrentTable() {
        /* The current table is the last table element in the stack of open
         * elements, if there is one. If there is no table element in the stack
         * of open elements (fragment case), then the current table is the
         * first element in the stack of open elements (the html element). */
        $idx = count($this->stack);
        while ($idx-- > 0) {
            $candidate = $this->stack[$idx];
            if ($candidate->tagName === 'table') {
                return $candidate;
            }
        }

        return $this->stack[0];
    }
3563
3564
    /**
     * Determines the foster parent element for the current stack of open
     * elements.
     *
     * @return DOMNode The parent element of the last table in the stack;
     *                 the first stack entry when the stack has no table;
     *                 otherwise the stack entry just before that table.
     */
    private function getFosterParent() {
        /* The foster parent element is the parent element of the last
        table element in the stack of open elements, if there is a
        table element and it has such a parent element. If there is no
        table element in the stack of open elements (innerHTML case),
        then the foster parent element is the first element in the
        stack of open elements (the html element). Otherwise, if there
        is a table element in the stack of open elements, but the last
        table element in the stack of open elements has no parent, or
        its parent node is not an element, then the foster parent
        element is the element before the last table element in the
        stack of open elements. */
        $table = null;
        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
            if ($this->stack[$n]->tagName === 'table') {
                $table = $this->stack[$n];
                break;
            }
        }

        if ($table === null) {
            return $this->stack[0];
        }

        // Only an element parent qualifies. Testing for "not null" alone
        // would hand back a non-element parent and make the fallback
        // below unreachable for that case.
        $parent = $table->parentNode;
        if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
            return $parent;
        }

        // $n still indexes the table found by the loop above.
        return $this->stack[$n - 1];
    }
3594
3595
    /**
     * Foster parents $node: inserts it into the foster parent element,
     * directly before the current table when that table is a child of the
     * foster parent, and at the end of the foster parent otherwise.
     *
     * @param DOMNode $node Node to insert.
     * @return void
     */
    public function fosterParent($node) {
        $foster_parent = $this->getFosterParent();
        $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
        /* When a node node is to be foster parented, the node node must be
         * inserted into the foster parent element. */
        /* If the foster parent element is the parent element of the last table
         * element in the stack of open elements, then node must be inserted
         * immediately before the last table element in the stack of open
         * elements in the foster parent element; otherwise, node must be
         * appended to the foster parent element. */
        // A table without a parent cannot be a child of the foster parent;
        // check for that before calling a method on parentNode.
        $table_parent = $table->parentNode;
        if ($table->tagName === 'table' && $table_parent !== null
        && $table_parent->isSameNode($foster_parent)) {
            $foster_parent->insertBefore($node, $table);
        } else {
            $foster_parent->appendChild($node);
        }
    }
3611
3612
    /**
     * Debugging helper: echoes the tag names of the stack of open
     * elements, first entry first, on a single line.
     *
     * @return void
     */
    private function printStack() {
        $tags = array();
        foreach ($this->stack as $open) {
            array_push($tags, $open->tagName);
        }
        echo "  -> stack [", implode(', ', $tags), "]\n";
    }
3622
3623
    /**
     * Debugging helper: echoes the list of active formatting elements on a
     * single line, showing markers as "MARKER". Prints nothing when the
     * list is empty.
     *
     * @return void
     */
    private function printActiveFormattingElements() {
        if (empty($this->a_formatting)) {
            return;
        }

        $labels = array();
        foreach ($this->a_formatting as $item) {
            $labels[] = ($item === self::MARKER) ? 'MARKER' : $item->tagName;
        }
        echo "  -> active formatting [", implode(', ', $labels), "]\n";
    }
3635
3636
    /**
     * Reports whether the current table carries a non-empty "tainted"
     * property.
     *
     * @return bool
     */
    public function currentTableIsTainted() {
        $table = $this->getCurrentTable();
        return !empty($table->tainted);
    }
3639
3640
    /**
     * Sets up the tree constructor for building a fragment.
     *
     * The builder is always flagged as parsing a fragment. When a context
     * tag name is given, a context element is created, the content model is
     * chosen from its tag name, a fresh html root becomes the only entry of
     * the stack of open elements, the insertion mode is reset, and the form
     * pointer is set if a form element is found on the context's ancestor
     * chain.
     *
     * @param string|null $context Tag name of the context element, if any.
     * @return void
     */
    public function setupContext($context = null) {
        $this->fragment = true;
        if (!$context) {
            return;
        }

        $this->context = $this->dom->createElementNS(self::NS_HTML, $context);

        /* 4.1. Set the HTML parser's tokenization stage's content model
         * flag according to the context element, as follows: */
        $tag = $this->context->tagName;
        $rcdata_tags = array('title', 'textarea');
        // XSCRIPT: 'noscript' is listed here on the assumption that
        // scripting is enabled
        $cdata_tags = array('style', 'script', 'xmp', 'iframe',
            'noembed', 'noframes', 'noscript');
        if (in_array($tag, $rcdata_tags, true)) {
            $this->content_model = HTML5_Tokenizer::RCDATA;
        } elseif (in_array($tag, $cdata_tags, true)) {
            $this->content_model = HTML5_Tokenizer::CDATA;
        } elseif ($tag === 'plaintext') {
            $this->content_model = HTML5_Tokenizer::PLAINTEXT;
        }

        /* 4.2. Let root be a new html element with no attributes. */
        $this->root = $this->dom->createElementNS(self::NS_HTML, 'html');
        /* 4.3 Append the element root to the Document node created above. */
        $this->dom->appendChild($this->root);
        /* 4.4 Set up the parser's stack of open elements so that it
         * contains just the single element root. */
        $this->stack = array($this->root);
        /* 4.5 Reset the parser's insertion mode appropriately. */
        $this->resetInsertionMode();

        /* 4.6 Set the parser's form element pointer to the nearest node
         * to the context element that is a form element (going straight up
         * the ancestor chain, and including the element itself, if it is a
         * form element), or, if there is no such form element, to null. */
        $ancestor = $this->context;
        do {
            if ($ancestor->tagName === 'form') {
                $this->form_pointer = $ancestor;
                break;
            }
            $ancestor = $ancestor->parentNode;
        } while ($ancestor);
    }
3688
3689
    /**
     * Adjusts MathML attribute names on a token: the lowercased
     * "definitionurl" becomes "definitionURL".
     *
     * @param array $token Token whose 'attr' list of name/value pairs is
     *                     adjusted.
     * @return array The token with adjusted attribute names; returned
     *               untouched when it carries no attributes.
     */
    public function adjustMathMLAttributes($token) {
        // Nothing to adjust. Looping by reference over a missing 'attr'
        // entry would raise a warning and leave a null 'attr' key behind.
        if (empty($token['attr'])) {
            return $token;
        }

        // Write through the index rather than a reference so that no
        // reference into the returned array is left dangling.
        foreach ($token['attr'] as $idx => $kp) {
            if ($kp['name'] === 'definitionurl') {
                $token['attr'][$idx]['name'] = 'definitionURL';
            }
        }
        return $token;
    }
3697
3698
    /**
     * Adjusts SVG attribute names on a token: lowercased names found in the
     * lookup table are replaced by their mixed-case SVG spelling.
     *
     * @param array $token Token whose 'attr' list of name/value pairs is
     *                     adjusted.
     * @return array The token with adjusted attribute names; returned
     *               untouched when it carries no attributes.
     */
    public function adjustSVGAttributes($token) {
        static $lookup = array(
            'attributename' => 'attributeName',
            'attributetype' => 'attributeType',
            'basefrequency' => 'baseFrequency',
            'baseprofile' => 'baseProfile',
            'calcmode' => 'calcMode',
            'clippathunits' => 'clipPathUnits',
            'contentscripttype' => 'contentScriptType',
            'contentstyletype' => 'contentStyleType',
            'diffuseconstant' => 'diffuseConstant',
            'edgemode' => 'edgeMode',
            'externalresourcesrequired' => 'externalResourcesRequired',
            'filterres' => 'filterRes',
            'filterunits' => 'filterUnits',
            'glyphref' => 'glyphRef',
            'gradienttransform' => 'gradientTransform',
            'gradientunits' => 'gradientUnits',
            'kernelmatrix' => 'kernelMatrix',
            'kernelunitlength' => 'kernelUnitLength',
            'keypoints' => 'keyPoints',
            'keysplines' => 'keySplines',
            'keytimes' => 'keyTimes',
            'lengthadjust' => 'lengthAdjust',
            'limitingconeangle' => 'limitingConeAngle',
            'markerheight' => 'markerHeight',
            'markerunits' => 'markerUnits',
            'markerwidth' => 'markerWidth',
            'maskcontentunits' => 'maskContentUnits',
            'maskunits' => 'maskUnits',
            'numoctaves' => 'numOctaves',
            'pathlength' => 'pathLength',
            'patterncontentunits' => 'patternContentUnits',
            'patterntransform' => 'patternTransform',
            'patternunits' => 'patternUnits',
            'pointsatx' => 'pointsAtX',
            'pointsaty' => 'pointsAtY',
            'pointsatz' => 'pointsAtZ',
            'preservealpha' => 'preserveAlpha',
            'preserveaspectratio' => 'preserveAspectRatio',
            'primitiveunits' => 'primitiveUnits',
            'refx' => 'refX',
            'refy' => 'refY',
            'repeatcount' => 'repeatCount',
            'repeatdur' => 'repeatDur',
            'requiredextensions' => 'requiredExtensions',
            'requiredfeatures' => 'requiredFeatures',
            'specularconstant' => 'specularConstant',
            'specularexponent' => 'specularExponent',
            'spreadmethod' => 'spreadMethod',
            'startoffset' => 'startOffset',
            'stddeviation' => 'stdDeviation',
            'stitchtiles' => 'stitchTiles',
            'surfacescale' => 'surfaceScale',
            'systemlanguage' => 'systemLanguage',
            'tablevalues' => 'tableValues',
            'targetx' => 'targetX',
            'targety' => 'targetY',
            'textlength' => 'textLength',
            'viewbox' => 'viewBox',
            'viewtarget' => 'viewTarget',
            'xchannelselector' => 'xChannelSelector',
            'ychannelselector' => 'yChannelSelector',
            'zoomandpan' => 'zoomAndPan',
        );

        // Nothing to adjust. Looping by reference over a missing 'attr'
        // entry would raise a warning and leave a null 'attr' key behind.
        if (empty($token['attr'])) {
            return $token;
        }

        foreach ($token['attr'] as $idx => $kp) {
            // Names already rewritten to arrays (see
            // adjustForeignAttributes) are not valid array offsets.
            if (is_string($kp['name']) && isset($lookup[$kp['name']])) {
                $token['attr'][$idx]['name'] = $lookup[$kp['name']];
            }
        }
        return $token;
    }
3770
3771
    /**
     * Adjusts foreign attributes on a token: each prefixed name found in
     * the lookup table is replaced by an array of
     * (prefix, local name, namespace URI).
     *
     * @param array $token Token whose 'attr' list of name/value pairs is
     *                     adjusted.
     * @return array The token with adjusted attribute names; returned
     *               untouched when it carries no attributes.
     */
    public function adjustForeignAttributes($token) {
        static $lookup = array(
            'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
            'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
            'xlink:href' => array('xlink', 'href', self::NS_XLINK),
            'xlink:role' => array('xlink', 'role', self::NS_XLINK),
            'xlink:show' => array('xlink', 'show', self::NS_XLINK),
            'xlink:title' => array('xlink', 'title', self::NS_XLINK),
            'xlink:type' => array('xlink', 'type', self::NS_XLINK),
            'xml:base' => array('xml', 'base', self::NS_XML),
            'xml:lang' => array('xml', 'lang', self::NS_XML),
            'xml:space' => array('xml', 'space', self::NS_XML),
            'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
            'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
        );

        // Nothing to adjust. Looping by reference over a missing 'attr'
        // entry would raise a warning and leave a null 'attr' key behind.
        if (empty($token['attr'])) {
            return $token;
        }

        foreach ($token['attr'] as $idx => $kp) {
            // Only plain string names can be looked up; a name that is
            // already an array is left as it is.
            if (is_string($kp['name']) && isset($lookup[$kp['name']])) {
                $token['attr'][$idx]['name'] = $lookup[$kp['name']];
            }
        }
        return $token;
    }
3793
3794
    /**
     * Creates an element for $token in the given namespace, copies the
     * token's attributes onto it, inserts it via appendToRealParent() and
     * pushes it onto the stack of open elements.
     *
     * @param array  $token        Start tag token; attribute names may be
     *                             plain strings or arrays whose index 1 is
     *                             the local name and index 2 the namespace.
     * @param string $namespaceURI Namespace of the new element.
     * @return void
     */
    public function insertForeignElement($token, $namespaceURI) {
        $el = $this->dom->createElementNS($namespaceURI, $token['name']);

        $attributes = empty($token['attr']) ? array() : $token['attr'];
        foreach ($attributes as $kp) {
            $attr = $kp['name'];
            $ns = self::NS_HTML;
            if (is_array($attr)) {
                $ns = $attr[2];
                $attr = $attr[1];
            }

            // the first occurrence of an attribute wins
            if ($el->hasAttributeNS($ns, $attr)) {
                continue;
            }

            if ($ns === self::NS_XLINK) {
                // XSKETCHY: work around godawful libxml bug
                $el->setAttribute('xlink:'.$attr, $kp['value']);
            } elseif ($ns === self::NS_HTML) {
                // Another godawful libxml bug
                $el->setAttribute($attr, $kp['value']);
            } else {
                $el->setAttributeNS($ns, $attr, $kp['value']);
            }
        }

        $this->appendToRealParent($el);
        $this->stack[] = $el;
        // XERROR: see below
        /* If the newly created element has an xmlns attribute in the XMLNS
         * namespace whose value is not exactly the same as the element's
         * namespace, that is a parse error. Similarly, if the newly created
         * element has an xmlns:xlink attribute in the XMLNS namespace whose
         * value is not the XLink Namespace, that is a parse error. */
    }
3827
3828
    /**
     * Normalizes the document and returns the parse result.
     *
     * @return DOMDocument|DOMNodeList The document itself when not parsing
     *         a fragment; otherwise the child nodes of the fragment root,
     *         or of the document when no root was set.
     */
    public function save() {
        $this->dom->normalize();

        if (!$this->fragment) {
            return $this->dom;
        }

        $container = $this->root ? $this->root : $this->dom;
        return $container->childNodes;
    }
3840
}
3841
3842