Completed
Pull Request — master (#133)
by Goffy
16:45
created

HTML5::entityDataState()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 18
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 8
nc 2
nop 0
dl 0
loc 18
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.
5
 * Occupies space in the HTML5 pseudo-namespace, which may cause conflicts.
6
 *
7
 * @note
8
 *    Recent changes to PHP's DOM extension have resulted in some fatal
9
 *    error conditions with the original version of PH5P. Pending changes,
10
 *    this lexer will punt to DirectLex if DOM throws an exception.
11
 */
12
13
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
{
    /**
     * Tokenizes HTML by running it through the PH5P HTML5 parser and turning
     * the <body> subtree of the resulting document into tokens.
     *
     * If the parser raises a DOMException, the exception is registered in the
     * context under 'PH5PError' and tokenization is delegated to
     * HTMLPurifier_Lexer_DirectLex, which receives the original HTML rather
     * than the pre-processed copy.
     *
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[]
     */
    public function tokenizeHTML($html, $config, $context)
    {
        // Pre-process the markup: normalize() first, then wrapHTML() on its result.
        $prepared = $this->wrapHTML(
            $this->normalize($html, $config, $context),
            $config,
            $context
        );

        try {
            $parser = new HTML5($prepared);
            $document = $parser->save();
        } catch (DOMException $failure) {
            // Parsing failed: punt to DirectLex and keep the error detectable.
            $fallback = new HTMLPurifier_Lexer_DirectLex();
            $context->register('PH5PError', $failure);
            return $fallback->tokenizeHTML($html, $config, $context);
        }

        // Only the first <body> inside the first <html> element is tokenized.
        $htmlNode = $document->getElementsByTagName('html')->item(0);
        $bodyNode = $htmlNode->getElementsByTagName('body')->item(0);

        $tokens = array();
        $this->tokenizeDOM($bodyNode, $tokens);

        return $tokens;
    }
}
44
45
/*
46
47
Copyright 2007 Jeroen van der Meer <http://jero.net/>
48
49
Permission is hereby granted, free of charge, to any person obtaining a
50
copy of this software and associated documentation files (the
51
"Software"), to deal in the Software without restriction, including
52
without limitation the rights to use, copy, modify, merge, publish,
53
distribute, sublicense, and/or sell copies of the Software, and to
54
permit persons to whom the Software is furnished to do so, subject to
55
the following conditions:
56
57
The above copyright notice and this permission notice shall be included
58
in all copies or substantial portions of the Software.
59
60
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
61
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
62
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
63
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
64
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
65
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
66
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
67
68
*/
69
70
class HTML5
0 ignored issues
show
Coding Style Compatibility introduced by
PSR1 recommends that each class should be in its own file to aid autoloaders.

Having each class in a dedicated file usually plays nice with PSR autoloaders and is therefore a well established practice. If you use other autoloaders, you might not want to follow this rule.

Loading history...
71
{
72
    private $data;
73
    private $char;
74
    private $EOF;
75
    private $state;
76
    private $tree;
77
    private $token;
78
    private $content_model;
79
    private $escape = false;
80
    private $entities = array(
81
        'AElig;',
82
        'AElig',
83
        'AMP;',
84
        'AMP',
85
        'Aacute;',
86
        'Aacute',
87
        'Acirc;',
88
        'Acirc',
89
        'Agrave;',
90
        'Agrave',
91
        'Alpha;',
92
        'Aring;',
93
        'Aring',
94
        'Atilde;',
95
        'Atilde',
96
        'Auml;',
97
        'Auml',
98
        'Beta;',
99
        'COPY;',
100
        'COPY',
101
        'Ccedil;',
102
        'Ccedil',
103
        'Chi;',
104
        'Dagger;',
105
        'Delta;',
106
        'ETH;',
107
        'ETH',
108
        'Eacute;',
109
        'Eacute',
110
        'Ecirc;',
111
        'Ecirc',
112
        'Egrave;',
113
        'Egrave',
114
        'Epsilon;',
115
        'Eta;',
116
        'Euml;',
117
        'Euml',
118
        'GT;',
119
        'GT',
120
        'Gamma;',
121
        'Iacute;',
122
        'Iacute',
123
        'Icirc;',
124
        'Icirc',
125
        'Igrave;',
126
        'Igrave',
127
        'Iota;',
128
        'Iuml;',
129
        'Iuml',
130
        'Kappa;',
131
        'LT;',
132
        'LT',
133
        'Lambda;',
134
        'Mu;',
135
        'Ntilde;',
136
        'Ntilde',
137
        'Nu;',
138
        'OElig;',
139
        'Oacute;',
140
        'Oacute',
141
        'Ocirc;',
142
        'Ocirc',
143
        'Ograve;',
144
        'Ograve',
145
        'Omega;',
146
        'Omicron;',
147
        'Oslash;',
148
        'Oslash',
149
        'Otilde;',
150
        'Otilde',
151
        'Ouml;',
152
        'Ouml',
153
        'Phi;',
154
        'Pi;',
155
        'Prime;',
156
        'Psi;',
157
        'QUOT;',
158
        'QUOT',
159
        'REG;',
160
        'REG',
161
        'Rho;',
162
        'Scaron;',
163
        'Sigma;',
164
        'THORN;',
165
        'THORN',
166
        'TRADE;',
167
        'Tau;',
168
        'Theta;',
169
        'Uacute;',
170
        'Uacute',
171
        'Ucirc;',
172
        'Ucirc',
173
        'Ugrave;',
174
        'Ugrave',
175
        'Upsilon;',
176
        'Uuml;',
177
        'Uuml',
178
        'Xi;',
179
        'Yacute;',
180
        'Yacute',
181
        'Yuml;',
182
        'Zeta;',
183
        'aacute;',
184
        'aacute',
185
        'acirc;',
186
        'acirc',
187
        'acute;',
188
        'acute',
189
        'aelig;',
190
        'aelig',
191
        'agrave;',
192
        'agrave',
193
        'alefsym;',
194
        'alpha;',
195
        'amp;',
196
        'amp',
197
        'and;',
198
        'ang;',
199
        'apos;',
200
        'aring;',
201
        'aring',
202
        'asymp;',
203
        'atilde;',
204
        'atilde',
205
        'auml;',
206
        'auml',
207
        'bdquo;',
208
        'beta;',
209
        'brvbar;',
210
        'brvbar',
211
        'bull;',
212
        'cap;',
213
        'ccedil;',
214
        'ccedil',
215
        'cedil;',
216
        'cedil',
217
        'cent;',
218
        'cent',
219
        'chi;',
220
        'circ;',
221
        'clubs;',
222
        'cong;',
223
        'copy;',
224
        'copy',
225
        'crarr;',
226
        'cup;',
227
        'curren;',
228
        'curren',
229
        'dArr;',
230
        'dagger;',
231
        'darr;',
232
        'deg;',
233
        'deg',
234
        'delta;',
235
        'diams;',
236
        'divide;',
237
        'divide',
238
        'eacute;',
239
        'eacute',
240
        'ecirc;',
241
        'ecirc',
242
        'egrave;',
243
        'egrave',
244
        'empty;',
245
        'emsp;',
246
        'ensp;',
247
        'epsilon;',
248
        'equiv;',
249
        'eta;',
250
        'eth;',
251
        'eth',
252
        'euml;',
253
        'euml',
254
        'euro;',
255
        'exist;',
256
        'fnof;',
257
        'forall;',
258
        'frac12;',
259
        'frac12',
260
        'frac14;',
261
        'frac14',
262
        'frac34;',
263
        'frac34',
264
        'frasl;',
265
        'gamma;',
266
        'ge;',
267
        'gt;',
268
        'gt',
269
        'hArr;',
270
        'harr;',
271
        'hearts;',
272
        'hellip;',
273
        'iacute;',
274
        'iacute',
275
        'icirc;',
276
        'icirc',
277
        'iexcl;',
278
        'iexcl',
279
        'igrave;',
280
        'igrave',
281
        'image;',
282
        'infin;',
283
        'int;',
284
        'iota;',
285
        'iquest;',
286
        'iquest',
287
        'isin;',
288
        'iuml;',
289
        'iuml',
290
        'kappa;',
291
        'lArr;',
292
        'lambda;',
293
        'lang;',
294
        'laquo;',
295
        'laquo',
296
        'larr;',
297
        'lceil;',
298
        'ldquo;',
299
        'le;',
300
        'lfloor;',
301
        'lowast;',
302
        'loz;',
303
        'lrm;',
304
        'lsaquo;',
305
        'lsquo;',
306
        'lt;',
307
        'lt',
308
        'macr;',
309
        'macr',
310
        'mdash;',
311
        'micro;',
312
        'micro',
313
        'middot;',
314
        'middot',
315
        'minus;',
316
        'mu;',
317
        'nabla;',
318
        'nbsp;',
319
        'nbsp',
320
        'ndash;',
321
        'ne;',
322
        'ni;',
323
        'not;',
324
        'not',
325
        'notin;',
326
        'nsub;',
327
        'ntilde;',
328
        'ntilde',
329
        'nu;',
330
        'oacute;',
331
        'oacute',
332
        'ocirc;',
333
        'ocirc',
334
        'oelig;',
335
        'ograve;',
336
        'ograve',
337
        'oline;',
338
        'omega;',
339
        'omicron;',
340
        'oplus;',
341
        'or;',
342
        'ordf;',
343
        'ordf',
344
        'ordm;',
345
        'ordm',
346
        'oslash;',
347
        'oslash',
348
        'otilde;',
349
        'otilde',
350
        'otimes;',
351
        'ouml;',
352
        'ouml',
353
        'para;',
354
        'para',
355
        'part;',
356
        'permil;',
357
        'perp;',
358
        'phi;',
359
        'pi;',
360
        'piv;',
361
        'plusmn;',
362
        'plusmn',
363
        'pound;',
364
        'pound',
365
        'prime;',
366
        'prod;',
367
        'prop;',
368
        'psi;',
369
        'quot;',
370
        'quot',
371
        'rArr;',
372
        'radic;',
373
        'rang;',
374
        'raquo;',
375
        'raquo',
376
        'rarr;',
377
        'rceil;',
378
        'rdquo;',
379
        'real;',
380
        'reg;',
381
        'reg',
382
        'rfloor;',
383
        'rho;',
384
        'rlm;',
385
        'rsaquo;',
386
        'rsquo;',
387
        'sbquo;',
388
        'scaron;',
389
        'sdot;',
390
        'sect;',
391
        'sect',
392
        'shy;',
393
        'shy',
394
        'sigma;',
395
        'sigmaf;',
396
        'sim;',
397
        'spades;',
398
        'sub;',
399
        'sube;',
400
        'sum;',
401
        'sup1;',
402
        'sup1',
403
        'sup2;',
404
        'sup2',
405
        'sup3;',
406
        'sup3',
407
        'sup;',
408
        'supe;',
409
        'szlig;',
410
        'szlig',
411
        'tau;',
412
        'there4;',
413
        'theta;',
414
        'thetasym;',
415
        'thinsp;',
416
        'thorn;',
417
        'thorn',
418
        'tilde;',
419
        'times;',
420
        'times',
421
        'trade;',
422
        'uArr;',
423
        'uacute;',
424
        'uacute',
425
        'uarr;',
426
        'ucirc;',
427
        'ucirc',
428
        'ugrave;',
429
        'ugrave',
430
        'uml;',
431
        'uml',
432
        'upsih;',
433
        'upsilon;',
434
        'uuml;',
435
        'uuml',
436
        'weierp;',
437
        'xi;',
438
        'yacute;',
439
        'yacute',
440
        'yen;',
441
        'yen',
442
        'yuml;',
443
        'yuml',
444
        'zeta;',
445
        'zwj;',
446
        'zwnj;'
447
    );
448
449
    const PCDATA = 0;
450
    const RCDATA = 1;
451
    const CDATA = 2;
452
    const PLAINTEXT = 3;
453
454
    const DOCTYPE = 0;
455
    const STARTTAG = 1;
456
    const ENDTAG = 2;
457
    const COMMENT = 3;
458
    const CHARACTR = 4;
459
    const EOF = 5;
460
461
    /**
     * Stores the input, resets the tokenizer to its initial state and runs
     * the state machine until a state handler sets the state to null.
     *
     * @param string $data Markup to tokenize.
     */
    public function __construct($data)
    {
        $this->data = $data;
        $this->EOF = strlen($data);
        // Start one position before the input; the data state increments
        // the position before it reads a character.
        $this->char = -1;

        $this->content_model = self::PCDATA;
        $this->tree = new HTML5TreeConstructer();
        $this->state = 'data';

        // Dispatch to the "<state>State" method for as long as a state is set.
        while (null !== $this->state) {
            $handler = $this->state . 'State';
            $this->$handler();
        }
    }
475
476
    /**
     * Returns the result of the tree constructor's save() call.
     *
     * @return mixed Whatever HTML5TreeConstructer::save() returns; presumably
     *               the built DOM document — confirm against that class.
     */
    public function save()
    {
        $result = $this->tree->save();

        return $result;
    }
480
481
    /**
     * Returns the input character at the current position.
     *
     * @return string|false The character at offset $this->char, or false once
     *                      the position is at or beyond the end of the input.
     */
    private function char()
    {
        if ($this->char < $this->EOF) {
            return $this->data[$this->char];
        }

        return false;
    }
487
488
    /**
     * Reads from the input at an arbitrary offset without moving the
     * current position.
     *
     * @param int $s Offset to read from.
     * @param int $l Number of characters to read; 0 reads a single character.
     * @return string|null The single character ($l === 0) or the substring of
     *                     length $l starting at $s; null when $s + $l is not
     *                     strictly below the input length.
     */
    private function character($s, $l = 0)
    {
        // NOTE(review): for $l > 0 this bound also rejects a substring that
        // ends exactly at the end of the input ($s + $l === EOF). Kept as-is
        // because callers may depend on it — confirm before relaxing.
        if ($s + $l >= $this->EOF) {
            return null;
        }

        return ($l === 0)
            ? $this->data[$s]
            : substr($this->data, $s, $l);
    }
499
500
    /**
     * Returns the longest run of characters, starting at $start, that all
     * belong to the given regular-expression character class.
     *
     * The previous implementation used preg_replace(), which hands back the
     * subject unchanged when the pattern does not match; with no leading
     * match it therefore returned the entire remaining input instead of an
     * empty run. This version returns '' in that case.
     *
     * @param string $char_class Body of a PCRE character class (e.g. 'A-Za-z'),
     *                           inserted verbatim between '[' and ']'.
     * @param int $start Offset in the input at which the run must begin.
     * @return string The matching run, or '' when the character at $start is
     *                not in the class or $start is past the end of the input.
     */
    private function characters($char_class, $start)
    {
        // substr() may return false on older PHP versions when $start is out
        // of range; normalise to a string before matching.
        $remaining = (string) substr($this->data, $start);

        if (preg_match('#^[' . $char_class . ']+#', $remaining, $match) === 1) {
            return $match[0];
        }

        return '';
    }
504
505
    /**
     * Data state: consumes the next input character and, depending on that
     * character and the content model flag, either switches to another
     * tokenizer state or emits character data.
     */
    private function dataState()
    {
        // Consume the next input character.
        $this->char++;
        $current = $this->char();

        $model = $this->content_model;
        $isRcdataOrCdata = ($model === self::RCDATA || $model === self::CDATA);

        /* U+0026 AMPERSAND (&)
        When the content model flag is set to one of the PCDATA or RCDATA
        states: switch to the entity data state. Otherwise it is handled by
        the "anything else" case at the bottom. */
        if ($current === '&' && ($model === self::PCDATA || $model === self::RCDATA)) {
            $this->state = 'entityData';
            return;
        }

        /* U+002D HYPHEN-MINUS (-) */
        if ($current === '-') {
            /* Spec: if the content model flag is RCDATA or CDATA, the escape
            flag is false, there are at least three characters before this one
            and the last four characters including this one are "<!--", set
            the escape flag to true.
            NOTE(review): character($this->char - 4, 4) reads the four
            characters *preceding* the current one, which does not match the
            "including this one" wording above — confirm the intended offset. */
            if ($isRcdataOrCdata
                && $this->escape === false
                && $this->char >= 3
                && $this->character($this->char - 4, 4) === '<!--'
            ) {
                $this->escape = true;
            }

            /* In any case, emit the input character as a character token.
            Stay in the data state. */
            $this->emitToken(array('type' => self::CHARACTR, 'data' => $current));
            return;
        }

        /* U+003C LESS-THAN SIGN (<)
        In the PCDATA state, or in the RCDATA/CDATA states while the escape
        flag is false: switch to the tag open state. Otherwise it is handled
        by the "anything else" case at the bottom. */
        if ($current === '<'
            && ($model === self::PCDATA || ($isRcdataOrCdata && $this->escape === false))
        ) {
            $this->state = 'tagOpen';
            return;
        }

        /* U+003E GREATER-THAN SIGN (>) */
        if ($current === '>') {
            /* Spec: if the content model flag is RCDATA or CDATA, the escape
            flag is true and the last three characters including this one are
            "-->", set the escape flag to false.
            NOTE(review): character($this->char, 3) reads forward starting at
            the current '>', so the comparison with '-->' cannot succeed —
            confirm the intended offset. */
            if ($isRcdataOrCdata
                && $this->escape === true
                && $this->character($this->char, 3) === '-->'
            ) {
                $this->escape = false;
            }

            /* In any case, emit the input character as a character token.
            Stay in the data state. */
            $this->emitToken(array('type' => self::CHARACTR, 'data' => $current));
            return;
        }

        /* EOF: emit an end-of-file token. */
        if ($this->char === $this->EOF) {
            $this->EOF();
            return;
        }

        /* PLAINTEXT content model.
        THIS DIFFERS GREATLY FROM THE SPEC: the remainder of the input is
        emitted as one character token, followed by end-of-file handling. */
        if ($model === self::PLAINTEXT) {
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => substr($this->data, $this->char)
                )
            );
            $this->EOF();
            return;
        }

        /* Anything else.
        THIS DIFFERS GREATLY FROM THE SPEC: everything up to the next '<' or
        '&' is emitted as a single character token. Stay in the data state. */
        $length = strcspn($this->data, '<&', $this->char);
        $text = substr($this->data, $this->char, $length);
        // Leave the position on the last character of the emitted run.
        $this->char += $length - 1;

        $this->emitToken(array('type' => self::CHARACTR, 'data' => $text));
        $this->state = 'data';
    }
616
617
    /**
     * Entity data state: attempts to consume an entity and emits either the
     * value returned for it or a literal ampersand, then returns to the data
     * state.
     *
     * The previous `!$entity` test treated the string '0' as "nothing
     * returned" because '0' is falsy in PHP, so an entity resolving to the
     * digit zero was emitted as '&'. The string '0' is now passed through.
     */
    private function entityDataState()
    {
        // Attempt to consume an entity.
        // NOTE(review): entity() is defined outside this view; it is assumed
        // to return the replacement text or a falsy value — confirm.
        $entity = $this->entity();

        // If nothing is returned, emit a U+0026 AMPERSAND character token.
        // Otherwise, emit the character token that was returned.
        $consumed = ($entity === '0' || $entity);
        $char = $consumed ? $entity : '&';

        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $char
            )
        );

        // Finally, switch to the data state.
        $this->state = 'data';
    }
635
636
    /**
     * Tag open state: decides, based on the content model flag and the
     * character following '<', which tokenizer state comes next, emitting a
     * literal '<' (or '<>') as character data when no tag starts here.
     */
    private function tagOpenState()
    {
        $model = $this->content_model;

        // Loose comparisons keep the matching rules of the switch statement
        // this chain replaces.
        if ($model == self::RCDATA || $model == self::CDATA) {
            /* If the next input character is a U+002F SOLIDUS (/) character,
            consume it and switch to the close tag open state. Otherwise emit
            a U+003C LESS-THAN SIGN character token and switch to the data
            state to process the next input character. */
            if ($this->character($this->char + 1) === '/') {
                $this->char++;
                $this->state = 'closeTagOpen';
                return;
            }

            $this->emitToken(array('type' => self::CHARACTR, 'data' => '<'));
            $this->state = 'data';
            return;
        }

        if ($model == self::PCDATA) {
            // Consume the next input character.
            $this->char++;
            $current = $this->char();

            // Characters whose only effect is a state switch:
            //   '!' -> markup declaration open state
            //   '/' -> close tag open state
            //   '?' -> parse error, bogus comment state
            $simpleTransitions = array(
                '!' => 'markupDeclarationOpen',
                '/' => 'closeTagOpen',
                '?' => 'bogusComment'
            );

            if (is_string($current) && isset($simpleTransitions[$current])) {
                $this->state = $simpleTransitions[$current];

            } elseif (preg_match('/^[A-Za-z]$/', $current)) {
                /* ASCII letter: create a new start tag token whose name is the
                lowercase version of the character, then switch to the tag
                name state. The token is not emitted yet; further details are
                filled in before it is emitted. */
                $this->token = array(
                    'name' => strtolower($current),
                    'type' => self::STARTTAG,
                    'attr' => array()
                );
                $this->state = 'tagName';

            } elseif ($current === '>') {
                /* U+003E GREATER-THAN SIGN (>)
                Parse error. Emit "<>" as character data and switch to the
                data state. */
                $this->emitToken(array('type' => self::CHARACTR, 'data' => '<>'));
                $this->state = 'data';

            } else {
                /* Anything else
                Parse error. Emit a U+003C LESS-THAN SIGN character token and
                reconsume the current input character in the data state. */
                $this->emitToken(array('type' => self::CHARACTR, 'data' => '<'));
                $this->char--;
                $this->state = 'data';
            }
        }
    }
727
728
    /**
     * Close tag open state: handles the characters following "</".
     *
     * In the RCDATA/CDATA content models the upcoming name must match the
     * node on top of the tree constructor's stack and be followed by a
     * delimiter; otherwise "</" is emitted as character data. In all other
     * cases the next character decides between starting an end tag token,
     * returning to the data state, or entering the bogus comment state.
     *
     * Compared with the previous version, the result of character() is cast
     * to string before it is handed to preg_match(), because character()
     * returns null at end of input and null subjects are deprecated since
     * PHP 8.1. The redundant "$the_same &&" inside the condition was dropped;
     * the outcome of the condition is unchanged.
     */
    private function closeTagOpenState()
    {
        $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));
        $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;

        $isRcdataOrCdata = ($this->content_model === self::RCDATA || $this->content_model === self::CDATA);

        /* If the content model flag is set to the RCDATA or CDATA states then
        examine the next few characters. If they do not match the tag name of
        the last start tag token emitted (case insensitively), or if they do but
        they are not immediately followed by one of the following characters:
            * U+0009 CHARACTER TABULATION
            * U+000A LINE FEED (LF)
            * U+000B LINE TABULATION
            * U+000C FORM FEED (FF)
            * U+0020 SPACE
            * U+003E GREATER-THAN SIGN (>)
            * U+002F SOLIDUS (/)
            * EOF
        ...then there is a parse error.
        NOTE(review): a name that runs up to the end of the input is treated
        as a parse error here (the delimiter lookup yields no match), although
        the list above names EOF as an accepted follower — confirm intent. */
        if ($isRcdataOrCdata
            && (!$the_same
                || !preg_match(
                    '/[\t\n\x0b\x0c >\/]/',
                    (string) $this->character($this->char + 1 + strlen($next_node))
                )
                || $this->EOF === $this->char)
        ) {
            /* Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS
            character token, and switch to the data state to process the next
            input character. */
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => '</'
                )
            );

            $this->state = 'data';
            return;
        }

        /* Otherwise, if the content model flag is set to the PCDATA state,
        or if the next few characters do match that tag name, consume the
        next input character: */
        $this->char++;
        $char = $this->char();

        if (preg_match('/^[A-Za-z]$/', (string) $char)) {
            /* ASCII letter: create a new end tag token whose name is the
            lowercase version of the character, then switch to the tag name
            state. The token is not emitted yet; further details are filled in
            before it is emitted. */
            $this->token = array(
                'name' => strtolower($char),
                'type' => self::ENDTAG
            );

            $this->state = 'tagName';

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Parse error. Switch to the data state. */
            $this->state = 'data';

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F
            SOLIDUS character token. Reconsume the EOF character in the data state. */
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => '</'
                )
            );

            $this->char--;
            $this->state = 'data';

        } else {
            /* Parse error. Switch to the bogus comment state. */
            $this->state = 'bogusComment';
        }
    }
808
809
    /**
     * Tag name state: consumes one character and either extends the current
     * tag token's name, emits the token, or moves on to attribute parsing.
     *
     * character() returns null at end of input; the value is cast to string
     * before being passed to preg_match(), since null subjects are deprecated
     * as of PHP 8.1. The branch taken for every input is unchanged.
     */
    private function tagNameState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if (preg_match('/^[\t\n\x0b\x0c ]$/', (string) $char)) {
            /* U+0009 CHARACTER TABULATION
            U+000A LINE FEED (LF)
            U+000B LINE TABULATION
            U+000C FORM FEED (FF)
            U+0020 SPACE
            Switch to the before attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Emit the current tag token. Switch to the data state. */
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the EOF
            character in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } elseif ($char === '/') {
            /* U+002F SOLIDUS (/)
            Parse error unless this is a permitted slash. Switch to the before
            attribute name state. */
            $this->state = 'beforeAttributeName';

        } else {
            /* Anything else
            Append the current input character (lowercased) to the current tag
            token's tag name. Stay in the tag name state. */
            $this->token['name'] .= strtolower($char);
            $this->state = 'tagName';
        }
    }
853
854
    /**
     * Before attribute name state: skips whitespace and slashes inside a tag,
     * emits the tag token on '>' or end of input, and otherwise starts a new
     * attribute on the current tag token.
     *
     * character() returns null at end of input; the value is cast to string
     * before being passed to preg_match(), since null subjects are deprecated
     * as of PHP 8.1. The branch taken for every input is unchanged.
     */
    private function beforeAttributeNameState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if (preg_match('/^[\t\n\x0b\x0c ]$/', (string) $char)) {
            /* U+0009 CHARACTER TABULATION
            U+000A LINE FEED (LF)
            U+000B LINE TABULATION
            U+000C FORM FEED (FF)
            U+0020 SPACE
            Stay in the before attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Emit the current tag token. Switch to the data state. */
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif ($char === '/') {
            /* U+002F SOLIDUS (/)
            Parse error unless this is a permitted slash. Stay in the before
            attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the EOF
            character in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } else {
            /* Anything else
            Start a new attribute in the current tag token. Set that attribute's
            name to the current input character (lowercased); its value starts
            out as null. Switch to the attribute name state. */
            $this->token['attr'][] = array(
                'name' => strtolower($char),
                'value' => null
            );

            $this->state = 'attributeName';
        }
    }
903
904
    /**
     * Attribute name state: consumes one character and either extends the
     * name of the attribute most recently added to the current tag token,
     * emits the token, or switches to the state that handles what follows
     * the name.
     *
     * character() returns null at end of input; the value is cast to string
     * before being passed to preg_match(), since null subjects are deprecated
     * as of PHP 8.1. The branch taken for every input is unchanged.
     */
    private function attributeNameState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if (preg_match('/^[\t\n\x0b\x0c ]$/', (string) $char)) {
            /* U+0009 CHARACTER TABULATION
            U+000A LINE FEED (LF)
            U+000B LINE TABULATION
            U+000C FORM FEED (FF)
            U+0020 SPACE
            Switch to the after attribute name state. */
            $this->state = 'afterAttributeName';

        } elseif ($char === '=') {
            /* U+003D EQUALS SIGN (=)
            Switch to the before attribute value state. */
            $this->state = 'beforeAttributeValue';

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Emit the current tag token. Switch to the data state. */
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {
            /* U+002F SOLIDUS (/) not directly followed by '>'
            Parse error unless this is a permitted slash. Switch to the before
            attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the EOF
            character in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } else {
            /* Anything else
            Append the current input character (lowercased) to the current
            attribute's name. Stay in the attribute name state. */
            $last = count($this->token['attr']) - 1;
            $this->token['attr'][$last]['name'] .= strtolower($char);

            $this->state = 'attributeName';
        }
    }
955
956
    /**
     * Tokenizer "after attribute name" state.
     *
     * Consumes one input character following an attribute name. Whitespace is
     * skipped; '=' introduces a value; '>' or EOF ends the tag; '/' returns to
     * the before attribute name state; anything else starts a new attribute.
     */
    private function afterAttributeNameState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            /* U+0009 CHARACTER TABULATION
            U+000A LINE FEED (LF)
            U+000B LINE TABULATION
            U+000C FORM FEED (FF)
            U+0020 SPACE
            Stay in the after attribute name state. */
            $this->state = 'afterAttributeName';

        } elseif ($char === '=') {
            /* U+003D EQUALS SIGN (=)
            Switch to the before attribute value state. */
            $this->state = 'beforeAttributeValue';

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Emit the current tag token. Switch to the data state. */
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif ($char === '/') {
            /* U+002F SOLIDUS (/)
            Parse error unless this is a permitted slash. In both cases switch
            to the before attribute name state. The slash must never reach the
            "anything else" branch below, otherwise a trailing "/>" would
            create an attribute whose name is '/'. */
            $this->state = 'beforeAttributeName';

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the EOF
            character in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } else {
            /* Anything else
            Start a new attribute in the current tag token. Set that attribute's
            name to the (lower-cased) current input character and leave its
            value unset (null). Switch to the attribute name state. */
            $this->token['attr'][] = array(
                'name' => strtolower($char),
                'value' => null
            );

            $this->state = 'attributeName';
        }
    }
1010
1011
    /**
     * Tokenizer "before attribute value" state.
     *
     * Consumes one input character after the '=' of an attribute and decides
     * which attribute value state (double-quoted, single-quoted or unquoted)
     * handles the value, or emits the tag when it ends here.
     */
    private function beforeAttributeValueState()
    {
        // Advance to, and read, the next input character.
        $this->char++;
        $current = $this->character($this->char);

        // Whitespace (tab, LF, VT, FF, space): keep waiting for the value.
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $current)) {
            $this->state = 'beforeAttributeValue';
            return;
        }

        // U+0022 QUOTATION MARK: the value is double-quoted.
        if ($current === '"') {
            $this->state = 'attributeValueDoubleQuoted';
            return;
        }

        // U+0027 APOSTROPHE: the value is single-quoted.
        if ($current === '\'') {
            $this->state = 'attributeValueSingleQuoted';
            return;
        }

        // U+0026 AMPERSAND: step back so the unquoted state reconsumes it.
        if ($current === '&') {
            $this->char--;
            $this->state = 'attributeValueUnquoted';
            return;
        }

        // U+003E GREATER-THAN SIGN: the tag is complete.
        if ($current === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // Anything else is the first character of an unquoted value.
        $newest = count($this->token['attr']) - 1;
        $this->token['attr'][$newest]['value'] .= $current;
        $this->state = 'attributeValueUnquoted';
    }
1059
1060 View Code Duplication
    /**
     * Tokenizer "attribute value (double-quoted)" state.
     *
     * Consumes one input character and appends it to the value of the
     * attribute most recently added to the current tag token, until the
     * closing quotation mark or the end of the input is reached.
     */
    private function attributeValueDoubleQuotedState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if ($char === '"') {
            /* U+0022 QUOTATION MARK (")
            Switch to the before attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($char === '&') {
            /* U+0026 AMPERSAND (&)
            Resolve the entity and append the result to the value. The helper
            declares no parameters, so none is passed; the state is not
            changed here, so tokenizing continues in this state. */
            $this->entityInAttributeValueState();

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the character
            in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } else {
            /* Anything else
            Append the current input character to the current attribute's value.
            Stay in the attribute value (double-quoted) state. */
            $last = count($this->token['attr']) - 1;
            $this->token['attr'][$last]['value'] .= $char;

            $this->state = 'attributeValueDoubleQuoted';
        }
    }
1095
1096 View Code Duplication
    /**
     * Tokenizer "attribute value (single-quoted)" state.
     *
     * Consumes one input character and appends it to the value of the
     * attribute most recently added to the current tag token, until the
     * closing apostrophe or the end of the input is reached.
     */
    private function attributeValueSingleQuotedState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if ($char === '\'') {
            /* U+0027 APOSTROPHE (')
            Switch to the before attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($char === '&') {
            /* U+0026 AMPERSAND (&)
            Resolve the entity and append the result to the value. The helper
            declares no parameters, so none is passed; the state is not
            changed here, so tokenizing continues in this state. */
            $this->entityInAttributeValueState();

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the character
            in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } else {
            /* Anything else
            Append the current input character to the current attribute's value.
            Stay in the attribute value (single-quoted) state. */
            $last = count($this->token['attr']) - 1;
            $this->token['attr'][$last]['value'] .= $char;

            $this->state = 'attributeValueSingleQuoted';
        }
    }
1131
1132
    /**
     * Tokenizer "attribute value (unquoted)" state.
     *
     * Consumes one input character and appends it to the value of the
     * attribute most recently added to the current tag token, until
     * whitespace, '>' or the end of the input terminates the value.
     */
    private function attributeValueUnquotedState()
    {
        // Consume the next input character:
        $this->char++;

        if ($this->char >= $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the EOF
            character in the data state, as the quoted value states do.
            ">=" is used because the before attribute value state has no EOF
            branch of its own and may already have stepped onto EOF before
            handing over to this state. */
            $this->emitToken($this->token);

            $this->char = $this->EOF - 1;
            $this->state = 'data';
            return;
        }

        $char = $this->character($this->char);

        if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            /* U+0009 CHARACTER TABULATION
            U+000A LINE FEED (LF)
            U+000B LINE TABULATION
            U+000C FORM FEED (FF)
            U+0020 SPACE
            Switch to the before attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($char === '&') {
            /* U+0026 AMPERSAND (&)
            Resolve the entity and append the result to the value. The state
            is not changed here, so tokenizing continues in this state. */
            $this->entityInAttributeValueState();

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Emit the current tag token. Switch to the data state. */
            $this->emitToken($this->token);
            $this->state = 'data';

        } else {
            /* Anything else
            Append the current input character to the current attribute's value.
            Stay in the attribute value (unquoted) state. */
            $last = count($this->token['attr']) - 1;
            $this->token['attr'][$last]['value'] .= $char;

            $this->state = 'attributeValueUnquoted';
        }
    }
1168
1169
    /**
     * Handles an ampersand met inside an attribute value.
     *
     * Tries to consume an entity through entity() and appends the result to
     * the value of the attribute most recently added to the current tag
     * token. The tokenizer state is not modified here.
     */
    private function entityInAttributeValueState()
    {
        // Attempt to consume an entity.
        $entity = $this->entity();

        // entity() returns false when nothing could be matched; in that case
        // a literal U+0026 AMPERSAND is appended. The comparison is strict so
        // that a decoded but falsy string such as "0" is kept, not replaced
        // by '&'.
        $char = ($entity === false)
            ? '&'
            : $entity;

        $last = count($this->token['attr']) - 1;
        $this->token['attr'][$last]['value'] .= $char;
    }
1184
1185
    /**
     * Tokenizer "bogus comment" state.
     *
     * Collects the run of characters reported by characters('^>', ...) from
     * the current position, emits it as a comment token, moves the input
     * pointer past it and returns to the data state.
     */
    private function bogusCommentState()
    {
        // Everything from the current position up to (not including) the
        // first '>' or the end of the input becomes the comment's data.
        $comment = array(
            'data' => $this->characters('^>', $this->char),
            'type' => self::COMMENT
        );
        $this->emitToken($comment);

        // Skip over the text that was just emitted.
        $this->char += strlen($comment['data']);

        // Continue in the data state.
        $this->state = 'data';

        // When the comment ran to the end of the input, step back one
        // position so that the data state reconsumes the EOF character.
        if ($this->char === $this->EOF) {
            $this->char--;
        }
    }
1213
1214
    /**
     * Tokenizer "markup declaration open" state.
     *
     * Looks ahead to decide whether a comment, a DOCTYPE or a bogus comment
     * is starting, and moves the input pointer and state accordingly.
     */
    private function markupDeclarationOpenState()
    {
        // Two U+002D HYPHEN-MINUS characters: consume them, start a comment
        // token without data and switch to the comment state.
        if ($this->character($this->char + 1, 2) === '--') {
            $this->char += 2;
            $this->state = 'comment';
            $this->token = array(
                'data' => null,
                'type' => self::COMMENT
            );
            return;
        }

        // Next seven characters match "DOCTYPE" case-insensitively: consume
        // them and switch to the DOCTYPE state. Otherwise this is a parse
        // error: advance one character and switch to the bogus comment state,
        // which starts collecting the comment from that character.
        $isDoctype = strtolower($this->character($this->char + 1, 7)) === 'doctype';

        $this->char += $isDoctype ? 7 : 1;
        $this->state = $isDoctype ? 'doctype' : 'bogusComment';
    }
1242
1243 View Code Duplication
    /**
     * Tokenizer "comment" state.
     *
     * Consumes one input character and appends it to the comment token's
     * data, unless it is a hyphen (possible comment end) or the input ends.
     */
    private function commentState()
    {
        // Advance to, and read, the next input character.
        $this->char++;
        $next = $this->char();

        // U+002D HYPHEN-MINUS: may be the start of the closing "-->".
        if ($next === '-') {
            $this->state = 'commentDash';
            return;
        }

        // Any ordinary character is added to the comment; the state is kept.
        if ($this->char !== $this->EOF) {
            $this->token['data'] .= $next;
            return;
        }

        // EOF is a parse error: emit what was collected and let the data
        // state reconsume the EOF character.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
    }
1269
1270 View Code Duplication
    /**
     * Tokenizer "comment dash" state, entered after one hyphen was seen
     * inside a comment.
     *
     * Consumes one input character and decides whether the comment is about
     * to end or whether the hyphen was ordinary comment data.
     */
    private function commentDashState()
    {
        // Advance to, and read, the next input character.
        $this->char++;
        $next = $this->char();

        // A second U+002D HYPHEN-MINUS: switch to the comment end state.
        if ($next === '-') {
            $this->state = 'commentEnd';
            return;
        }

        // Any ordinary character: the pending hyphen and this character are
        // both comment data; go back to the comment state.
        if ($this->char !== $this->EOF) {
            $this->token['data'] .= '-' . $next;
            $this->state = 'comment';
            return;
        }

        // EOF is a parse error: emit the comment token and let the data state
        // reconsume the EOF character.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
    }
1297
1298
    /**
     * Tokenizer "comment end" state, entered after "--" was seen inside a
     * comment.
     *
     * Consumes one input character: '>' closes the comment, a further hyphen
     * is kept as data, EOF ends the comment, and anything else means the two
     * hyphens were ordinary comment data.
     */
    private function commentEndState()
    {
        // Advance to, and read, the next input character.
        $this->char++;
        $next = $this->char();

        // '>' completes "-->": emit the comment and return to the data state.
        if ($next === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // Another hyphen: add one hyphen to the data and stay in this state.
        if ($next === '-') {
            $this->token['data'] .= '-';
            return;
        }

        // EOF: emit the comment and let the data state reconsume EOF.
        if ($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else: the two hyphens and this character are comment data.
        $this->token['data'] .= '--' . $next;
        $this->state = 'comment';
    }
1321
1322
    /**
     * Tokenizer "DOCTYPE" state.
     *
     * Consumes one input character after the word "DOCTYPE". A whitespace
     * character is swallowed; any other character is put back so that the
     * before DOCTYPE name state reconsumes it.
     */
    private function doctypeState()
    {
        // Advance to the next input character.
        $this->char++;

        // Not whitespace (tab, LF, VT, FF, space): un-consume the character.
        if (!preg_match('/^[\t\n\x0b\x0c ]$/', $this->char())) {
            $this->char--;
        }

        // Either way the before DOCTYPE name state comes next.
        $this->state = 'beforeDoctypeName';
    }
1336
1337
    /**
     * Tokenizer "before DOCTYPE name" state.
     *
     * Consumes one input character. Whitespace is skipped; '>' or EOF emits a
     * nameless DOCTYPE token flagged as erroneous; any other character starts
     * a new DOCTYPE token (lower-case ASCII letters are upper-cased) and
     * switches to the DOCTYPE name state.
     */
    private function beforeDoctypeNameState()
    {
        // Advance to, and read, the next input character.
        $this->char++;
        $char = $this->char();

        // Whitespace: stay in the before DOCTYPE name state.
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            return;
        }

        // Lower-case ASCII letter: begin the name with its upper-case form.
        if (preg_match('/^[a-z]$/', $char)) {
            $this->token = array(
                'name' => strtoupper($char),
                'type' => self::DOCTYPE,
                'error' => true
            );
            $this->state = 'doctypeName';
            return;
        }

        // '>' or EOF: there is no name at all. Emit an erroneous DOCTYPE
        // token and return to the data state; on EOF the pointer is moved
        // back so the data state reconsumes the EOF character.
        $closed = ($char === '>');
        if ($closed || $this->char === $this->EOF) {
            $this->emitToken(
                array(
                    'name' => null,
                    'type' => self::DOCTYPE,
                    'error' => true
                )
            );

            if (!$closed) {
                $this->char--;
            }
            $this->state = 'data';
            return;
        }

        // Anything else: begin the name with the character as it is.
        $this->token = array(
            'name' => $char,
            'type' => self::DOCTYPE,
            'error' => true
        );
        $this->state = 'doctypeName';
    }
1388
1389
    /**
     * Tokenizer "DOCTYPE name" state.
     *
     * Consumes one input character and appends it to the DOCTYPE token's name
     * (lower-case ASCII letters are upper-cased) until whitespace, '>' or EOF
     * ends the name. After every step the token's error flag is recomputed:
     * it is cleared only while the name is exactly "HTML".
     */
    private function doctypeNameState()
    {
        /* Consume the next input character: */
        $this->char++;
        $char = $this->char();

        if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            // Whitespace ends the name. The state name is spelled with the
            // same lower camel case as every other state name in this class.
            $this->state = 'afterDoctypeName';

        } elseif ($char === '>') {
            // End of the DOCTYPE: emit it and return to the data state.
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif (preg_match('/^[a-z]$/', $char)) {
            // Lower-case ASCII letter: append its upper-case form.
            $this->token['name'] .= strtoupper($char);

        } elseif ($this->char === $this->EOF) {
            // EOF: emit the token and let the data state reconsume EOF.
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';

        } else {
            // Anything else is appended to the name unchanged.
            $this->token['name'] .= $char;
        }

        // The DOCTYPE is considered correct only if its name is "HTML".
        $this->token['error'] = ($this->token['name'] !== 'HTML');
    }
1418
1419
    /**
     * Tokenizer "after DOCTYPE name" state.
     *
     * Consumes one input character following the DOCTYPE name. Whitespace is
     * skipped, '>' or EOF emits the token, and any other character marks the
     * token as erroneous and switches to the bogus DOCTYPE state.
     */
    private function afterDoctypeNameState()
    {
        // Advance to, and read, the next input character.
        $this->char++;
        $next = $this->char();

        // Whitespace: stay in the after DOCTYPE name state.
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
            return;
        }

        // '>': the DOCTYPE is complete.
        if ($next === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // EOF: emit the token and let the data state reconsume EOF.
        if ($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else after the name makes the DOCTYPE bogus.
        $this->token['error'] = true;
        $this->state = 'bogusDoctype';
    }
1442
1443 View Code Duplication
    /**
     * Tokenizer "bogus DOCTYPE" state.
     *
     * Consumes and discards input characters one at a time until '>' or EOF
     * is found, then emits the DOCTYPE token and returns to the data state.
     */
    private function bogusDoctypeState()
    {
        // Advance to the next input character and test for the closing '>'.
        $this->char++;
        $closed = ($this->char() === '>');

        // Neither '>' nor EOF: ignore the character and stay in this state.
        if (!$closed && $this->char !== $this->EOF) {
            return;
        }

        $this->emitToken($this->token);

        // On EOF, step back so that the data state reconsumes the EOF
        // character.
        if (!$closed) {
            $this->char--;
        }

        $this->state = 'data';
    }
1462
1463
    /**
     * Attempts to consume an entity (character reference) that starts at the
     * U+0026 AMPERSAND the input pointer is currently on. Used for entities
     * in text and in attribute values.
     *
     * On success the input pointer is left on the last character belonging
     * to the entity (including a terminating semicolon when one directly
     * follows) and the decoded text is returned. On failure the pointer is
     * restored to the ampersand, i.e. nothing is consumed.
     *
     * @return string|false The decoded character(s), or false when no entity
     *                      could be matched.
     */
    private function entity()
    {
        $start = $this->char;

        // Stays null until an entity has been recognised; this is also what
        // decides between the failure and the success path below.
        $entity = null;

        // The behaviour depends on the identity of the next character (the
        // one immediately after the U+0026 AMPERSAND character):
        switch ($this->character($start + 1)) {
            // U+0023 NUMBER SIGN (#): numeric entity
            case '#':
                // The behaviour further depends on the character after the
                // U+0023 NUMBER SIGN, which sits two positions past the
                // ampersand: 'x' or 'X' selects hexadecimal digits
                // (0-9, A-F, a-f), anything else selects decimal digits.
                $marker = $this->character($start + 2);

                if ($marker === 'x' || $marker === 'X') {
                    $prefix = '#x';
                    $char_class = '0-9A-Fa-f';
                } else {
                    $prefix = '#';
                    $char_class = '0-9';
                }

                // Consume as many characters as match the range of characters
                // given above, starting right behind the prefix.
                $digits_at = $start + 1 + strlen($prefix);
                $e_name = (string)$this->characters($char_class, $digits_at);

                // Defensive check: only accept a non-empty run that really
                // consists of the expected digits.
                if (preg_match('/^[' . $char_class . ']+\z/', $e_name)) {
                    $entity = $prefix . $e_name;

                    // Leave the pointer on the last digit, or on the
                    // semicolon that terminates the entity.
                    $this->char = $digits_at + strlen($e_name) - 1;
                    if ($this->character($this->char + 1) === ';') {
                        $this->char++;
                    }
                }
                break;

            // Anything else: named entity
            default:
                // Consume the maximum number of characters possible, with the
                // consumed characters case-sensitively matching one of the
                // identifiers in the entities table.
                $e_name = (string)$this->characters('0-9A-Za-z;', $start + 1);
                $len = strlen($e_name);

                // No candidate longer than the longest table entry can
                // match, so the search below is bounded by that length.
                $longest = 0;
                foreach ($this->entities as $known) {
                    $longest = max($longest, strlen($known));
                }

                // Try the longest candidate first so that an entity whose
                // name merely starts with another entity's name is not cut
                // short.
                for ($c = min($len, $longest); $c >= 1; $c--) {
                    $id = substr($e_name, 0, $c);

                    if (!in_array($id, $this->entities, true)) {
                        continue;
                    }

                    $consumed = $c;
                    if ($id[$c - 1] !== ';' && $c < $len && $e_name[$c] === ';') {
                        $consumed++; // consume extra semicolon
                    }

                    // A semicolon is appended when decoding below, so the
                    // name is stored without one.
                    $entity = rtrim($id, ';');
                    $this->char = $start + $consumed;
                    break;
                }
                break;
        }

        if ($entity === null) {
            // If no match can be made, then this is a parse error. No
            // characters are consumed, and nothing is returned.
            $this->char = $start;
            return false;
        }

        // Return the character(s) corresponding to the entity.
        return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
    }
1553
1554
    /**
     * Passes a token to the tree constructor and updates the tokenizer's
     * content model from the outcome.
     *
     * @param array $token Token to emit; its 'type' key is read here.
     */
    private function emitToken($token)
    {
        $result = $this->tree->emitToken($token);

        // An integer result is taken over as the new content model.
        if (is_int($result)) {
            $this->content_model = $result;
            return;
        }

        // Otherwise an end tag switches the content model back to PCDATA.
        if ($token['type'] === self::ENDTAG) {
            $this->content_model = self::PCDATA;
        }
    }
1565
1566
    /**
     * Ends tokenizing: clears the tokenizer state and hands an EOF token to
     * the tree constructor.
     */
    private function EOF()
    {
        // A null state marks the tokenizer as finished.
        $this->state = null;

        $eofToken = array(
            'type' => self::EOF
        );
        $this->tree->emitToken($eofToken);
    }
1575
}
1576
1577
class HTML5TreeConstructer
0 ignored issues
show
Coding Style Compatibility introduced by
PSR1 recommends that each class should be in its own file to aid autoloaders.

Having each class in a dedicated file usually plays nice with PSR autoloaders and is therefore a well established practice. If you use other autoloaders, you might not want to follow this rule.

Loading history...
1578
{
1579
    /**
     * Stack of open elements. Index 0 is the root <html> element once the
     * root element phase has created and pushed it.
     */
    public $stack = array();

    /** Current tree construction phase; one of the *_PHASE constants. */
    private $phase;
    /** Current insertion mode, consulted while in the main phase. */
    private $mode;
    /** DOMDocument the tree is built into. */
    private $dom;
    // NOTE(review): presumably used for foster parenting of misplaced table
    // content -- confirm against the table handlers.
    private $foster_parent = null;
    // NOTE(review): looks like the list of active formatting elements -- confirm.
    private $a_formatting = array();

    /** Head element pointer; set when a <head> element is inserted. */
    private $head_pointer = null;
    // NOTE(review): presumably the form element pointer -- confirm.
    private $form_pointer = null;

    /** Tag names listed as scoping elements. */
    private $scoping = array('button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th');

    /** Tag names listed as formatting elements. */
    private $formatting = array(
        'a', 'b', 'big', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u'
    );

    /** Tag names listed as special elements. */
    private $special = array(
        'address', 'area', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br',
        'center', 'col', 'colgroup', 'dd', 'dir', 'div', 'dl', 'dt',
        'embed', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
        'h4', 'h5', 'h6', 'head', 'hr', 'iframe', 'image', 'img',
        'input', 'isindex', 'li', 'link', 'listing', 'menu', 'meta', 'noembed',
        'noframes', 'noscript', 'ol', 'optgroup', 'option', 'p', 'param', 'plaintext',
        'pre', 'script', 'select', 'spacer', 'style', 'tbody', 'textarea', 'tfoot',
        'thead', 'title', 'tr', 'ul', 'wbr'
    );

    // The different phases.
    const INIT_PHASE = 0;
    const ROOT_PHASE = 1;
    const MAIN_PHASE = 2;
    const END_PHASE = 3;

    // The different insertion modes for the main phase.
    // Note that these share a numeric range with the phase constants
    // (for example IN_BODY and END_PHASE are both 3).
    const BEFOR_HEAD = 0;
    const IN_HEAD = 1;
    const AFTER_HEAD = 2;
    const IN_BODY = 3;
    const IN_TABLE = 4;
    const IN_CAPTION = 5;
    const IN_CGROUP = 6;
    const IN_TBODY = 7;
    const IN_ROW = 8;
    const IN_CELL = 9;
    const IN_SELECT = 10;
    const AFTER_BODY = 11;
    const IN_FRAME = 12;
    const AFTR_FRAME = 13;

    // The different types of elements.
    const SPECIAL = 0;
    const SCOPING = 1;
    const FORMATTING = 2;
    const PHRASING = 3;

    const MARKER = 0;
1699
1700
    /**
     * Creates the DOM document the tree is built into and puts the
     * constructor in its starting phase and insertion mode.
     */
    public function __construct()
    {
        $document = new DOMDocument;
        $document->encoding = 'UTF-8';
        $document->preserveWhiteSpace = true;
        $document->substituteEntities = true;
        $document->strictErrorChecking = false;

        $this->dom = $document;
        $this->mode = self::BEFOR_HEAD;
        $this->phase = self::INIT_PHASE;
    }
1711
1712
    // Process tag tokens
1713
    /**
     * Routes a token to the handler for the current tree construction phase.
     *
     * @param array $token Token array produced by the tokeniser.
     * @return mixed|null Whatever the phase handler returns, or null when
     *                    the current phase matches none of the known phases.
     */
    public function emitToken($token)
    {
        // Each case returns directly, so no break statements are needed.
        switch ($this->phase) {
            case self::INIT_PHASE:
                return $this->initPhase($token);

            case self::ROOT_PHASE:
                return $this->rootElementPhase($token);

            case self::MAIN_PHASE:
                return $this->mainPhase($token);

            case self::END_PHASE:
                return $this->trailingEndPhase($token);
        }

        return null;
    }
1731
1732
    /**
     * Handles a token during the initial phase of tree construction.
     *
     * @param array $token Token array; reads 'type' and, when present,
     *                     'error' and 'data'.
     * @return mixed|null The result of rootElementPhase() when the token is
     *                    reprocessed there, otherwise null.
     */
    private function initPhase($token)
    {
        /* Initially, the tree construction stage must handle each token
        emitted from the tokenisation stage as follows: */

        /* A DOCTYPE token that is marked as being in error
        A comment token
        A start tag token
        An end tag token
        A character token that is not one of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
            or U+0020 SPACE
        An end-of-file token */
        if ((isset($token['error']) && $token['error']) ||
            $token['type'] === HTML5::COMMENT ||
            $token['type'] === HTML5::STARTTAG ||
            $token['type'] === HTML5::ENDTAG ||
            $token['type'] === HTML5::EOF ||
            ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
                !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))
        ) {
            /* This specification does not define how to handle this case. In
            particular, user agents may ignore the entirety of this specification
            altogether for such documents, and instead invoke special parse modes
            with a greater emphasis on backwards compatibility. */

            $this->phase = self::ROOT_PHASE;
            return $this->rootElementPhase($token);
        }

        /* A DOCTYPE token marked as being correct */
        if (isset($token['error']) && !$token['error']) {
            /* The specification asks for a DocumentType node to be appended
            to the Document here. This implementation does not append one:
            the previous code only instantiated a DOMDocumentType (with
            constructor arguments that class does not accept) and discarded
            it, so that dead statement has been removed. */

            /* Then, switch to the root element phase of the tree construction
            stage. */
            $this->phase = self::ROOT_PHASE;
            return null;
        }

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if (isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            /* Append that character to the Document node. */
            $text = $this->dom->createTextNode($token['data']);
            $this->dom->appendChild($text);
        }

        return null;
    }
1787
1788
    /**
     * Handles a token during the root element phase: comments and
     * whitespace go straight onto the document, while anything that implies
     * content creates the <html> root and moves on to the main phase.
     *
     * @param array $token Token array; reads 'type' and, for comment and
     *                     character tokens, 'data'.
     * @return mixed|null The result of mainPhase() when the token is
     *                    reprocessed there, otherwise null.
     */
    private function rootElementPhase($token)
    {
        $type = $token['type'];

        /* A DOCTYPE token: parse error, ignore the token. */
        if ($type === HTML5::DOCTYPE) {
            return null;
        }

        /* A comment token: append a Comment node to the Document object
        with the data attribute set to the data given in the comment token. */
        if ($type === HTML5::COMMENT) {
            $this->dom->appendChild($this->dom->createComment($token['data']));
            return null;
        }

        if ($type === HTML5::CHARACTR) {
            /* A character token made up only of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED
            (FF), or U+0020 SPACE: append it to the Document node. */
            if (preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
                $this->dom->appendChild($this->dom->createTextNode($token['data']));
                return null;
            }
            // Any other character token falls through to root creation.
        } elseif ($type !== HTML5::STARTTAG &&
            $type !== HTML5::ENDTAG &&
            $type !== HTML5::EOF
        ) {
            // Not a token kind this phase acts on.
            return null;
        }

        /* Create an HTMLElement node with the tag name html, in the HTML
        namespace. Append it to the Document object. Switch to the main
        phase and reprocess the current token. */
        $root = $this->dom->createElement('html');
        $this->dom->appendChild($root);
        $this->stack[] = $root;

        $this->phase = self::MAIN_PHASE;

        return $this->mainPhase($token);
    }
1838
1839
    /**
     * Handles a token during the main phase of tree construction.
     *
     * DOCTYPE tokens are ignored, a repeated <html> start tag only
     * contributes attributes the root element lacks, end-of-file generates
     * implied end tags, and every other token goes to the handler for the
     * current insertion mode.
     *
     * @param array $token Token array; reads 'type' and, for start tags,
     *                     'name' and 'attr'.
     * @return mixed|null Whatever the insertion mode handler returns, or
     *                    null when the token is consumed here or the mode
     *                    matches no known insertion mode.
     */
    private function mainPhase($token)
    {
        /* Tokens in the main phase must be handled as follows: */

        /* A DOCTYPE token */
        if ($token['type'] === HTML5::DOCTYPE) {
            // Parse error. Ignore the token.
            return null;
        }

        /* A start tag token with the tag name "html" */
        if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
            /* If this start tag token was not the first start tag token, then
            it is a parse error. */

            /* For each attribute on the token, check to see if the attribute
            is already present on the top element of the stack of open elements.
            If it is not, add the attribute and its corresponding value to that
            element. */
            foreach ($token['attr'] as $attr) {
                if (!$this->stack[0]->hasAttribute($attr['name'])) {
                    $this->stack[0]->setAttribute($attr['name'], $attr['value']);
                }
            }

            return null;
        }

        /* An end-of-file token */
        if ($token['type'] === HTML5::EOF) {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            return null;
        }

        /* Anything else: depends on the insertion mode.

        The former `case self::END_PHASE` has been removed from this switch.
        END_PHASE is a phase, not an insertion mode, and its value (3) is
        the same as IN_BODY, which is listed first, so that case could never
        be selected. */
        switch ($this->mode) {
            case self::BEFOR_HEAD:
                return $this->beforeHead($token);

            case self::IN_HEAD:
                return $this->inHead($token);

            case self::AFTER_HEAD:
                return $this->afterHead($token);

            case self::IN_BODY:
                return $this->inBody($token);

            case self::IN_TABLE:
                return $this->inTable($token);

            case self::IN_CAPTION:
                return $this->inCaption($token);

            case self::IN_CGROUP:
                return $this->inColumnGroup($token);

            case self::IN_TBODY:
                return $this->inTableBody($token);

            case self::IN_ROW:
                return $this->inRow($token);

            case self::IN_CELL:
                return $this->inCell($token);

            case self::IN_SELECT:
                return $this->inSelect($token);

            case self::AFTER_BODY:
                return $this->afterBody($token);

            case self::IN_FRAME:
                return $this->inFrameset($token);

            case self::AFTR_FRAME:
                return $this->afterFrameset($token);
        }

        return null;
    }
1920
1921
    /**
     * Handles a token in the "before head" insertion mode.
     *
     * @param array $token Token array; reads 'type' and, depending on the
     *                     type, 'data' or 'name'.
     * @return mixed|null The result of inHead() when the token is
     *                    reprocessed there, otherwise null.
     */
    private function beforeHead($token)
    {
        $type = $token['type'];
        $is_character = ($type === HTML5::CHARACTR);

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if ($is_character && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);
            return null;
        }

        /* A comment token */
        if ($type === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);
            return null;
        }

        /* A start tag token with the tag name "head" */
        if ($type === HTML5::STARTTAG && $token['name'] === 'head') {
            /* Create an element for the token, append the new element to the
            current node and push it onto the stack of open elements, then
            set the head element pointer to this new element node. */
            $this->head_pointer = $this->insertElement($token);

            /* Change the insertion mode to "in head". */
            $this->mode = self::IN_HEAD;
            return null;
        }

        /* Any other start tag token, an end tag with the tag name "html",
        or a character token that is not whitespace.

        A character token that reaches this point has already failed the
        whitespace test above, so it is necessarily non-whitespace. The
        previous second regex test (which lacked the "+" quantifier used by
        every other whitespace check) was redundant and has been dropped. */
        $implies_head = $type === HTML5::STARTTAG
            || $is_character
            || ($type === HTML5::ENDTAG && $token['name'] === 'html');

        if ($implies_head) {
            /* Act as if a start tag token with the tag name "head" and no
            attributes had been seen, then reprocess the current token. */
            $this->beforeHead(
                array(
                    'name' => 'head',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );

            return $this->inHead($token);
        }

        /* Any other end tag: parse error. Ignore the token. */
        return null;
    }
1982
1983
    private function inHead($token)
0 ignored issues
show
Documentation introduced by
The return type could not be reliably inferred; please add a @return annotation.

Our type inference engine is quite powerful, but sometimes the code does not provide enough clues to go by. In these cases we request you to add a @return annotation as described here.

Loading history...
1984
    {
1985
        /* Handle the token as follows: */
1986
1987
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
1988
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1989
        or U+0020 SPACE.
1990
1991
        THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
1992
        or script element, append the character to the current node regardless
1993
        of its content. */
1994
        if (($token['type'] === HTML5::CHARACTR &&
1995
                preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
1996
                $token['type'] === HTML5::CHARACTR && in_array(
1997
                    end($this->stack)->nodeName,
1998
                    array('title', 'style', 'script')
1999
                ))
2000
        ) {
2001
            /* Append the character to the current node. */
2002
            $this->insertText($token['data']);
2003
2004
            /* A comment token */
2005
        } elseif ($token['type'] === HTML5::COMMENT) {
2006
            /* Append a Comment node to the current node with the data attribute
2007
            set to the data given in the comment token. */
2008
            $this->insertComment($token['data']);
2009
2010
        } elseif ($token['type'] === HTML5::ENDTAG &&
2011
            in_array($token['name'], array('title', 'style', 'script'))
2012
        ) {
2013
            array_pop($this->stack);
2014
            return HTML5::PCDATA;
2015
2016
            /* A start tag with the tag name "title" */
2017
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
2018
            /* Create an element for the token and append the new element to the
2019
            node pointed to by the head element pointer, or, if that is null
2020
            (innerHTML case), to the current node. */
2021 View Code Duplication
            if ($this->head_pointer !== null) {
2022
                $element = $this->insertElement($token, false);
2023
                $this->head_pointer->appendChild($element);
2024
2025
            } else {
2026
                $element = $this->insertElement($token);
0 ignored issues
show
Unused Code introduced by
$element is not used, you could remove the assignment.

This check looks for variable assignements that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten for every possible time line.

Loading history...
2027
            }
2028
2029
            /* Switch the tokeniser's content model flag  to the RCDATA state. */
2030
            return HTML5::RCDATA;
2031
2032
            /* A start tag with the tag name "style" */
2033
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
2034
            /* Create an element for the token and append the new element to the
2035
            node pointed to by the head element pointer, or, if that is null
2036
            (innerHTML case), to the current node. */
2037 View Code Duplication
            if ($this->head_pointer !== null) {
2038
                $element = $this->insertElement($token, false);
2039
                $this->head_pointer->appendChild($element);
2040
2041
            } else {
2042
                $this->insertElement($token);
2043
            }
2044
2045
            /* Switch the tokeniser's content model flag  to the CDATA state. */
2046
            return HTML5::CDATA;
2047
2048
            /* A start tag with the tag name "script" */
2049
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
2050
            /* Create an element for the token. */
2051
            $element = $this->insertElement($token, false);
2052
            $this->head_pointer->appendChild($element);
2053
2054
            /* Switch the tokeniser's content model flag  to the CDATA state. */
2055
            return HTML5::CDATA;
2056
2057
            /* A start tag with the tag name "base", "link", or "meta" */
2058
        } elseif ($token['type'] === HTML5::STARTTAG && in_array(
2059
                $token['name'],
2060
                array('base', 'link', 'meta')
2061
            )
2062
        ) {
2063
            /* Create an element for the token and append the new element to the
2064
            node pointed to by the head element pointer, or, if that is null
2065
            (innerHTML case), to the current node. */
2066 View Code Duplication
            if ($this->head_pointer !== null) {
2067
                $element = $this->insertElement($token, false);
2068
                $this->head_pointer->appendChild($element);
2069
                array_pop($this->stack);
2070
2071
            } else {
2072
                $this->insertElement($token);
2073
            }
2074
2075
            /* An end tag with the tag name "head" */
2076 View Code Duplication
        } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
2077
            /* If the current node is a head element, pop the current node off
2078
            the stack of open elements. */
2079
            if ($this->head_pointer->isSameNode(end($this->stack))) {
2080
                array_pop($this->stack);
2081
2082
                /* Otherwise, this is a parse error. */
2083
            } else {
0 ignored issues
show
Unused Code introduced by
This else statement is empty and can be removed.

This check looks for the else branches of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These else branches can be removed.

if (rand(1, 6) > 3) {
print "Check failed";
} else {
    //print "Check succeeded";
}

could be turned into

if (rand(1, 6) > 3) {
    print "Check failed";
}

This is much more concise to read.

Loading history...
2084
                // k
2085
            }
2086
2087
            /* Change the insertion mode to "after head". */
2088
            $this->mode = self::AFTER_HEAD;
2089
2090
            /* A start tag with the tag name "head" or an end tag except "html". */
2091
        } elseif (($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
0 ignored issues
show
Unused Code introduced by
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2092
            ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')
2093
        ) {
2094
            // Parse error. Ignore the token.
2095
2096
            /* Anything else */
2097
        } else {
2098
            /* If the current node is a head element, act as if an end tag
2099
            token with the tag name "head" had been seen. */
2100
            if ($this->head_pointer->isSameNode(end($this->stack))) {
2101
                $this->inHead(
2102
                    array(
2103
                        'name' => 'head',
2104
                        'type' => HTML5::ENDTAG
2105
                    )
2106
                );
2107
2108
                /* Otherwise, change the insertion mode to "after head". */
2109
            } else {
2110
                $this->mode = self::AFTER_HEAD;
2111
            }
2112
2113
            /* Then, reprocess the current token. */
2114
            return $this->afterHead($token);
2115
        }
2116
        return null;
2117
    }
2118
2119
    /**
     * Processes a single token while the tree builder is in the "after head"
     * insertion mode.
     *
     * Whitespace-only character tokens and comments are inserted into the
     * current node. A "body" or "frameset" start tag inserts the element and
     * switches the mode to IN_BODY or IN_FRAME respectively. Start tags that
     * belong in the head (base, link, meta, script, style, title) switch back
     * to IN_HEAD and are reprocessed by inHead(). Every other token first
     * triggers a synthetic "body" start tag and is then reprocessed by inBody().
     *
     * @param array $token Token array. 'type' is read for every token, 'data'
     *                     for character and comment tokens, 'name' for start tags.
     * @return mixed|null Whatever inHead() or inBody() returns when the token
     *                    is handed on to one of them, or null otherwise.
     */
    private function afterHead($token)
0 ignored issues
show
Documentation introduced by
The return type could not be reliably inferred; please add a @return annotation.

Our type inference engine in quite powerful, but sometimes the code does not provide enough clues to go by. In these cases we request you to add a @return annotation as described here.

Loading history...
2120
    {
2121
        /* Handle the token as follows: */
2122
2123
        /* A character token that is one of U+0009 CHARACTER TABULATION,
2124
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2125
        or U+0020 SPACE */
2126
        if ($token['type'] === HTML5::CHARACTR &&
2127
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
2128
        ) {
2129
            /* Append the character to the current node. */
2130
            $this->insertText($token['data']);
2131
2132
            /* A comment token */
2133
        } elseif ($token['type'] === HTML5::COMMENT) {
2134
            /* Append a Comment node to the current node with the data attribute
2135
            set to the data given in the comment token. */
2136
            $this->insertComment($token['data']);
2137
2138
            /* A start tag token with the tag name "body" */
2139
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') {
2140
            /* Insert a body element for the token. */
2141
            $this->insertElement($token);
2142
2143
            /* Change the insertion mode to "in body". */
2144
            $this->mode = self::IN_BODY;
2145
2146
            /* A start tag token with the tag name "frameset" */
2147
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') {
2148
            /* Insert a frameset element for the token. */
2149
            $this->insertElement($token);
2150
2151
            /* Change the insertion mode to "in frameset". */
2152
            $this->mode = self::IN_FRAME;
2153
2154
            /* A start tag token whose tag name is one of: "base", "link", "meta",
2155
            "script", "style", "title" */
2156
        } elseif ($token['type'] === HTML5::STARTTAG && in_array(
2157
                $token['name'],
2158
                array('base', 'link', 'meta', 'script', 'style', 'title')
2159
            )
2160
        ) {
2161
            /* Parse error. Switch the insertion mode back to "in head" and
2162
            reprocess the token. */
2163
            $this->mode = self::IN_HEAD;
2164
            return $this->inHead($token);
2165
2166
            /* Anything else */
2167
        } else {
2168
            /* Act as if a start tag token with the tag name "body" and no
2169
            attributes had been seen, and then reprocess the current token. */
            /* The synthetic token takes the "body" start-tag branch above,
            which inserts the element and sets the mode to IN_BODY; the
            return value of this recursive call is discarded. */
2170
            $this->afterHead(
2171
                array(
2172
                    'name' => 'body',
2173
                    'type' => HTML5::STARTTAG,
2174
                    'attr' => array()
2175
                )
2176
            );
2177
2178
            return $this->inBody($token);
2179
        }
2180
        return null;
2181
    }
2182
2183
    private function inBody($token)
0 ignored issues
show
Documentation introduced by
The return type could not be reliably inferred; please add a @return annotation.

Our type inference engine is quite powerful, but sometimes the code does not provide enough clues to go by. In these cases we request you to add a @return annotation as described here.

Loading history...
2184
    {
2185
        /* Handle the token as follows: */
2186
2187
        switch ($token['type']) {
2188
            /* A character token */
2189
            case HTML5::CHARACTR:
2190
                /* Reconstruct the active formatting elements, if any. */
2191
                $this->reconstructActiveFormattingElements();
2192
2193
                /* Append the token's character to the current node. */
2194
                $this->insertText($token['data']);
2195
                break;
2196
2197
            /* A comment token */
2198
            case HTML5::COMMENT:
2199
                /* Append a Comment node to the current node with the data
2200
                attribute set to the data given in the comment token. */
2201
                $this->insertComment($token['data']);
2202
                break;
2203
2204
            case HTML5::STARTTAG:
2205
                switch ($token['name']) {
2206
                    /* A start tag token whose tag name is one of: "script",
2207
                    "style" */
2208
                    case 'script':
2209
                    case 'style':
2210
                        /* Process the token as if the insertion mode had been "in
2211
                        head". */
2212
                        return $this->inHead($token);
2213
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded for example by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2214
2215
                    /* A start tag token whose tag name is one of: "base", "link",
2216
                    "meta", "title" */
2217
                    case 'base':
2218
                    case 'link':
2219
                    case 'meta':
2220
                    case 'title':
2221
                        /* Parse error. Process the token as if the insertion mode
2222
                        had    been "in head". */
2223
                        return $this->inHead($token);
2224
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded for example by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2225
2226
                    /* A start tag token with the tag name "body" */
2227
                    case 'body':
2228
                        /* Parse error. If the second element on the stack of open
2229
                        elements is not a body element, or, if the stack of open
2230
                        elements has only one node on it, then ignore the token.
2231
                        (innerHTML case) */
2232
                        if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2233
                            // Ignore
2234
2235
                            /* Otherwise, for each attribute on the token, check to see
2236
                            if the attribute is already present on the body element (the
2237
                            second element)    on the stack of open elements. If it is not,
2238
                            add the attribute and its corresponding value to that
2239
                            element. */
2240
                        } else {
2241
                            foreach ($token['attr'] as $attr) {
2242
                                if (!$this->stack[1]->hasAttribute($attr['name'])) {
2243
                                    $this->stack[1]->setAttribute($attr['name'], $attr['value']);
2244
                                }
2245
                            }
2246
                        }
2247
                        break;
2248
2249
                    /* A start tag whose tag name is one of: "address",
2250
                    "blockquote", "center", "dir", "div", "dl", "fieldset",
2251
                    "listing", "menu", "ol", "p", "ul" */
2252
                    case 'address':
2253
                    case 'blockquote':
2254
                    case 'center':
2255
                    case 'dir':
2256
                    case 'div':
2257
                    case 'dl':
2258
                    case 'fieldset':
2259
                    case 'listing':
2260
                    case 'menu':
2261
                    case 'ol':
2262
                    case 'p':
2263
                    case 'ul':
2264
                        /* If the stack of open elements has a p element in scope,
2265
                        then act as if an end tag with the tag name p had been
2266
                        seen. */
2267
                        if ($this->elementInScope('p')) {
2268
                            $this->emitToken(
2269
                                array(
2270
                                    'name' => 'p',
2271
                                    'type' => HTML5::ENDTAG
2272
                                )
2273
                            );
2274
                        }
2275
2276
                        /* Insert an HTML element for the token. */
2277
                        $this->insertElement($token);
2278
                        break;
2279
2280
                    /* A start tag whose tag name is "form" */
2281
                    case 'form':
2282
                        /* If the form element pointer is not null, ignore the
2283
                        token with a parse error. */
2284
                        if ($this->form_pointer !== null) {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2285
                            // Ignore.
2286
2287
                            /* Otherwise: */
2288
                        } else {
2289
                            /* If the stack of open elements has a p element in
2290
                            scope, then act as if an end tag with the tag name p
2291
                            had been seen. */
2292
                            if ($this->elementInScope('p')) {
2293
                                $this->emitToken(
2294
                                    array(
2295
                                        'name' => 'p',
2296
                                        'type' => HTML5::ENDTAG
2297
                                    )
2298
                                );
2299
                            }
2300
2301
                            /* Insert an HTML element for the token, and set the
2302
                            form element pointer to point to the element created. */
2303
                            $element = $this->insertElement($token);
2304
                            $this->form_pointer = $element;
2305
                        }
2306
                        break;
2307
2308
                    /* A start tag whose tag name is "li", "dd" or "dt" */
2309
                    case 'li':
2310
                    case 'dd':
2311
                    case 'dt':
2312
                        /* If the stack of open elements has a p  element in scope,
2313
                        then act as if an end tag with the tag name p had been
2314
                        seen. */
2315
                        if ($this->elementInScope('p')) {
2316
                            $this->emitToken(
2317
                                array(
2318
                                    'name' => 'p',
2319
                                    'type' => HTML5::ENDTAG
2320
                                )
2321
                            );
2322
                        }
2323
2324
                        $stack_length = count($this->stack) - 1;
2325
2326
                        for ($n = $stack_length; 0 <= $n; $n--) {
2327
                            /* 1. Initialise node to be the current node (the
2328
                            bottommost node of the stack). */
2329
                            $stop = false;
0 ignored issues
show
Unused Code introduced by
$stop is not used, you could remove the assignment.

This check looks for variable assignments that are either overwritten by other assignments or where the variable is not used subsequently.

$myVar = 'Value';
$higher = false;

if (rand(1, 6) > 3) {
    $higher = true;
} else {
    $higher = false;
}

Both the $myVar assignment in line 1 and the $higher assignment in line 2 are dead. The first because $myVar is never used and the second because $higher is always overwritten on every possible execution path.

Loading history...
2330
                            $node = $this->stack[$n];
2331
                            $cat = $this->getElementCategory($node->tagName);
2332
2333
                            /* 2. If node is an li, dd or dt element, then pop all
2334
                            the    nodes from the current node up to node, including
2335
                            node, then stop this algorithm. */
2336
                            if ($token['name'] === $node->tagName || ($token['name'] !== 'li'
2337
                                    && ($node->tagName === 'dd' || $node->tagName === 'dt'))
2338
                            ) {
2339
                                for ($x = $stack_length; $x >= $n; $x--) {
2340
                                    array_pop($this->stack);
2341
                                }
2342
2343
                                break;
2344
                            }
2345
2346
                            /* 3. If node is not in the formatting category, and is
2347
                            not    in the phrasing category, and is not an address or
2348
                            div element, then stop this algorithm. */
2349
                            if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
2350
                                $node->tagName !== 'address' && $node->tagName !== 'div'
2351
                            ) {
2352
                                break;
2353
                            }
2354
                        }
2355
2356
                        /* Finally, insert an HTML element with the same tag
2357
                        name as the    token's. */
2358
                        $this->insertElement($token);
2359
                        break;
2360
2361
                    /* A start tag token whose tag name is "plaintext" */
2362 View Code Duplication
                    case 'plaintext':
2363
                        /* If the stack of open elements has a p  element in scope,
2364
                        then act as if an end tag with the tag name p had been
2365
                        seen. */
2366
                        if ($this->elementInScope('p')) {
2367
                            $this->emitToken(
2368
                                array(
2369
                                    'name' => 'p',
2370
                                    'type' => HTML5::ENDTAG
2371
                                )
2372
                            );
2373
                        }
2374
2375
                        /* Insert an HTML element for the token. */
2376
                        $this->insertElement($token);
2377
2378
                        return HTML5::PLAINTEXT;
2379
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded for example by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2380
2381
                    /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
2382
                    "h5", "h6" */
2383
                    case 'h1':
2384
                    case 'h2':
2385
                    case 'h3':
2386
                    case 'h4':
2387
                    case 'h5':
2388
                    case 'h6':
2389
                        /* If the stack of open elements has a p  element in scope,
2390
                        then act as if an end tag with the tag name p had been seen. */
2391
                        if ($this->elementInScope('p')) {
2392
                            $this->emitToken(
2393
                                array(
2394
                                    'name' => 'p',
2395
                                    'type' => HTML5::ENDTAG
2396
                                )
2397
                            );
2398
                        }
2399
2400
                        /* If the stack of open elements has in scope an element whose
2401
                        tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
2402
                        this is a parse error; pop elements from the stack until an
2403
                        element with one of those tag names has been popped from the
2404
                        stack. */
2405
                        while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
2406
                            array_pop($this->stack);
2407
                        }
2408
2409
                        /* Insert an HTML element for the token. */
2410
                        $this->insertElement($token);
2411
                        break;
2412
2413
                    /* A start tag whose tag name is "a" */
2414
                    case 'a':
2415
                        /* If the list of active formatting elements contains
2416
                        an element whose tag name is "a" between the end of the
2417
                        list and the last marker on the list (or the start of
2418
                        the list if there is no marker on the list), then this
2419
                        is a parse error; act as if an end tag with the tag name
2420
                        "a" had been seen, then remove that element from the list
2421
                        of active formatting elements and the stack of open
2422
                        elements if the end tag didn't already remove it (it
2423
                        might not have if the element is not in table scope). */
2424
                        $leng = count($this->a_formatting);
2425
2426
                        for ($n = $leng - 1; $n >= 0; $n--) {
2427
                            if ($this->a_formatting[$n] === self::MARKER) {
2428
                                break;
2429
2430
                            } elseif ($this->a_formatting[$n]->nodeName === 'a') {
2431
                                $this->emitToken(
2432
                                    array(
2433
                                        'name' => 'a',
2434
                                        'type' => HTML5::ENDTAG
2435
                                    )
2436
                                );
2437
                                break;
2438
                            }
2439
                        }
2440
2441
                        /* Reconstruct the active formatting elements, if any. */
2442
                        $this->reconstructActiveFormattingElements();
2443
2444
                        /* Insert an HTML element for the token. */
2445
                        $el = $this->insertElement($token);
2446
2447
                        /* Add that element to the list of active formatting
2448
                        elements. */
2449
                        $this->a_formatting[] = $el;
2450
                        break;
2451
2452
                    /* A start tag whose tag name is one of: "b", "big", "em", "font",
2453
                    "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
2454
                    case 'b':
2455
                    case 'big':
2456
                    case 'em':
2457
                    case 'font':
2458
                    case 'i':
2459
                    case 'nobr':
2460
                    case 's':
2461
                    case 'small':
2462
                    case 'strike':
2463
                    case 'strong':
2464
                    case 'tt':
2465
                    case 'u':
2466
                        /* Reconstruct the active formatting elements, if any. */
2467
                        $this->reconstructActiveFormattingElements();
2468
2469
                        /* Insert an HTML element for the token. */
2470
                        $el = $this->insertElement($token);
2471
2472
                        /* Add that element to the list of active formatting
2473
                        elements. */
2474
                        $this->a_formatting[] = $el;
2475
                        break;
2476
2477
                    /* A start tag token whose tag name is "button" */
2478
                    case 'button':
2479
                        /* If the stack of open elements has a button element in scope,
2480
                        then this is a parse error; act as if an end tag with the tag
2481
                        name "button" had been seen, then reprocess the token. (We don't
2482
                        do that. Unnecessary.) */
2483
                        if ($this->elementInScope('button')) {
2484
                            $this->inBody(
2485
                                array(
2486
                                    'name' => 'button',
2487
                                    'type' => HTML5::ENDTAG
2488
                                )
2489
                            );
2490
                        }
2491
2492
                        /* Reconstruct the active formatting elements, if any. */
2493
                        $this->reconstructActiveFormattingElements();
2494
2495
                        /* Insert an HTML element for the token. */
2496
                        $this->insertElement($token);
2497
2498
                        /* Insert a marker at the end of the list of active
2499
                        formatting elements. */
2500
                        $this->a_formatting[] = self::MARKER;
2501
                        break;
2502
2503
                    /* A start tag token whose tag name is one of: "marquee", "object" */
2504
                    case 'marquee':
2505
                    case 'object':
2506
                        /* Reconstruct the active formatting elements, if any. */
2507
                        $this->reconstructActiveFormattingElements();
2508
2509
                        /* Insert an HTML element for the token. */
2510
                        $this->insertElement($token);
2511
2512
                        /* Insert a marker at the end of the list of active
2513
                        formatting elements. */
2514
                        $this->a_formatting[] = self::MARKER;
2515
                        break;
2516
2517
                    /* A start tag token whose tag name is "xmp" */
2518
                    case 'xmp':
2519
                        /* Reconstruct the active formatting elements, if any. */
2520
                        $this->reconstructActiveFormattingElements();
2521
2522
                        /* Insert an HTML element for the token. */
2523
                        $this->insertElement($token);
2524
2525
                        /* Switch the content model flag to the CDATA state. */
2526
                        return HTML5::CDATA;
2527
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded for example by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2528
2529
                    /* A start tag whose tag name is "table" */
2530 View Code Duplication
                    case 'table':
2531
                        /* If the stack of open elements has a p element in scope,
2532
                        then act as if an end tag with the tag name p had been seen. */
2533
                        if ($this->elementInScope('p')) {
2534
                            $this->emitToken(
2535
                                array(
2536
                                    'name' => 'p',
2537
                                    'type' => HTML5::ENDTAG
2538
                                )
2539
                            );
2540
                        }
2541
2542
                        /* Insert an HTML element for the token. */
2543
                        $this->insertElement($token);
2544
2545
                        /* Change the insertion mode to "in table". */
2546
                        $this->mode = self::IN_TABLE;
2547
                        break;
2548
2549
                    /* A start tag whose tag name is one of: "area", "basefont",
2550
                    "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
2551
                    case 'area':
2552
                    case 'basefont':
2553
                    case 'bgsound':
2554
                    case 'br':
2555
                    case 'embed':
2556
                    case 'img':
2557
                    case 'param':
2558
                    case 'spacer':
2559
                    case 'wbr':
2560
                        /* Reconstruct the active formatting elements, if any. */
2561
                        $this->reconstructActiveFormattingElements();
2562
2563
                        /* Insert an HTML element for the token. */
2564
                        $this->insertElement($token);
2565
2566
                        /* Immediately pop the current node off the stack of open elements. */
2567
                        array_pop($this->stack);
2568
                        break;
2569
2570
                    /* A start tag whose tag name is "hr" */
2571 View Code Duplication
                    case 'hr':
2572
                        /* If the stack of open elements has a p element in scope,
2573
                        then act as if an end tag with the tag name p had been seen. */
2574
                        if ($this->elementInScope('p')) {
2575
                            $this->emitToken(
2576
                                array(
2577
                                    'name' => 'p',
2578
                                    'type' => HTML5::ENDTAG
2579
                                )
2580
                            );
2581
                        }
2582
2583
                        /* Insert an HTML element for the token. */
2584
                        $this->insertElement($token);
2585
2586
                        /* Immediately pop the current node off the stack of open elements. */
2587
                        array_pop($this->stack);
2588
                        break;
2589
2590
                    /* A start tag whose tag name is "image" */
2591
                    case 'image':
2592
                        /* Parse error. Change the token's tag name to "img" and
2593
                        reprocess it. (Don't ask.) */
2594
                        $token['name'] = 'img';
2595
                        return $this->inBody($token);
2596
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded for example by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2597
2598
                    /* A start tag whose tag name is "input" */
2599
                    case 'input':
2600
                        /* Reconstruct the active formatting elements, if any. */
2601
                        $this->reconstructActiveFormattingElements();
2602
2603
                        /* Insert an input element for the token. */
2604
                        $element = $this->insertElement($token, false);
2605
2606
                        /* If the form element pointer is not null, then associate the
2607
                        input element with the form element pointed to by the form
2608
                        element pointer. */
2609
                        $this->form_pointer !== null
2610
                            ? $this->form_pointer->appendChild($element)
2611
                            : end($this->stack)->appendChild($element);
2612
2613
                        /* Pop that input element off the stack of open elements. */
2614
                        array_pop($this->stack);
2615
                        break;
2616
2617
                    /* A start tag whose tag name is "isindex" */
2618
                    case 'isindex':
2619
                        /* Parse error. */
2620
                        // w/e
2621
2622
                        /* If the form element pointer is not null,
2623
                        then ignore the token. */
2624
                        if ($this->form_pointer === null) {
2625
                            /* Act as if a start tag token with the tag name "form" had
2626
                            been seen. */
2627
                            $this->inBody(
2628
                                array(
2629
                                    'name' => 'body',
2630
                                    'type' => HTML5::STARTTAG,
2631
                                    'attr' => array()
2632
                                )
2633
                            );
2634
2635
                            /* Act as if a start tag token with the tag name "hr" had
2636
                            been seen. */
2637
                            $this->inBody(
2638
                                array(
2639
                                    'name' => 'hr',
2640
                                    'type' => HTML5::STARTTAG,
2641
                                    'attr' => array()
2642
                                )
2643
                            );
2644
2645
                            /* Act as if a start tag token with the tag name "p" had
2646
                            been seen. */
2647
                            $this->inBody(
2648
                                array(
2649
                                    'name' => 'p',
2650
                                    'type' => HTML5::STARTTAG,
2651
                                    'attr' => array()
2652
                                )
2653
                            );
2654
2655
                            /* Act as if a start tag token with the tag name "label"
2656
                            had been seen. */
2657
                            $this->inBody(
2658
                                array(
2659
                                    'name' => 'label',
2660
                                    'type' => HTML5::STARTTAG,
2661
                                    'attr' => array()
2662
                                )
2663
                            );
2664
2665
                            /* Act as if a stream of character tokens had been seen. */
2666
                            $this->insertText(
2667
                                'This is a searchable index. ' .
2668
                                'Insert your search keywords here: '
2669
                            );
2670
2671
                            /* Act as if a start tag token with the tag name "input"
2672
                            had been seen, with all the attributes from the "isindex"
2673
                            token, except with the "name" attribute set to the value
2674
                            "isindex" (ignoring any explicit "name" attribute). */
2675
                            $attr = $token['attr'];
2676
                            $attr[] = array('name' => 'name', 'value' => 'isindex');
2677
2678
                            $this->inBody(
2679
                                array(
2680
                                    'name' => 'input',
2681
                                    'type' => HTML5::STARTTAG,
2682
                                    'attr' => $attr
2683
                                )
2684
                            );
2685
2686
                            /* Act as if a stream of character tokens had been seen
2687
                            (see below for what they should say). */
2688
                            $this->insertText(
2689
                                'This is a searchable index. ' .
2690
                                'Insert your search keywords here: '
2691
                            );
2692
2693
                            /* Act as if an end tag token with the tag name "label"
2694
                            had been seen. */
2695
                            $this->inBody(
2696
                                array(
2697
                                    'name' => 'label',
2698
                                    'type' => HTML5::ENDTAG
2699
                                )
2700
                            );
2701
2702
                            /* Act as if an end tag token with the tag name "p" had
2703
                            been seen. */
2704
                            $this->inBody(
2705
                                array(
2706
                                    'name' => 'p',
2707
                                    'type' => HTML5::ENDTAG
2708
                                )
2709
                            );
2710
2711
                            /* Act as if a start tag token with the tag name "hr" had
2712
                            been seen. */
2713
                            $this->inBody(
2714
                                array(
2715
                                    'name' => 'hr',
2716
                                    'type' => HTML5::ENDTAG
2717
                                )
2718
                            );
2719
2720
                            /* Act as if an end tag token with the tag name "form" had
2721
                            been seen. */
2722
                            $this->inBody(
2723
                                array(
2724
                                    'name' => 'form',
2725
                                    'type' => HTML5::ENDTAG
2726
                                )
2727
                            );
2728
                        }
2729
                        break;
2730
2731
                    /* A start tag whose tag name is "textarea" */
2732
                    case 'textarea':
2733
                        $this->insertElement($token);
2734
2735
                        /* Switch the tokeniser's content model flag to the
2736
                        RCDATA state. */
2737
                        return HTML5::RCDATA;
2738
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded for example by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2739
2740
                    /* A start tag whose tag name is one of: "iframe", "noembed",
2741
                    "noframes" */
2742
                    case 'iframe':
2743
                    case 'noembed':
2744
                    case 'noframes':
2745
                        $this->insertElement($token);
2746
2747
                        /* Switch the tokeniser's content model flag to the CDATA state. */
2748
                        return HTML5::CDATA;
2749
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded for example by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2750
2751
                    /* A start tag whose tag name is "select" */
2752
                    case 'select':
2753
                        /* Reconstruct the active formatting elements, if any. */
2754
                        $this->reconstructActiveFormattingElements();
2755
2756
                        /* Insert an HTML element for the token. */
2757
                        $this->insertElement($token);
2758
2759
                        /* Change the insertion mode to "in select". */
2760
                        $this->mode = self::IN_SELECT;
2761
                        break;
2762
2763
                    /* A start or end tag whose tag name is one of: "caption", "col",
2764
                    "colgroup", "frame", "frameset", "head", "option", "optgroup",
2765
                    "tbody", "td", "tfoot", "th", "thead", "tr". */
2766
                    case 'caption':
2767
                    case 'col':
2768
                    case 'colgroup':
2769
                    case 'frame':
2770
                    case 'frameset':
2771
                    case 'head':
2772
                    case 'option':
2773
                    case 'optgroup':
2774
                    case 'tbody':
2775
                    case 'td':
2776
                    case 'tfoot':
2777
                    case 'th':
2778
                    case 'thead':
2779
                    case 'tr':
2780
                        // Parse error. Ignore the token.
2781
                        break;
2782
2783
                    /* A start or end tag whose tag name is one of: "event-source",
2784
                    "section", "nav", "article", "aside", "header", "footer",
2785
                    "datagrid", "command" */
2786
                    case 'event-source':
2787
                    case 'section':
2788
                    case 'nav':
2789
                    case 'article':
2790
                    case 'aside':
2791
                    case 'header':
2792
                    case 'footer':
2793
                    case 'datagrid':
2794
                    case 'command':
2795
                        // Work in progress!
2796
                        break;
2797
2798
                    /* A start tag token not covered by the previous entries */
2799
                    default:
2800
                        /* Reconstruct the active formatting elements, if any. */
2801
                        $this->reconstructActiveFormattingElements();
2802
2803
                        $this->insertElement($token, true, true);
2804
                        break;
2805
                }
2806
                break;
2807
2808
            case HTML5::ENDTAG:
2809
                switch ($token['name']) {
2810
                    /* An end tag with the tag name "body" */
2811
                    case 'body':
2812
                        /* If the second element in the stack of open elements is
2813
                        not a body element, this is a parse error. Ignore the token.
2814
                        (innerHTML case) */
2815
                        if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2816
                            // Ignore.
2817
2818
                            /* If the current node is not the body element, then this
2819
                            is a parse error. */
2820
                        } elseif (end($this->stack)->nodeName !== 'body') {
0 ignored issues
show
Unused Code introduced by
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2821
                            // Parse error.
2822
                        }
2823
2824
                        /* Change the insertion mode to "after body". */
2825
                        $this->mode = self::AFTER_BODY;
2826
                        break;
2827
2828
                    /* An end tag with the tag name "html" */
2829
                    case 'html':
2830
                        /* Act as if an end tag with tag name "body" had been seen,
2831
                        then, if that token wasn't ignored, reprocess the current
2832
                        token. */
2833
                        $this->inBody(
2834
                            array(
2835
                                'name' => 'body',
2836
                                'type' => HTML5::ENDTAG
2837
                            )
2838
                        );
2839
2840
                        return $this->afterBody($token);
2841
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded for example by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2842
2843
                    /* An end tag whose tag name is one of: "address", "blockquote",
2844
                    "center", "dir", "div", "dl", "fieldset", "listing", "menu",
2845
                    "ol", "pre", "ul" */
2846
                    case 'address':
2847
                    case 'blockquote':
2848
                    case 'center':
2849
                    case 'dir':
2850
                    case 'div':
2851
                    case 'dl':
2852
                    case 'fieldset':
2853
                    case 'listing':
2854
                    case 'menu':
2855
                    case 'ol':
2856
                    case 'pre':
2857 View Code Duplication
                    case 'ul':
2858
                        /* If the stack of open elements has an element in scope
2859
                        with the same tag name as that of the token, then generate
2860
                        implied end tags. */
2861
                        if ($this->elementInScope($token['name'])) {
2862
                            $this->generateImpliedEndTags();
2863
2864
                            /* Now, if the current node is not an element with
2865
                            the same tag name as that of the token, then this
2866
                            is a parse error. */
2867
                            // w/e
2868
2869
                            /* If the stack of open elements has an element in
2870
                            scope with the same tag name as that of the token,
2871
                            then pop elements from this stack until an element
2872
                            with that tag name has been popped from the stack. */
2873
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2874
                                if ($this->stack[$n]->nodeName === $token['name']) {
2875
                                    $n = -1;
2876
                                }
2877
2878
                                array_pop($this->stack);
2879
                            }
2880
                        }
2881
                        break;
2882
2883
                    /* An end tag whose tag name is "form" */
2884
                    case 'form':
2885
                        /* If the stack of open elements has an element in scope
2886
                        with the same tag name as that of the token, then generate
2887
                        implied    end tags. */
2888
                        if ($this->elementInScope($token['name'])) {
2889
                            $this->generateImpliedEndTags();
2890
2891
                        }
2892
2893
                        if (end($this->stack)->nodeName !== $token['name']) {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2894
                            /* Now, if the current node is not an element with the
2895
                            same tag name as that of the token, then this is a parse
2896
                            error. */
2897
                            // w/e
2898
2899
                        } else {
2900
                            /* Otherwise, if the current node is an element with
2901
                            the same tag name as that of the token pop that element
2902
                            from the stack. */
2903
                            array_pop($this->stack);
2904
                        }
2905
2906
                        /* In any case, set the form element pointer to null. */
2907
                        $this->form_pointer = null;
2908
                        break;
2909
2910
                    /* An end tag whose tag name is "p" */
2911
                    case 'p':
2912
                        /* If the stack of open elements has a p element in scope,
2913
                        then generate implied end tags, except for p elements. */
2914
                        if ($this->elementInScope('p')) {
2915
                            $this->generateImpliedEndTags(array('p'));
2916
2917
                            /* If the current node is not a p element, then this is
2918
                            a parse error. */
2919
                            // k
2920
2921
                            /* If the stack of open elements has a p element in
2922
                            scope, then pop elements from this stack until the stack
2923
                            no longer has a p element in scope. */
2924
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2925
                                if ($this->elementInScope('p')) {
2926
                                    array_pop($this->stack);
2927
2928
                                } else {
2929
                                    break;
2930
                                }
2931
                            }
2932
                        }
2933
                        break;
2934
2935
                    /* An end tag whose tag name is "dd", "dt", or "li" */
2936
                    case 'dd':
2937
                    case 'dt':
2938 View Code Duplication
                    case 'li':
2939
                        /* If the stack of open elements has an element in scope
2940
                        whose tag name matches the tag name of the token, then
2941
                        generate implied end tags, except for elements with the
2942
                        same tag name as the token. */
2943
                        if ($this->elementInScope($token['name'])) {
2944
                            $this->generateImpliedEndTags(array($token['name']));
2945
2946
                            /* If the current node is not an element with the same
2947
                            tag name as the token, then this is a parse error. */
2948
                            // w/e
2949
2950
                            /* If the stack of open elements has an element in scope
2951
                            whose tag name matches the tag name of the token, then
2952
                            pop elements from this stack until an element with that
2953
                            tag name has been popped from the stack. */
2954
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2955
                                if ($this->stack[$n]->nodeName === $token['name']) {
2956
                                    $n = -1;
2957
                                }
2958
2959
                                array_pop($this->stack);
2960
                            }
2961
                        }
2962
                        break;
2963
2964
                    /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
2965
                    "h5", "h6" */
2966
                    case 'h1':
2967
                    case 'h2':
2968
                    case 'h3':
2969
                    case 'h4':
2970
                    case 'h5':
2971
                    case 'h6':
2972
                        $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
2973
2974
                        /* If the stack of open elements has in scope an element whose
2975
                        tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
2976
                        generate implied end tags. */
2977
                        if ($this->elementInScope($elements)) {
2978
                            $this->generateImpliedEndTags();
2979
2980
                            /* Now, if the current node is not an element with the same
2981
                            tag name as that of the token, then this is a parse error. */
2982
                            // w/e
2983
2984
                            /* If the stack of open elements has in scope an element
2985
                            whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
2986
                            "h6", then pop elements from the stack until an element
2987
                            with one of those tag names has been popped from the stack. */
2988
                            while ($this->elementInScope($elements)) {
2989
                                array_pop($this->stack);
2990
                            }
2991
                        }
2992
                        break;
2993
2994
                    /* An end tag whose tag name is one of: "a", "b", "big", "em",
2995
                    "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
2996
                    case 'a':
2997
                    case 'b':
2998
                    case 'big':
2999
                    case 'em':
3000
                    case 'font':
3001
                    case 'i':
3002
                    case 'nobr':
3003
                    case 's':
3004
                    case 'small':
3005
                    case 'strike':
3006
                    case 'strong':
3007
                    case 'tt':
3008
                    case 'u':
3009
                        /* 1. Let the formatting element be the last element in
3010
                        the list of active formatting elements that:
3011
                            * is between the end of the list and the last scope
3012
                            marker in the list, if any, or the start of the list
3013
                            otherwise, and
3014
                            * has the same tag name as the token.
3015
                        */
3016
                        while (true) {
3017
                            for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
3018
                                if ($this->a_formatting[$a] === self::MARKER) {
3019
                                    break;
3020
3021
                                } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
3022
                                    $formatting_element = $this->a_formatting[$a];
3023
                                    $in_stack = in_array($formatting_element, $this->stack, true);
3024
                                    $fe_af_pos = $a;
3025
                                    break;
3026
                                }
3027
                            }
3028
3029
                            /* If there is no such node, or, if that node is
3030
                            also in the stack of open elements but the element
3031
                            is not in scope, then this is a parse error. Abort
3032
                            these steps. The token is ignored. */
3033
                            if (!isset($formatting_element) || ($in_stack &&
3034
                                    !$this->elementInScope($token['name']))
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($token['name']) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
3035
                            ) {
3036
                                break;
3037
3038
                                /* Otherwise, if there is such a node, but that node
3039
                                is not in the stack of open elements, then this is a
3040
                                parse error; remove the element from the list, and
3041
                                abort these steps. */
3042
                            } elseif (isset($formatting_element) && !$in_stack) {
0 ignored issues
show
Bug introduced by
The variable $in_stack does not seem to be defined for all execution paths leading up to this point.

If you define a variable conditionally, it can happen that it is not defined for all execution paths.

Let’s take a look at an example:

function myFunction($a) {
    switch ($a) {
        case 'foo':
            $x = 1;
            break;

        case 'bar':
            $x = 2;
            break;
    }

    // $x is potentially undefined here.
    echo $x;
}

In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.

Available Fixes

  1. Check for existence of the variable explicitly:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        if (isset($x)) { // Make sure it's always set.
            echo $x;
        }
    }
    
  2. Define a default value for the variable:

    function myFunction($a) {
        $x = ''; // Set a default which gets overridden for certain paths.
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        echo $x;
    }
    
  3. Add a value for the missing path:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
    
            // We add support for the missing case.
            default:
                $x = '';
                break;
        }
    
        echo $x;
    }
    
Loading history...
3043
                                unset($this->a_formatting[$fe_af_pos]);
3044
                                $this->a_formatting = array_merge($this->a_formatting);
3045
                                break;
3046
                            }
3047
3048
                            /* 2. Let the furthest block be the topmost node in the
3049
                            stack of open elements that is lower in the stack
3050
                            than the formatting element, and is not an element in
3051
                            the phrasing or formatting categories. There might
3052
                            not be one. */
3053
                            $fe_s_pos = array_search($formatting_element, $this->stack, true);
3054
                            $length = count($this->stack);
3055
3056
                            for ($s = $fe_s_pos + 1; $s < $length; $s++) {
3057
                                $category = $this->getElementCategory($this->stack[$s]->nodeName);
3058
3059
                                if ($category !== self::PHRASING && $category !== self::FORMATTING) {
3060
                                    $furthest_block = $this->stack[$s];
3061
                                }
3062
                            }
3063
3064
                            /* 3. If there is no furthest block, then the UA must
3065
                            skip the subsequent steps and instead just pop all
3066
                            the nodes from the bottom of the stack of open
3067
                            elements, from the current node up to the formatting
3068
                            element, and remove the formatting element from the
3069
                            list of active formatting elements. */
3070
                            if (!isset($furthest_block)) {
3071
                                for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
3072
                                    array_pop($this->stack);
3073
                                }
3074
3075
                                unset($this->a_formatting[$fe_af_pos]);
3076
                                $this->a_formatting = array_merge($this->a_formatting);
3077
                                break;
3078
                            }
3079
3080
                            /* 4. Let the common ancestor be the element
3081
                            immediately above the formatting element in the stack
3082
                            of open elements. */
3083
                            $common_ancestor = $this->stack[$fe_s_pos - 1];
3084
3085
                            /* 5. If the furthest block has a parent node, then
3086
                            remove the furthest block from its parent node. */
3087
                            if ($furthest_block->parentNode !== null) {
3088
                                $furthest_block->parentNode->removeChild($furthest_block);
3089
                            }
3090
3091
                            /* 6. Let a bookmark note the position of the
3092
                            formatting element in the list of active formatting
3093
                            elements relative to the elements on either side
3094
                            of it in the list. */
3095
                            $bookmark = $fe_af_pos;
0 ignored issues
show
Bug introduced by
The variable $fe_af_pos does not seem to be defined for all execution paths leading up to this point.

If you define a variable conditionally, it can happen that it is not defined for all execution paths.

Let’s take a look at an example:

function myFunction($a) {
    switch ($a) {
        case 'foo':
            $x = 1;
            break;

        case 'bar':
            $x = 2;
            break;
    }

    // $x is potentially undefined here.
    echo $x;
}

In the above example, the variable $x is defined if you pass “foo” or “bar” as argument for $a. However, since the switch statement has no default case statement, if you pass any other value, the variable $x would be undefined.

Available Fixes

  1. Check for existence of the variable explicitly:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        if (isset($x)) { // Make sure it's always set.
            echo $x;
        }
    }
    
  2. Define a default value for the variable:

    function myFunction($a) {
        $x = ''; // Set a default which gets overridden for certain paths.
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
        }
    
        echo $x;
    }
    
  3. Add a value for the missing path:

    function myFunction($a) {
        switch ($a) {
            case 'foo':
                $x = 1;
                break;
    
            case 'bar':
                $x = 2;
                break;
    
            // We add support for the missing case.
            default:
                $x = '';
                break;
        }
    
        echo $x;
    }
    
Loading history...
3096
3097
                            /* 7. Let node and last node  be the furthest block.
3098
                            Follow these steps: */
3099
                            $node = $furthest_block;
3100
                            $last_node = $furthest_block;
3101
3102
                            while (true) {
3103
                                for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
3104
                                    /* 7.1 Let node be the element immediately
3105
                                    prior to node in the stack of open elements. */
3106
                                    $node = $this->stack[$n];
3107
3108
                                    /* 7.2 If node is not in the list of active
3109
                                    formatting elements, then remove node from
3110
                                    the stack of open elements and then go back
3111
                                    to step 1. */
3112
                                    if (!in_array($node, $this->a_formatting, true)) {
3113
                                        unset($this->stack[$n]);
3114
                                        $this->stack = array_merge($this->stack);
3115
3116
                                    } else {
3117
                                        break;
3118
                                    }
3119
                                }
3120
3121
                                /* 7.3 Otherwise, if node is the formatting
3122
                                element, then go to the next step in the overall
3123
                                algorithm. */
3124
                                if ($node === $formatting_element) {
3125
                                    break;
3126
3127
                                    /* 7.4 Otherwise, if last node is the furthest
3128
                                    block, then move the aforementioned bookmark to
3129
                                    be immediately after the node in the list of
3130
                                    active formatting elements. */
3131
                                } elseif ($last_node === $furthest_block) {
3132
                                    $bookmark = array_search($node, $this->a_formatting, true) + 1;
3133
                                }
3134
3135
                                /* 7.5 If node has any children, perform a
3136
                                shallow clone of node, replace the entry for
3137
                                node in the list of active formatting elements
3138
                                with an entry for the clone, replace the entry
3139
                                for node in the stack of open elements with an
3140
                                entry for the clone, and let node be the clone. */
3141
                                if ($node->hasChildNodes()) {
3142
                                    $clone = $node->cloneNode();
3143
                                    $s_pos = array_search($node, $this->stack, true);
3144
                                    $a_pos = array_search($node, $this->a_formatting, true);
3145
3146
                                    $this->stack[$s_pos] = $clone;
3147
                                    $this->a_formatting[$a_pos] = $clone;
3148
                                    $node = $clone;
3149
                                }
3150
3151
                                /* 7.6 Insert last node into node, first removing
3152
                                it from its previous parent node if any. */
3153
                                if ($last_node->parentNode !== null) {
3154
                                    $last_node->parentNode->removeChild($last_node);
3155
                                }
3156
3157
                                $node->appendChild($last_node);
3158
3159
                                /* 7.7 Let last node be node. */
3160
                                $last_node = $node;
3161
                            }
3162
3163
                            /* 8. Insert whatever last node ended up being in
3164
                            the previous step into the common ancestor node,
3165
                            first removing it from its previous parent node if
3166
                            any. */
3167
                            if ($last_node->parentNode !== null) {
3168
                                $last_node->parentNode->removeChild($last_node);
3169
                            }
3170
3171
                            $common_ancestor->appendChild($last_node);
3172
3173
                            /* 9. Perform a shallow clone of the formatting
3174
                            element. */
3175
                            $clone = $formatting_element->cloneNode();
3176
3177
                            /* 10. Take all of the child nodes of the furthest
3178
                            block and append them to the clone created in the
3179
                            last step. */
3180
                            while ($furthest_block->hasChildNodes()) {
3181
                                $child = $furthest_block->firstChild;
3182
                                $furthest_block->removeChild($child);
3183
                                $clone->appendChild($child);
3184
                            }
3185
3186
                            /* 11. Append that clone to the furthest block. */
3187
                            $furthest_block->appendChild($clone);
3188
3189
                            /* 12. Remove the formatting element from the list
3190
                            of active formatting elements, and insert the clone
3191
                            into the list of active formatting elements at the
3192
                            position of the aforementioned bookmark. */
3193
                            $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
3194
                            unset($this->a_formatting[$fe_af_pos]);
3195
                            $this->a_formatting = array_merge($this->a_formatting);
3196
3197
                            $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
3198
                            $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
3199
                            $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
3200
3201
                            /* 13. Remove the formatting element from the stack
3202
                            of open elements, and insert the clone into the stack
3203
                            of open elements immediately after (i.e. in a more
3204
                            deeply nested position than) the position of the
3205
                            furthest block in that stack. */
3206
                            $fe_s_pos = array_search($formatting_element, $this->stack, true);
3207
                            $fb_s_pos = array_search($furthest_block, $this->stack, true);
3208
                            unset($this->stack[$fe_s_pos]);
3209
3210
                            $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
3211
                            $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
3212
                            $this->stack = array_merge($s_part1, array($clone), $s_part2);
3213
3214
                            /* 14. Jump back to step 1 in this series of steps. */
3215
                            unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
3216
                        }
3217
                        break;
3218
3219
                    /* An end tag token whose tag name is one of: "button",
3220
                    "marquee", "object" */
3221
                    case 'button':
3222
                    case 'marquee':
3223
                    case 'object':
3224
                        /* If the stack of open elements has an element in scope whose
3225
                        tag name matches the tag name of the token, then generate implied
3226
                        tags. */
3227
                        if ($this->elementInScope($token['name'])) {
3228
                            $this->generateImpliedEndTags();
3229
3230
                            /* Now, if the current node is not an element with the same
3231
                            tag name as the token, then this is a parse error. */
3232
                            // k
3233
3234
                            /* Now, if the stack of open elements has an element in scope
3235
                            whose tag name matches the tag name of the token, then pop
3236
                            elements from the stack until that element has been popped from
3237
                            the stack, and clear the list of active formatting elements up
3238
                            to the last marker. */
3239
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
3240
                                if ($this->stack[$n]->nodeName === $token['name']) {
3241
                                    $n = -1;
3242
                                }
3243
3244
                                array_pop($this->stack);
3245
                            }
3246
3247
                            $marker = end(array_keys($this->a_formatting, self::MARKER, true));
0 ignored issues
show
Bug introduced by
array_keys($this->a_form...ng, self::MARKER, true) cannot be passed to end() as the parameter $array expects a reference.
Loading history...
3248
3249
                            for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
3250
                                array_pop($this->a_formatting);
3251
                            }
3252
                        }
3253
                        break;
3254
3255
                    /* Or an end tag whose tag name is one of: "area", "basefont",
3256
                    "bgsound", "br", "embed", "hr", "iframe", "image", "img",
3257
                    "input", "isindex", "noembed", "noframes", "param", "select",
3258
                    "spacer", "table", "textarea", "wbr" */
3259
                    case 'area':
3260
                    case 'basefont':
3261
                    case 'bgsound':
3262
                    case 'br':
3263
                    case 'embed':
3264
                    case 'hr':
3265
                    case 'iframe':
3266
                    case 'image':
3267
                    case 'img':
3268
                    case 'input':
3269
                    case 'isindex':
3270
                    case 'noembed':
3271
                    case 'noframes':
3272
                    case 'param':
3273
                    case 'select':
3274
                    case 'spacer':
3275
                    case 'table':
3276
                    case 'textarea':
3277
                    case 'wbr':
3278
                        // Parse error. Ignore the token.
3279
                        break;
3280
3281
                    /* An end tag token not covered by the previous entries */
3282
                    default:
3283
                        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
3284
                            /* Initialise node to be the current node (the bottommost
3285
                            node of the stack). */
3286
                            $node = end($this->stack);
3287
3288
                            /* If node has the same tag name as the end tag token,
3289
                            then: */
3290
                            if ($token['name'] === $node->nodeName) {
3291
                                /* Generate implied end tags. */
3292
                                $this->generateImpliedEndTags();
3293
3294
                                /* If the tag name of the end tag token does not
3295
                                match the tag name of the current node, this is a
3296
                                parse error. */
3297
                                // k
3298
3299
                                /* Pop all the nodes from the current node up to
3300
                                node, including node, then stop this algorithm. */
3301
                                for ($x = count($this->stack) - $n; $x >= $n; $x--) {
3302
                                    array_pop($this->stack);
3303
                                }
3304
3305
                            } else {
3306
                                $category = $this->getElementCategory($node);
3307
3308
                                if ($category !== self::SPECIAL && $category !== self::SCOPING) {
3309
                                    /* Otherwise, if node is in neither the formatting
3310
                                    category nor the phrasing category, then this is a
3311
                                    parse error. Stop this algorithm. The end tag token
3312
                                    is ignored. */
3313
                                    return false;
3314
                                }
3315
                            }
3316
                        }
3317
                        break;
3318
                }
3319
                break;
3320
        }
3321
        return null;
3322
    }
3323
3324
    /**
     * Processes one token while the insertion mode is "in table".
     *
     * Whitespace character tokens and comments are appended to the current
     * node; table-structure start tags (caption, colgroup, col, tbody, tfoot,
     * thead, td, th, tr, table) and the "table" end tag adjust the stack of
     * open elements and the insertion mode; every other token is handed to
     * inBody(), with $this->foster_parent set first when the current node is
     * a table-structure element.
     *
     * @param array $token Token array; 'type' is always read, 'name' is read
     *                     for start/end tags and 'data' for character and
     *                     comment tokens.
     * @return mixed false when a "table" end tag (real or implied by a nested
     *               "table" start tag) is ignored; the result of
     *               inTableBody()/mainPhase() when the token is reprocessed
     *               there; null otherwise.
     */
    private function inTable($token)
    {
        $clear = array('html', 'table');

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if ($token['type'] === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Append the character to the current node. */
            $text = $this->dom->createTextNode($token['data']);
            end($this->stack)->appendChild($text);

            /* A comment token */
        } elseif ($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $comment = $this->dom->createComment($token['data']);
            end($this->stack)->appendChild($comment);

            /* A start tag whose tag name is "caption" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'caption'
        ) {
            /* Clear the stack back to a table context. */
            $this->clearStackToTableContext($clear);

            /* Insert a marker at the end of the list of active
            formatting elements. */
            $this->a_formatting[] = self::MARKER;

            /* Insert an HTML element for the token, then switch the
            insertion mode to "in caption". */
            $this->insertElement($token);
            $this->mode = self::IN_CAPTION;

            /* A start tag whose tag name is "colgroup" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'colgroup'
        ) {
            /* Clear the stack back to a table context. */
            $this->clearStackToTableContext($clear);

            /* Insert an HTML element for the token, then switch the
            insertion mode to "in column group". */
            $this->insertElement($token);
            $this->mode = self::IN_CGROUP;

            /* A start tag whose tag name is "col" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'col'
        ) {
            /* Act as if a start tag token with the tag name "colgroup" had
            been seen, then hand the current token to the column group
            handler. */
            $this->inTable(
                array(
                    'name' => 'colgroup',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );

            $this->inColumnGroup($token);

            /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)
        ) {
            /* Clear the stack back to a table context. */
            $this->clearStackToTableContext($clear);

            /* Insert an HTML element for the token, then switch the insertion
            mode to "in table body". */
            $this->insertElement($token);
            $this->mode = self::IN_TBODY;

            /* A start tag whose tag name is one of: "td", "th", "tr" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            in_array($token['name'], array('td', 'th', 'tr'), true)
        ) {
            /* Act as if a start tag token with the tag name "tbody" had been
            seen, then reprocess the current token. */
            $this->inTable(
                array(
                    'name' => 'tbody',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );

            return $this->inTableBody($token);

            /* A start tag whose tag name is "table" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'table'
        ) {
            /* Parse error. Act as if an end tag token with the tag name "table"
            had been seen, then, if that token wasn't ignored, reprocess the
            current token. */
            $end_tag_result = $this->inTable(
                array(
                    'name' => 'table',
                    'type' => HTML5::ENDTAG
                )
            );

            /* The end tag branch below returns false exactly when it ignores
            the token. In that case the stack and insertion mode are untouched,
            so the start tag must be dropped too rather than reprocessed.
            NOTE(review): reprocessing would presumably route the same token
            straight back here via mainPhase() -- confirm against mainPhase(). */
            if ($end_tag_result === false) {
                return false;
            }

            return $this->mainPhase($token);

            /* An end tag whose tag name is "table" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            $token['name'] === 'table'
        ) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) */
            if (!$this->elementInScope($token['name'], true)) {
                return false;

                /* Otherwise: */
            } else {
                /* Generate implied end tags. */
                $this->generateImpliedEndTags();

                /* Now, if the current node is not a table element, then this
                is a parse error. */
                // w/e

                /* Pop elements from this stack until a table element has been
                popped from the stack. */
                while (true) {
                    $current = end($this->stack)->nodeName;
                    array_pop($this->stack);

                    if ($current === 'table') {
                        break;
                    }
                }

                /* Reset the insertion mode appropriately. */
                $this->resetInsertionMode();
            }

            /* An end tag whose tag name is one of: "body", "caption", "col",
            "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                array(
                    'body',
                    'caption',
                    'col',
                    'colgroup',
                    'html',
                    'tbody',
                    'td',
                    'tfoot',
                    'th',
                    'thead',
                    'tr'
                ),
                true
            )
        ) {
            // Parse error. Ignore the token. The branch is intentionally
            // empty: it only keeps these end tags away from the "anything
            // else" branch below.

            /* Anything else */
        } else {
            /* Parse error. Process the token as if the insertion mode was "in
            body", with the following exception: */

            /* If the current node is a table, tbody, tfoot, thead, or tr
            element, then, whenever a node would be inserted into the current
            node, it must instead be inserted into the foster parent element. */
            if (in_array(
                end($this->stack)->nodeName,
                array('table', 'tbody', 'tfoot', 'thead', 'tr'),
                true
            )
            ) {
                /* The foster parent element is the parent element of the last
                table element in the stack of open elements, if there is a
                table element and it has such a parent element. If there is no
                table element in the stack of open elements (innerHTML case),
                then the foster parent element is the first element in the
                stack of open elements (the html element). Otherwise, if there
                is a table element in the stack of open elements, but the last
                table element in the stack of open elements has no parent, or
                its parent node is not an element, then the foster parent
                element is the element before the last table element in the
                stack of open elements. */
                $table = null;
                $table_pos = -1;

                for ($n = count($this->stack) - 1; $n >= 0; $n--) {
                    if ($this->stack[$n]->nodeName === 'table') {
                        $table = $this->stack[$n];
                        $table_pos = $n;
                        break;
                    }
                }

                if ($table === null) {
                    $this->foster_parent = $this->stack[0];

                } elseif ($table->parentNode !== null &&
                    $table->parentNode->nodeType === XML_ELEMENT_NODE
                ) {
                    /* Only an element parent qualifies; a non-element parent
                    falls through to the "element before the table" rule. */
                    $this->foster_parent = $table->parentNode;

                } else {
                    /* Guard the lookup so a table sitting at the very top of
                    the stack does not read index -1; there is no element
                    before it, so no foster parent is recorded. */
                    $this->foster_parent = $table_pos > 0
                        ? $this->stack[$table_pos - 1]
                        : null;
                }
            }

            $this->inBody($token);
        }
        return null;
    }
3536
3537
    private function inCaption($token)
0 ignored issues
show
Documentation introduced by
The return type could not be reliably inferred; please add a @return annotation.

Our type inference engine is quite powerful, but sometimes the code does not provide enough clues to go by. In these cases we request you to add a @return annotation as described here.

Loading history...
3538
    {
3539
        /* An end tag whose tag name is "caption" */
3540
        if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
3541
            /* If the stack of open elements does not have an element in table
3542
            scope with the same tag name as the token, this is a parse error.
3543
            Ignore the token. (innerHTML case) */
3544
            if (!$this->elementInScope($token['name'], true)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($token['name'], true) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
3545
                // Ignore
3546
3547
                /* Otherwise: */
3548
            } else {
3549
                /* Generate implied end tags. */
3550
                $this->generateImpliedEndTags();
3551
3552
                /* Now, if the current node is not a caption element, then this
3553
                is a parse error. */
3554
                // w/e
3555
3556
                /* Pop elements from this stack until a caption element has
3557
                been popped from the stack. */
3558
                while (true) {
3559
                    $node = end($this->stack)->nodeName;
3560
                    array_pop($this->stack);
3561
3562
                    if ($node === 'caption') {
3563
                        break;
3564
                    }
3565
                }
3566
3567
                /* Clear the list of active formatting elements up to the last
3568
                marker. */
3569
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
3570
3571
                /* Switch the insertion mode to "in table". */
3572
                $this->mode = self::IN_TABLE;
3573
            }
3574
3575
            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
3576
            "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
3577
            name is "table" */
3578
        } elseif (($token['type'] === HTML5::STARTTAG && in_array(
3579
                    $token['name'],
3580
                    array(
3581
                        'caption',
3582
                        'col',
3583
                        'colgroup',
3584
                        'tbody',
3585
                        'td',
3586
                        'tfoot',
3587
                        'th',
3588
                        'thead',
3589
                        'tr'
3590
                    )
3591
                )) || ($token['type'] === HTML5::ENDTAG &&
3592
                $token['name'] === 'table')
3593
        ) {
3594
            /* Parse error. Act as if an end tag with the tag name "caption"
3595
            had been seen, then, if that token wasn't ignored, reprocess the
3596
            current token. */
3597
            $this->inCaption(
3598
                array(
3599
                    'name' => 'caption',
3600
                    'type' => HTML5::ENDTAG
3601
                )
3602
            );
3603
3604
            return $this->inTable($token);
3605
3606
            /* An end tag whose tag name is one of: "body", "col", "colgroup",
3607
            "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
3608
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
0 ignored issues
show
Unused Code introduced by
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
3609
                $token['name'],
3610
                array(
3611
                    'body',
3612
                    'col',
3613
                    'colgroup',
3614
                    'html',
3615
                    'tbody',
3616
                    'tfoot',
3617
                    'th',
3618
                    'thead',
3619
                    'tr'
3620
                )
3621
            )
3622
        ) {
3623
            // Parse error. Ignore the token.
3624
3625
            /* Anything else */
3626
        } else {
3627
            /* Process the token as if the insertion mode was "in body". */
3628
            $this->inBody($token);
3629
        }
3630
3631
        return null;}
3632
3633
    /**
     * Handles a token while the insertion mode is "in column group".
     *
     * @param array $token Token array; this handler reads its 'type' key and,
     *                     depending on the type, its 'name' or 'data' key.
     * @return mixed Null, or the result of inTable() when the token is
     *               reprocessed after the colgroup has been closed.
     */
    private function inColumnGroup($token)
    {
        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if ($token['type'] === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Append the character to the current node. */
            $text = $this->dom->createTextNode($token['data']);
            end($this->stack)->appendChild($text);

            /* A comment token */
        } elseif ($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $comment = $this->dom->createComment($token['data']);
            end($this->stack)->appendChild($comment);

            /* A start tag whose tag name is "col" */
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
            /* Insert a col element for the token. Immediately pop the current
            node off the stack of open elements. */
            $this->insertElement($token);
            array_pop($this->stack);

            /* An end tag whose tag name is "colgroup" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            $token['name'] === 'colgroup'
        ) {
            /* If the current node is the root html element, then this is a
            parse error, ignore the token. (innerHTML case)
            Otherwise, pop the current node (which will be a colgroup
            element) from the stack of open elements. Switch the insertion
            mode to "in table". */
            if (end($this->stack)->nodeName !== 'html') {
                array_pop($this->stack);
                $this->mode = self::IN_TABLE;
            }

            /* An end tag whose tag name is "col" */
        } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
            /* Parse error. Ignore the token. */

            /* Anything else */
        } else {
            /* Act as if an end tag with the tag name "colgroup" had been seen,
            and then, if that token wasn't ignored, reprocess the current token. */

            // The implied </colgroup> is ignored when the current node is the
            // root html element; the current token is then dropped too rather
            // than being handed to the "in table" handler.
            if (end($this->stack)->nodeName === 'html') {
                return null;
            }

            $this->inColumnGroup(
                array(
                    'name' => 'colgroup',
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->inTable($token);
        }

        return null;
    }
3695
3696
    /**
     * Handles a token while the insertion mode is "in table body".
     *
     * @param array $token Token array; this handler reads its 'type' and
     *                     'name' keys.
     * @return mixed Null, or the result of the handler the token was
     *               reprocessed with (inRow() or mainPhase()).
     */
    private function inTableBody($token)
    {
        // Node names at which clearStackToTableContext() is asked to stop.
        $clear = array('tbody', 'tfoot', 'thead', 'html');

        /* A start tag whose tag name is "tr" */
        if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Insert a tr element for the token, then switch the insertion
            mode to "in row". */
            $this->insertElement($token);
            $this->mode = self::IN_ROW;

            /* A start tag whose tag name is one of: "th", "td" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            ($token['name'] === 'th' || $token['name'] === 'td')
        ) {
            /* Parse error. Act as if a start tag with the tag name "tr" had
            been seen, then reprocess the current token. */
            $this->inTableBody(
                array(
                    'name' => 'tr',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );

            return $this->inRow($token);

            /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)
        ) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. Otherwise: */
            if ($this->elementInScope($token['name'], true)) {
                /* Clear the stack back to a table body context. */
                $this->clearStackToTableContext($clear);

                /* Pop the current node from the stack of open elements. Switch
                the insertion mode to "in table". */
                array_pop($this->stack);
                $this->mode = self::IN_TABLE;
            }

            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
            "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
        } elseif (($token['type'] === HTML5::STARTTAG && in_array(
                    $token['name'],
                    // 'tfoot' was previously misspelled as 'tfoor'.
                    array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'),
                    true
                )) ||
            // The rule is about the *end* tag "table", not a start tag.
            ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')
        ) {
            /* If the stack of open elements does not have a tbody, thead, or
            tfoot element in table scope, this is a parse error. Ignore the
            token. (innerHTML case) Otherwise: */
            if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
                /* Clear the stack back to a table body context. */
                $this->clearStackToTableContext($clear);

                /* Act as if an end tag with the same tag name as the current
                node ("tbody", "tfoot", or "thead") had been seen, then
                reprocess the current token. */
                $this->inTableBody(
                    array(
                        'name' => end($this->stack)->nodeName,
                        'type' => HTML5::ENDTAG
                    )
                );

                return $this->mainPhase($token);
            }

            /* An end tag whose tag name is one of: "body", "caption", "col",
            "colgroup", "html", "td", "th", "tr" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'),
                true
            )
        ) {
            /* Parse error. Ignore the token. */

            /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in table". */
            $this->inTable($token);
        }

        return null;
    }
3795
3796
    /**
     * Handles a token while the insertion mode is "in row".
     *
     * @param array $token Token array; this handler reads its 'type' and
     *                     'name' keys.
     * @return mixed Null, or the result of inTableBody() when the token is
     *               reprocessed after the row has been closed.
     */
    private function inRow($token)
    {
        // Node names at which clearStackToTableContext() is asked to stop.
        $clear = array('tr', 'html');

        /* A start tag whose tag name is one of: "th", "td" */
        if ($token['type'] === HTML5::STARTTAG &&
            ($token['name'] === 'th' || $token['name'] === 'td')
        ) {
            /* Clear the stack back to a table row context. */
            $this->clearStackToTableContext($clear);

            /* Insert an HTML element for the token, then switch the insertion
            mode to "in cell". */
            $this->insertElement($token);
            $this->mode = self::IN_CELL;

            /* Insert a marker at the end of the list of active formatting
            elements. */
            $this->a_formatting[] = self::MARKER;

            /* An end tag whose tag name is "tr" */
        } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) Otherwise: */
            if ($this->elementInScope($token['name'], true)) {
                /* Clear the stack back to a table row context. */
                $this->clearStackToTableContext($clear);

                /* Pop the current node (which will be a tr element) from the
                stack of open elements. Switch the insertion mode to "in table
                body". */
                array_pop($this->stack);
                $this->mode = self::IN_TBODY;
            }

            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
            "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
        } elseif (($token['type'] === HTML5::STARTTAG && in_array(
                    $token['name'],
                    array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'),
                    true
                )) ||
            ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')
        ) {
            /* Act as if an end tag with the tag name "tr" had been seen, then,
            if that token wasn't ignored, reprocess the current token. */

            // The implied </tr> is ignored when no tr is in table scope; the
            // current token is then dropped as well.
            if (!$this->elementInScope('tr', true)) {
                return null;
            }

            $this->inRow(
                array(
                    'name' => 'tr',
                    'type' => HTML5::ENDTAG
                )
            );

            // Closing the row switched the mode to "in table body", so the
            // token is reprocessed there. Handing it to inCell() would drop
            // it, because no td/th element is open any more.
            return $this->inTableBody($token);

            /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)
        ) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. */
            if (!$this->elementInScope($token['name'], true)) {
                return null;
            }

            /* Otherwise, act as if an end tag with the tag name "tr" had
            been seen, then reprocess the current token. */
            $this->inRow(
                array(
                    'name' => 'tr',
                    'type' => HTML5::ENDTAG
                )
            );

            // Same as above: the row is closed, so the section end tag is
            // handled by the "in table body" handler.
            return $this->inTableBody($token);

            /* An end tag whose tag name is one of: "body", "caption", "col",
            "colgroup", "html", "td", "th" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                // "tr" is not listed: its end tag is handled by an earlier branch.
                array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'),
                true
            )
        ) {
            /* Parse error. Ignore the token. */

            /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in table". */
            $this->inTable($token);
        }

        return null;
    }
3894
3895
    private function inCell($token)
0 ignored issues
show
Documentation introduced by
The return type could not be reliably inferred; please add a @return annotation.

Our type inference engine in quite powerful, but sometimes the code does not provide enough clues to go by. In these cases we request you to add a @return annotation as described here.

Loading history...
3896
    {
3897
        /* An end tag whose tag name is one of: "td", "th" */
3898
        if ($token['type'] === HTML5::ENDTAG &&
3899
            ($token['name'] === 'td' || $token['name'] === 'th')
3900
        ) {
3901
            /* If the stack of open elements does not have an element in table
3902
            scope with the same tag name as that of the token, then this is a
3903
            parse error and the token must be ignored. */
3904
            if (!$this->elementInScope($token['name'], true)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($token['name'], true) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false, and null as possible values. It is generally a good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
3905
                // Ignore.
3906
3907
                /* Otherwise: */
3908
            } else {
3909
                /* Generate implied end tags, except for elements with the same
3910
                tag name as the token. */
3911
                $this->generateImpliedEndTags(array($token['name']));
3912
3913
                /* Now, if the current node is not an element with the same tag
3914
                name as the token, then this is a parse error. */
3915
                // k
3916
3917
                /* Pop elements from this stack until an element with the same
3918
                tag name as the token has been popped from the stack. */
3919
                while (true) {
3920
                    $node = end($this->stack)->nodeName;
3921
                    array_pop($this->stack);
3922
3923
                    if ($node === $token['name']) {
3924
                        break;
3925
                    }
3926
                }
3927
3928
                /* Clear the list of active formatting elements up to the last
3929
                marker. */
3930
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
3931
3932
                /* Switch the insertion mode to "in row". (The current node
3933
                will be a tr element at this point.) */
3934
                $this->mode = self::IN_ROW;
3935
            }
3936
3937
            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
3938
            "tbody", "td", "tfoot", "th", "thead", "tr" */
3939 View Code Duplication
        } elseif ($token['type'] === HTML5::STARTTAG && in_array(
3940
                $token['name'],
3941
                array(
3942
                    'caption',
3943
                    'col',
3944
                    'colgroup',
3945
                    'tbody',
3946
                    'td',
3947
                    'tfoot',
3948
                    'th',
3949
                    'thead',
3950
                    'tr'
3951
                )
3952
            )
3953
        ) {
3954
            /* If the stack of open elements does not have a td or th element
3955
            in table scope, then this is a parse error; ignore the token.
3956
            (innerHTML case) */
3957
            if (!$this->elementInScope(array('td', 'th'), true)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope(array('td', 'th'), true) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false, and null as possible values. It is generally a good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
3958
                // Ignore.
3959
3960
                /* Otherwise, close the cell (see below) and reprocess the current
3961
                token. */
3962
            } else {
3963
                $this->closeCell();
3964
                return $this->inRow($token);
3965
            }
3966
3967
            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
3968
            "tbody", "td", "tfoot", "th", "thead", "tr" */
3969
        } elseif ($token['type'] === HTML5::STARTTAG && in_array(
3970
                $token['name'],
3971
                array(
3972
                    'caption',
3973
                    'col',
3974
                    'colgroup',
3975
                    'tbody',
3976
                    'td',
3977
                    'tfoot',
3978
                    'th',
3979
                    'thead',
3980
                    'tr'
3981
                )
3982
            )
3983
        ) {
3984
            /* If the stack of open elements does not have a td or th element
3985
            in table scope, then this is a parse error; ignore the token.
3986
            (innerHTML case) */
3987
            if (!$this->elementInScope(array('td', 'th'), true)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope(array('td', 'th'), true) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false, and null as possible values. It is generally a good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
3988
                // Ignore.
3989
3990
                /* Otherwise, close the cell (see below) and reprocess the current
3991
                token. */
3992
            } else {
3993
                $this->closeCell();
3994
                return $this->inRow($token);
3995
            }
3996
3997
            /* An end tag whose tag name is one of: "body", "caption", "col",
3998
            "colgroup", "html" */
3999
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
0 ignored issues
show
Unused Code introduced by
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
4000
                $token['name'],
4001
                array('body', 'caption', 'col', 'colgroup', 'html')
4002
            )
4003
        ) {
4004
            /* Parse error. Ignore the token. */
4005
4006
            /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
4007
            "thead", "tr" */
4008 View Code Duplication
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
4009
                $token['name'],
4010
                array('table', 'tbody', 'tfoot', 'thead', 'tr')
4011
            )
4012
        ) {
4013
            /* If the stack of open elements does not have an element in table
4014
            scope with the same tag name as that of the token (which can only
4015
            happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
4016
            then this is a parse error and the token must be ignored. */
4017
            if (!$this->elementInScope($token['name'], true)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($token['name'], true) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
4018
                // Ignore.
4019
4020
                /* Otherwise, close the cell (see below) and reprocess the current
4021
                token. */
4022
            } else {
4023
                $this->closeCell();
4024
                return $this->inRow($token);
4025
            }
4026
4027
            /* Anything else */
4028
        } else {
4029
            /* Process the token as if the insertion mode was "in body". */
4030
            $this->inBody($token);
4031
        }
4032
        return null;
4033
    }
4034
4035
    /**
     * Handles a token while the insertion mode is "in select".
     *
     * @param array $token Token array. 'type' is always read; 'name' is read
     *                     for tag tokens and 'data' for character and
     *                     comment tokens.
     * @return void
     */
    private function inSelect($token)
    {
        /* Handle the token as follows: */
        $type = $token['type'];

        // Character, comment and EOF tokens carry no 'name'; read it
        // defensively so the tag-name comparisons below never touch an
        // undefined index.
        $name = isset($token['name']) ? $token['name'] : null;

        /* A character token */
        if ($type === HTML5::CHARACTR) {
            /* Append the token's character to the current node. */
            $this->insertText($token['data']);

            /* A comment token */
        } elseif ($type === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

            /* A start tag token whose tag name is "option" */
        } elseif ($type === HTML5::STARTTAG && $name === 'option') {
            /* If the current node is an option element, act as if an end tag
            with the tag name "option" had been seen. */
            if (end($this->stack)->nodeName === 'option') {
                $this->inSelect(
                    array(
                        'name' => 'option',
                        'type' => HTML5::ENDTAG
                    )
                );
            }

            /* Insert an HTML element for the token. */
            $this->insertElement($token);

            /* A start tag token whose tag name is "optgroup" */
        } elseif ($type === HTML5::STARTTAG && $name === 'optgroup') {
            /* If the current node is an option element, act as if an end tag
            with the tag name "option" had been seen. */
            if (end($this->stack)->nodeName === 'option') {
                $this->inSelect(
                    array(
                        'name' => 'option',
                        'type' => HTML5::ENDTAG
                    )
                );
            }

            /* If the current node is an optgroup element, act as if an end tag
            with the tag name "optgroup" had been seen. */
            if (end($this->stack)->nodeName === 'optgroup') {
                $this->inSelect(
                    array(
                        'name' => 'optgroup',
                        'type' => HTML5::ENDTAG
                    )
                );
            }

            /* Insert an HTML element for the token. */
            $this->insertElement($token);

            /* An end tag token whose tag name is "optgroup" */
        } elseif ($type === HTML5::ENDTAG && $name === 'optgroup') {
            /* First, if the current node is an option element, and the node
            immediately before it in the stack of open elements is an optgroup
            element, then act as if an end tag with the tag name "option" had
            been seen. */
            $depth = count($this->stack);

            if ($depth >= 2 &&
                $this->stack[$depth - 1]->nodeName === 'option' &&
                $this->stack[$depth - 2]->nodeName === 'optgroup'
            ) {
                $this->inSelect(
                    array(
                        'name' => 'option',
                        'type' => HTML5::ENDTAG
                    )
                );
            }

            /* If the current node is an optgroup element, then pop that node
            from the stack of open elements. Otherwise, this is a parse error,
            ignore the token. */
            // The current node is looked up again because the implied
            // </option> above may have popped the stack, and its nodeName
            // (not the node object itself) is what must equal 'optgroup'.
            if (end($this->stack)->nodeName === 'optgroup') {
                array_pop($this->stack);
            }

            /* An end tag token whose tag name is "option" */
        } elseif ($type === HTML5::ENDTAG && $name === 'option') {
            /* If the current node is an option element, then pop that node
            from the stack of open elements. Otherwise, this is a parse error,
            ignore the token. */
            if (end($this->stack)->nodeName === 'option') {
                array_pop($this->stack);
            }

            /* An end tag whose tag name is "select" */
        } elseif ($type === HTML5::ENDTAG && $name === 'select') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) */
            if ($this->elementInScope($name, true)) {
                /* Otherwise: pop elements from the stack of open elements
                until a select element has been popped from the stack. */
                while (true) {
                    $current = end($this->stack)->nodeName;
                    array_pop($this->stack);

                    if ($current === 'select') {
                        break;
                    }
                }

                /* Reset the insertion mode appropriately. */
                $this->resetInsertionMode();
            }

            /* A start tag whose tag name is "select" */
        } elseif ($type === HTML5::STARTTAG && $name === 'select') {
            /* Parse error. Act as if the token had been an end tag with the
            tag name "select" instead. */
            $this->inSelect(
                array(
                    'name' => 'select',
                    'type' => HTML5::ENDTAG
                )
            );

            /* An end tag whose tag name is one of: "caption", "table", "tbody",
            "tfoot", "thead", "tr", "td", "th" */
        } elseif ($type === HTML5::ENDTAG && in_array(
                $name,
                array(
                    'caption',
                    'table',
                    'tbody',
                    'tfoot',
                    'thead',
                    'tr',
                    'td',
                    'th'
                ),
                true
            )
        ) {
            /* Parse error. */

            /* If the stack of open elements has an element in table scope with
            the same tag name as that of the token, then act as if an end tag
            with the tag name "select" had been seen, and reprocess the token.
            Otherwise, ignore the token. */
            if ($this->elementInScope($name, true)) {
                $this->inSelect(
                    array(
                        'name' => 'select',
                        'type' => HTML5::ENDTAG
                    )
                );

                $this->mainPhase($token);
            }
        }

        /* Anything else: parse error. Ignore the token. */
    }
4215
4216
    /**
     * Handles a token while the insertion mode is "after body".
     *
     * @param array $token Token array ('type', plus 'data' or 'name'
     *                     depending on the token type).
     * @return mixed The result of inBody() when an unexpected token is
     *               reprocessed in the "in body" mode, null otherwise.
     */
    private function afterBody($token)
    {
        $kind = $token['type'];

        /* A character token made up only of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE: process it as it would be processed if the
        insertion mode was "in body". */
        if ($kind === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            $this->inBody($token);
            return null;
        }

        /* A comment token: append a Comment node to the first element in
        the stack of open elements (the html element), with the data
        attribute set to the data given in the comment token. */
        if ($kind === HTML5::COMMENT) {
            $node = $this->dom->createComment($token['data']);
            $this->stack[0]->appendChild($node);
            return null;
        }

        /* An end tag with the tag name "html": switch to the trailing end
        phase. (The innerHTML case, where this would be a parse error, is
        not distinguished here.) */
        if ($kind === HTML5::ENDTAG && $token['name'] === 'html') {
            $this->phase = self::END_PHASE;
            return null;
        }

        /* Anything else: parse error. Set the insertion mode to "in body"
        and reprocess the token. */
        $this->mode = self::IN_BODY;
        return $this->inBody($token);
    }
4257
4258
    /**
     * Handles a token while the insertion mode is "in frameset".
     *
     * @param array $token Token array. 'type' is always read; 'name' is read
     *                     for tag tokens and 'data' for character and
     *                     comment tokens.
     * @return void
     */
    private function inFrameset($token)
    {
        /* Handle the token as follows: */
        $type = $token['type'];

        // Non-tag tokens (e.g. non-whitespace character tokens, EOF) have no
        // 'name'; read it defensively so the comparisons below never touch
        // an undefined index.
        $name = isset($token['name']) ? $token['name'] : null;

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
        // NOTE(review): the pattern does not include CR; presumably line
        // endings are normalised before tokens reach this method - confirm.
        if ($type === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

            /* A comment token */
        } elseif ($type === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

            /* A start tag with the tag name "frameset" */
        } elseif ($type === HTML5::STARTTAG && $name === 'frameset') {
            $this->insertElement($token);

            /* An end tag with the tag name "frameset" */
        } elseif ($type === HTML5::ENDTAG && $name === 'frameset') {
            /* If the current node is the root html element, then this is a
            parse error; ignore the token. (innerHTML case) */
            if (end($this->stack)->nodeName !== 'html') {
                /* Otherwise, pop the current node from the stack of open
                elements. */
                array_pop($this->stack);

                /* If the current node is no longer a frameset element, then
                change the insertion mode to "after frameset". Closing a
                nested frameset must leave the mode at "in frameset". */
                if (end($this->stack)->nodeName !== 'frameset') {
                    $this->mode = self::AFTR_FRAME;
                }
            }

            /* A start tag with the tag name "frame" */
        } elseif ($type === HTML5::STARTTAG && $name === 'frame') {
            /* Insert an HTML element for the token. */
            $this->insertElement($token);

            /* Immediately pop the current node off the stack of open elements. */
            array_pop($this->stack);

            /* A start tag with the tag name "noframes" */
        } elseif ($type === HTML5::STARTTAG && $name === 'noframes') {
            /* Process the token as if the insertion mode had been "in body". */
            $this->inBody($token);
        }

        /* Anything else: parse error. Ignore the token. */
    }
4326
4327
    /**
     * Handles a token while the insertion mode is "after frameset".
     *
     * @param array $token Token array. 'type' is always read; 'name' is read
     *                     for tag tokens and 'data' for character and
     *                     comment tokens.
     * @return void
     */
    private function afterFrameset($token)
    {
        /* Handle the token as follows: */
        $type = $token['type'];

        // Non-tag tokens have no 'name'; read it defensively so the
        // comparisons below never touch an undefined index.
        $name = isset($token['name']) ? $token['name'] : null;

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
        // NOTE(review): the pattern does not include CR; presumably line
        // endings are normalised before tokens reach this method - confirm.
        if ($type === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

            /* A comment token */
        } elseif ($type === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

            /* An end tag with the tag name "html" */
        } elseif ($type === HTML5::ENDTAG && $name === 'html') {
            /* Switch to the trailing end phase. */
            $this->phase = self::END_PHASE;

            /* A start tag with the tag name "noframes" */
        } elseif ($type === HTML5::STARTTAG && $name === 'noframes') {
            /* Process the token as if the insertion mode had been "in body". */
            $this->inBody($token);
        }

        /* Anything else: parse error. Ignore the token. */
    }
4365
4366
    /**
     * Processes a token emitted by the tokenisation stage after the main
     * phase has ended (the "trailing end" phase).
     *
     * @param array $token Token array ('type', plus 'data' for character
     *                     and comment tokens).
     * @return mixed The result of mainPhase() when the token forces a switch
     *               back to the main phase, null otherwise.
     */
    private function trailingEndPhase($token)
    {
        $type = $token['type'];

        /* A DOCTYPE token */
        if ($type === HTML5::DOCTYPE) {
            // Parse error. Ignore the token.
            return null;
        }

        /* A comment token */
        if ($type === HTML5::COMMENT) {
            /* Append a Comment node to the Document object with the data
            attribute set to the data given in the comment token. */
            $comment = $this->dom->createComment($token['data']);
            $this->dom->appendChild($comment);
            return null;
        }

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if ($type === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Process the token as it would be processed in the main phase. */
            $this->mainPhase($token);
            return null;
        }

        /* A character token that is not one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE. Or a start tag token. Or an end tag token. */
        // Whitespace-only character tokens were handled just above, so any
        // character token that reaches this point is a non-whitespace one.
        if ($type === HTML5::CHARACTR ||
            $type === HTML5::STARTTAG ||
            $type === HTML5::ENDTAG
        ) {
            /* Parse error. Switch back to the main phase and reprocess the
            token. */
            $this->phase = self::MAIN_PHASE;
            return $this->mainPhase($token);
        }

        /* An end-of-file token (or anything else): nothing left to do. */
        return null;
    }
4409
4410
    /**
     * Creates an element for a tag token, attaches it via
     * appendToRealParent() and pushes it onto the stack of open elements.
     *
     * @param array $token  Tag token; 'name' and 'attr' (a list of
     *                      name/value pairs) are read.
     * @param bool  $append Not used by this implementation.
     * @param bool  $check  When true, the tag name is sanitised first.
     * @return DOMElement The newly created element.
     */
    private function insertElement($token, $append = true, $check = false)
    {
        $tagName = $token['name'];

        // Proprietary workaround for libxml2's limitations with tag names
        if ($check) {
            // Slightly modified HTML5 tag-name modification: drop anything
            // that is not an ASCII letter, digit, or hyphen ...
            $tagName = preg_replace('/[^a-z0-9-]/i', '', $tagName);
            // ... then strip leading hyphens and digits.
            $tagName = ltrim($tagName, '-0..9');
            // In theory this should never be needed, but just in case fall
            // back to an arbitrary generic element.
            if ($tagName === '') {
                $tagName = 'span';
            }
        }

        $element = $this->dom->createElement($tagName);

        // The first occurrence of an attribute wins; later duplicates are
        // skipped.
        foreach ($token['attr'] as $attribute) {
            if ($element->hasAttribute($attribute['name'])) {
                continue;
            }
            $element->setAttribute($attribute['name'], $attribute['value']);
        }

        $this->appendToRealParent($element);
        $this->stack[] = $element;

        return $element;
    }
4438
4439
    /**
     * Creates a text node holding $data and attaches it via
     * appendToRealParent().
     *
     * @param string $data Character data for the new text node.
     * @return void
     */
    private function insertText($data)
    {
        $this->appendToRealParent($this->dom->createTextNode($data));
    }
4444
4445
    /**
     * Creates a comment node holding $data and attaches it via
     * appendToRealParent().
     *
     * @param string $data Content of the new comment node.
     * @return void
     */
    private function insertComment($data)
    {
        $this->appendToRealParent($this->dom->createComment($data));
    }
4450
4451
    /**
     * Attaches a node to the current node, or to the foster parent when one
     * is set. The foster parent is cleared after it has been used.
     *
     * @param DOMNode $node Node to attach.
     * @return void
     */
    private function appendToRealParent($node)
    {
        // No foster parenting requested: append to the current node.
        if ($this->foster_parent === null) {
            end($this->stack)->appendChild($node);
            return;
        }

        /* If the foster parent element is the parent element of the
        last table element in the stack of open elements, then the new
        node must be inserted immediately before the last table element
        in the stack of open elements in the foster parent element;
        otherwise, the new node must be appended to the foster parent
        element. */
        $lastTable = null;

        // Search from the current node upwards for the last table element
        // that is attached to a parent.
        for ($i = count($this->stack) - 1; $i >= 0; $i--) {
            $candidate = $this->stack[$i];

            if ($candidate->nodeName === 'table' &&
                $candidate->parentNode !== null
            ) {
                $lastTable = $candidate;
                break;
            }
        }

        if ($lastTable !== null &&
            $this->foster_parent->isSameNode($lastTable->parentNode)
        ) {
            $this->foster_parent->insertBefore($node, $lastTable);
        } else {
            $this->foster_parent->appendChild($node);
        }

        $this->foster_parent = null;
    }
4481
4482
    /**
     * Tells whether the stack of open elements "has an element in scope"
     * (or, with $table set, "has an element in table scope").
     *
     * @param string|string[] $el    Tag name to look for, or a list of tag
     *                               names of which any one may match.
     * @param bool            $table True for the "in table scope" variant,
     *                               false for the plain "in scope" variant.
     * @return bool True when a matching element is found before a scope
     *              boundary, false otherwise.
     */
    private function elementInScope($el, $table = false)
    {
        // A list of names matches as soon as any single name is in scope.
        if (is_array($el)) {
            foreach ($el as $element) {
                if ($this->elementInScope($element, $table)) {
                    return true;
                }
            }

            return false;
        }

        /* 1. Initialise node to be the current node (the bottommost node of
        the stack), then walk towards the top of the stack. */
        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
            $node = $this->stack[$n];

            if ($node->tagName === $el) {
                /* 2. If node is the target node, terminate in a match state. */
                return true;

            } elseif ($node->tagName === 'table') {
                /* 3. Otherwise, if node is a table element, terminate in a failure
                state. */
                return false;

            } elseif (!$table && in_array(
                    $node->tagName,
                    array(
                        'caption',
                        'td',
                        'th',
                        'button',
                        'marquee',
                        'object'
                    ),
                    true
                )
            ) {
                /* 4. Otherwise, if the algorithm is the "has an element in scope"
                variant (rather than the "has an element in table scope" variant),
                and node is one of the following, terminate in a failure state. */
                // These elements bound the plain scope only; the table-scope
                // variant must look through them (e.g. to find the table that
                // encloses an open cell).
                return false;

            } elseif ($node === $node->ownerDocument->documentElement) {
                /* 5. Otherwise, if node is an html element (root element), terminate
                in a failure state. (This can only happen if the node is the topmost
                node of the stack of open elements, and prevents the next step from
                being invoked if there are no more elements in the stack.) */
                return false;
            }

            /* Otherwise, set node to the previous entry in the stack of open
            elements and return to step 2. */
        }

        // Stack exhausted (or empty) without a match: not in scope.
        return false;
    }
4542
4543
    /**
     * Reconstructs the active formatting elements.
     *
     * Scans $this->a_formatting backwards from its last entry until it meets
     * a marker or an entry that is already in $this->stack (or runs out of
     * entries), then moves forwards again: every entry after that point is
     * shallow-cloned, the clone is appended to the current node, pushed onto
     * $this->stack and stored in the list in place of the original entry.
     *
     * The numbered comments quote the steps of the algorithm being followed;
     * step 2 is checked after step 3 because it needs the last entry.
     *
     * @return bool|null false when there is nothing to reconstruct, null
     *                   after a reconstruction has been performed
     */
    private function reconstructActiveFormattingElements()
    {
        /* 1. If there are no entries in the list of active formatting elements,
        then there is nothing to reconstruct; stop this algorithm. */
        $formatting_elements = count($this->a_formatting);

        if ($formatting_elements === 0) {
            return false;
        }

        /* 3. Let entry be the last (most recently added) element in the list
        of active formatting elements. */
        $entry = end($this->a_formatting);

        /* 2. If the last (most recently added) entry in the list of active
        formatting elements is a marker, or if it is an element that is in the
        stack of open elements, then there is nothing to reconstruct; stop this
        algorithm. */
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            return false;
        }

        // Index of $entry inside the list of active formatting elements.
        $a = $formatting_elements - 1;

        // Whether step 7 has to run before the next clone is made. It is only
        // skipped when the backward scan runs out of entries (step 4 jumps
        // straight to step 8). When the scan stops on a marker or on an open
        // element instead, step 7 must move past that entry first; otherwise
        // the marker or the already-open element itself would be cloned.
        $step_seven = true;

        while (true) {
            /* 4. If there are no entries before entry in the list of active
            formatting elements, then jump to step 8. */
            if ($a === 0) {
                $step_seven = false;
                break;
            }

            /* 5. Let entry be the entry one earlier than entry in the list of
            active formatting elements. */
            $a--;
            $entry = $this->a_formatting[$a];

            /* 6. If entry is neither a marker nor an element that is also in
            the stack of open elements, go to step 4. */
            if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
                break;
            }
        }

        while (true) {
            /* 7. Let entry be the element one later than entry in the list of
            active formatting elements. */
            if ($step_seven) {
                $a++;
                $entry = $this->a_formatting[$a];
            }

            /* 8. Perform a shallow clone of the element entry to obtain clone. */
            $clone = $entry->cloneNode();

            /* 9. Append clone to the current node and push it onto the stack
            of open elements so that it is the new current node. */
            end($this->stack)->appendChild($clone);
            $this->stack[] = $clone;

            /* 10. Replace the entry for entry in the list with an entry for
            clone. */
            $this->a_formatting[$a] = $clone;

            /* 11. If the entry for clone in the list of active formatting
            elements is not the last entry in the list, return to step 7. */
            if (end($this->a_formatting) === $clone) {
                break;
            }

            $step_seven = true;
        }

        return null;
    }
4615
4616
    /**
     * Clears the list of active formatting elements up to the last marker.
     *
     * Entries are removed from the end of $this->a_formatting one at a time;
     * the marker itself is removed as well and ends the process. If the list
     * holds no marker at all, the whole list is emptied and the method
     * returns instead of looping forever on an empty array.
     *
     * @return void
     */
    private function clearTheActiveFormattingElementsUpToTheLastMarker()
    {
        /* When the steps below require the UA to clear the list of active
        formatting elements up to the last marker, the UA must perform the
        following steps: */

        while (count($this->a_formatting) > 0) {
            /* 1. Let entry be the last (most recently added) entry in the list
            of active formatting elements. */
            /* 2. Remove entry from the list of active formatting elements. */
            $entry = array_pop($this->a_formatting);

            /* 3. If entry was a marker, then stop the algorithm at this point.
            The list has been cleared up to the last marker. */
            if ($entry === self::MARKER) {
                break;
            }
        }
    }
4637
4638
    /**
     * Generates implied end tags by popping elements off $this->stack.
     *
     * While the current node (the last stack entry) is a dd, dt, li, p, td,
     * th or tr element whose name is not listed in $exclude, it is removed
     * from the stack. Popping stops at the first node that does not qualify
     * or when the stack becomes empty.
     *
     * @param string[] $exclude tag names that must not be closed implicitly
     * @return void
     */
    private function generateImpliedEndTags($exclude = array())
    {
        /* When the steps below require the UA to generate implied end tags,
        then, if the current node is a dd element, a dt element, an li element,
        a p element, a td element, a th element, or a tr element, the UA must
        act as if an end tag with the respective tag name had been seen and
        then generate implied end tags again. */
        $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

        // end() yields false on an empty stack; check that before reading
        // nodeName so an exhausted stack simply ends the loop.
        while (($node = end($this->stack)) !== false
            && in_array($node->nodeName, $elements, true)
        ) {
            array_pop($this->stack);
        }
    }
4652
4653
    /**
     * Classifies a node by looking its tag name up in the special, scoping
     * and formatting name lists, in that order.
     *
     * @param object $node node exposing a tagName property
     * @return mixed self::SPECIAL, self::SCOPING or self::FORMATTING for a
     *               name found in the matching list, self::PHRASING otherwise
     */
    private function getElementCategory($node)
    {
        $tag = $node->tagName;

        // The first list that contains the name decides the category.
        if (in_array($tag, $this->special)) {
            return self::SPECIAL;
        }

        if (in_array($tag, $this->scoping)) {
            return self::SCOPING;
        }

        if (in_array($tag, $this->formatting)) {
            return self::FORMATTING;
        }

        // Anything not listed falls into the phrasing category.
        return self::PHRASING;
    }
4666
4667
    /**
     * Clears the stack of open elements back to a table context.
     *
     * Pops entries off $this->stack until the current node (the last entry)
     * has a node name contained in $elements.
     *
     * @param string[] $elements node names at which popping stops
     * @return void
     */
    private function clearStackToTableContext($elements)
    {
        /* When the steps above require the UA to clear the stack back to a
        table context, it means that the UA must, while the current node is not
        a table element or an html element, pop elements from the stack of open
        elements. If this causes any elements to be popped from the stack, then
        this is a parse error. */
        while (!in_array(end($this->stack)->nodeName, $elements)) {
            array_pop($this->stack);
        }
    }
4684
4685
    /**
     * Resets the insertion mode from the current stack of open elements.
     *
     * Walks $this->stack from its last entry towards its first and sets
     * $this->mode according to the first node whose name has a rule below.
     * If no rule applies but the first stack entry has been reached, the
     * mode becomes self::IN_BODY. If the loop finishes without a match,
     * $this->mode is left untouched.
     *
     * @return void
     */
    private function resetInsertionMode()
    {
        /* 1. Let last be false. */
        $last = false;

        for ($index = count($this->stack) - 1; $index >= 0; $index--) {
            /* 2. Let node be the last node in the stack of open elements. */
            $node = $this->stack[$index];

            /* 3. If node is the first node in the stack of open elements, then
            set last to true. If the element whose innerHTML attribute is being
            set is neither a td element nor a th element, then set node to the
            element whose innerHTML attribute is being set. (innerHTML case) */
            if ($this->stack[0]->isSameNode($node)) {
                $last = true;
            }

            switch ($node->nodeName) {
                /* 4. If node is a select element, then switch the insertion
                mode to "in select" and abort these steps. (innerHTML case) */
                case 'select':
                    $this->mode = self::IN_SELECT;
                    return;

                /* 5. If node is a td or th element, then switch the insertion
                mode to "in cell" and abort these steps. */
                case 'td':
                case 'th':
                    $this->mode = self::IN_CELL;
                    return;

                /* 6. If node is a tr element, then switch the insertion mode
                to "in row" and abort these steps. */
                case 'tr':
                    $this->mode = self::IN_ROW;
                    return;

                /* 7. If node is a tbody, thead, or tfoot element, then switch
                the insertion mode to "in table body" and abort these steps. */
                case 'tbody':
                case 'thead':
                case 'tfoot':
                    $this->mode = self::IN_TBODY;
                    return;

                /* 8. If node is a caption element, then switch the insertion
                mode to "in caption" and abort these steps. */
                case 'caption':
                    $this->mode = self::IN_CAPTION;
                    return;

                /* 9. If node is a colgroup element, then switch the insertion
                mode to "in column group" and abort these steps.
                (innerHTML case) */
                case 'colgroup':
                    $this->mode = self::IN_CGROUP;
                    return;

                /* 10. If node is a table element, then switch the insertion
                mode to "in table" and abort these steps. */
                case 'table':
                    $this->mode = self::IN_TABLE;
                    return;

                /* 11. If node is a head element, then switch the insertion
                mode to "in body" ("in body"! not "in head"!) and abort these
                steps. (innerHTML case) */
                /* 12. If node is a body element, then switch the insertion
                mode to "in body" and abort these steps. */
                case 'head':
                case 'body':
                    $this->mode = self::IN_BODY;
                    return;

                /* 13. If node is a frameset element, then switch the insertion
                mode to "in frameset" and abort these steps. (innerHTML case) */
                case 'frameset':
                    $this->mode = self::IN_FRAME;
                    return;

                /* 14. If node is an html element, then: if the head element
                pointer is null, switch the insertion mode to "before head",
                otherwise, switch the insertion mode to "after head". In either
                case, abort these steps. (innerHTML case) */
                case 'html':
                    if ($this->head_pointer === null) {
                        $this->mode = self::BEFOR_HEAD;
                    } else {
                        $this->mode = self::AFTER_HEAD;
                    }
                    return;

                /* 15. If last is true, then set the insertion mode to "in body"
                and abort these steps. (innerHTML case) */
                default:
                    if ($last) {
                        $this->mode = self::IN_BODY;
                        return;
                    }
                    // Otherwise continue with the next entry down the stack.
                    break;
            }
        }
    }
4783
4784
    /**
     * Closes the currently open table cell, if there is one.
     *
     * Asks elementInScope() (with true as its second argument) for a td
     * first and a th second; for the first one reported, an end tag token
     * with that name is handed to inCell(). Nothing happens when neither
     * is reported.
     *
     * @return void
     */
    private function closeCell()
    {
        /* If the stack of open elements has a td or th element in table scope,
        then act as if an end tag token with that tag name had been seen. */
        if ($this->elementInScope('td', true)) {
            $tagName = 'td';
        } elseif ($this->elementInScope('th', true)) {
            $tagName = 'th';
        } else {
            return;
        }

        // Synthesised end tag token for the cell that was found.
        $endTag = array(
            'name' => $tagName,
            'type' => HTML5::ENDTAG
        );

        $this->inCell($endTag);
    }
4801
4802
    /**
     * Returns the value currently held in $this->dom.
     *
     * NOTE(review): presumably this is the DOM document assembled by the
     * tree constructor; the property is initialised outside this chunk,
     * so confirm against the constructor.
     *
     * @return mixed the contents of $this->dom
     */
    public function save()
    {
        $dom = $this->dom;

        return $dom;
    }
4806
}
4807