HTML5TreeConstructer::inBody()   F
last analyzed

Complexity

Conditions 232
Paths 347

Size

Total Lines 1139
Code Lines 508

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 54056
Metric Value
cc 232
eloc 508
nc 347
nop 1
dl 0
loc 1139
ccs 0
cts 779
cp 0
crap 54056
rs 3.3333

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
 * Experimental HTML5-based lexer built on Jeroen van der Meer's PH5P library.
 * The bundled parser occupies the HTML5 pseudo-namespace, which may conflict
 * with other code defining classes of the same name.
 *
 * @note
 *    Recent changes to PHP's DOM extension have resulted in some fatal
 *    error conditions with the original version of PH5P. Pending changes,
 *    this lexer will punt to DirectLex if DOM throws an exception.
 */

class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
{
    /**
     * Tokenizes HTML by parsing it with the HTML5 class and passing the
     * first <div> inside <body> of the resulting document to tokenizeDOM().
     * If the parser throws a DOMException, the exception is registered in
     * the context as 'PH5PError' and the original HTML is handed to
     * DirectLex instead.
     *
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[]
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $prepared = $this->wrapHTML(
            $this->normalize($html, $config, $context),
            $config,
            $context
        );

        try {
            $parser = new HTML5($prepared);
            $doc = $parser->save();
        } catch (DOMException $e) {
            // Parsing failed: fall back to DirectLex on the untouched input.
            $fallback = new HTMLPurifier_Lexer_DirectLex();
            // Keep the exception around so the fallback can be detected.
            $context->register('PH5PError', $e);
            return $fallback->tokenizeHTML($html, $config, $context);
        }

        // Descend <html> -> <body> -> <div> to reach the node to tokenize.
        $html_node = $doc->getElementsByTagName('html')->item(0);
        $body_node = $html_node->getElementsByTagName('body')->item(0);
        $wrapper = $body_node->getElementsByTagName('div')->item(0);

        $tokens = array();
        $this->tokenizeDOM($wrapper, $tokens);
        return $tokens;
    }
}
45
46
/*
47
48
Copyright 2007 Jeroen van der Meer <http://jero.net/>
49
50
Permission is hereby granted, free of charge, to any person obtaining a
51
copy of this software and associated documentation files (the
52
"Software"), to deal in the Software without restriction, including
53
without limitation the rights to use, copy, modify, merge, publish,
54
distribute, sublicense, and/or sell copies of the Software, and to
55
permit persons to whom the Software is furnished to do so, subject to
56
the following conditions:
57
58
The above copyright notice and this permission notice shall be included
59
in all copies or substantial portions of the Software.
60
61
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
62
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
63
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
64
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
65
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
66
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
67
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
68
69
*/
70
71
class HTML5
0 ignored issues
show
Coding Style Compatibility introduced by
PSR1 recommends that each class should be in its own file to aid autoloaders.

Having each class in a dedicated file usually works well with PSR autoloaders and is therefore a well-established practice. If you use a different autoloader, you may not need to follow this rule.

Loading history...
72
{
73
    private $data;
74
    private $char;
75
    private $EOF;
76
    private $state;
77
    private $tree;
78
    private $token;
79
    private $content_model;
80
    private $escape = false;
81
    private $entities = array(
82
        'AElig;',
83
        'AElig',
84
        'AMP;',
85
        'AMP',
86
        'Aacute;',
87
        'Aacute',
88
        'Acirc;',
89
        'Acirc',
90
        'Agrave;',
91
        'Agrave',
92
        'Alpha;',
93
        'Aring;',
94
        'Aring',
95
        'Atilde;',
96
        'Atilde',
97
        'Auml;',
98
        'Auml',
99
        'Beta;',
100
        'COPY;',
101
        'COPY',
102
        'Ccedil;',
103
        'Ccedil',
104
        'Chi;',
105
        'Dagger;',
106
        'Delta;',
107
        'ETH;',
108
        'ETH',
109
        'Eacute;',
110
        'Eacute',
111
        'Ecirc;',
112
        'Ecirc',
113
        'Egrave;',
114
        'Egrave',
115
        'Epsilon;',
116
        'Eta;',
117
        'Euml;',
118
        'Euml',
119
        'GT;',
120
        'GT',
121
        'Gamma;',
122
        'Iacute;',
123
        'Iacute',
124
        'Icirc;',
125
        'Icirc',
126
        'Igrave;',
127
        'Igrave',
128
        'Iota;',
129
        'Iuml;',
130
        'Iuml',
131
        'Kappa;',
132
        'LT;',
133
        'LT',
134
        'Lambda;',
135
        'Mu;',
136
        'Ntilde;',
137
        'Ntilde',
138
        'Nu;',
139
        'OElig;',
140
        'Oacute;',
141
        'Oacute',
142
        'Ocirc;',
143
        'Ocirc',
144
        'Ograve;',
145
        'Ograve',
146
        'Omega;',
147
        'Omicron;',
148
        'Oslash;',
149
        'Oslash',
150
        'Otilde;',
151
        'Otilde',
152
        'Ouml;',
153
        'Ouml',
154
        'Phi;',
155
        'Pi;',
156
        'Prime;',
157
        'Psi;',
158
        'QUOT;',
159
        'QUOT',
160
        'REG;',
161
        'REG',
162
        'Rho;',
163
        'Scaron;',
164
        'Sigma;',
165
        'THORN;',
166
        'THORN',
167
        'TRADE;',
168
        'Tau;',
169
        'Theta;',
170
        'Uacute;',
171
        'Uacute',
172
        'Ucirc;',
173
        'Ucirc',
174
        'Ugrave;',
175
        'Ugrave',
176
        'Upsilon;',
177
        'Uuml;',
178
        'Uuml',
179
        'Xi;',
180
        'Yacute;',
181
        'Yacute',
182
        'Yuml;',
183
        'Zeta;',
184
        'aacute;',
185
        'aacute',
186
        'acirc;',
187
        'acirc',
188
        'acute;',
189
        'acute',
190
        'aelig;',
191
        'aelig',
192
        'agrave;',
193
        'agrave',
194
        'alefsym;',
195
        'alpha;',
196
        'amp;',
197
        'amp',
198
        'and;',
199
        'ang;',
200
        'apos;',
201
        'aring;',
202
        'aring',
203
        'asymp;',
204
        'atilde;',
205
        'atilde',
206
        'auml;',
207
        'auml',
208
        'bdquo;',
209
        'beta;',
210
        'brvbar;',
211
        'brvbar',
212
        'bull;',
213
        'cap;',
214
        'ccedil;',
215
        'ccedil',
216
        'cedil;',
217
        'cedil',
218
        'cent;',
219
        'cent',
220
        'chi;',
221
        'circ;',
222
        'clubs;',
223
        'cong;',
224
        'copy;',
225
        'copy',
226
        'crarr;',
227
        'cup;',
228
        'curren;',
229
        'curren',
230
        'dArr;',
231
        'dagger;',
232
        'darr;',
233
        'deg;',
234
        'deg',
235
        'delta;',
236
        'diams;',
237
        'divide;',
238
        'divide',
239
        'eacute;',
240
        'eacute',
241
        'ecirc;',
242
        'ecirc',
243
        'egrave;',
244
        'egrave',
245
        'empty;',
246
        'emsp;',
247
        'ensp;',
248
        'epsilon;',
249
        'equiv;',
250
        'eta;',
251
        'eth;',
252
        'eth',
253
        'euml;',
254
        'euml',
255
        'euro;',
256
        'exist;',
257
        'fnof;',
258
        'forall;',
259
        'frac12;',
260
        'frac12',
261
        'frac14;',
262
        'frac14',
263
        'frac34;',
264
        'frac34',
265
        'frasl;',
266
        'gamma;',
267
        'ge;',
268
        'gt;',
269
        'gt',
270
        'hArr;',
271
        'harr;',
272
        'hearts;',
273
        'hellip;',
274
        'iacute;',
275
        'iacute',
276
        'icirc;',
277
        'icirc',
278
        'iexcl;',
279
        'iexcl',
280
        'igrave;',
281
        'igrave',
282
        'image;',
283
        'infin;',
284
        'int;',
285
        'iota;',
286
        'iquest;',
287
        'iquest',
288
        'isin;',
289
        'iuml;',
290
        'iuml',
291
        'kappa;',
292
        'lArr;',
293
        'lambda;',
294
        'lang;',
295
        'laquo;',
296
        'laquo',
297
        'larr;',
298
        'lceil;',
299
        'ldquo;',
300
        'le;',
301
        'lfloor;',
302
        'lowast;',
303
        'loz;',
304
        'lrm;',
305
        'lsaquo;',
306
        'lsquo;',
307
        'lt;',
308
        'lt',
309
        'macr;',
310
        'macr',
311
        'mdash;',
312
        'micro;',
313
        'micro',
314
        'middot;',
315
        'middot',
316
        'minus;',
317
        'mu;',
318
        'nabla;',
319
        'nbsp;',
320
        'nbsp',
321
        'ndash;',
322
        'ne;',
323
        'ni;',
324
        'not;',
325
        'not',
326
        'notin;',
327
        'nsub;',
328
        'ntilde;',
329
        'ntilde',
330
        'nu;',
331
        'oacute;',
332
        'oacute',
333
        'ocirc;',
334
        'ocirc',
335
        'oelig;',
336
        'ograve;',
337
        'ograve',
338
        'oline;',
339
        'omega;',
340
        'omicron;',
341
        'oplus;',
342
        'or;',
343
        'ordf;',
344
        'ordf',
345
        'ordm;',
346
        'ordm',
347
        'oslash;',
348
        'oslash',
349
        'otilde;',
350
        'otilde',
351
        'otimes;',
352
        'ouml;',
353
        'ouml',
354
        'para;',
355
        'para',
356
        'part;',
357
        'permil;',
358
        'perp;',
359
        'phi;',
360
        'pi;',
361
        'piv;',
362
        'plusmn;',
363
        'plusmn',
364
        'pound;',
365
        'pound',
366
        'prime;',
367
        'prod;',
368
        'prop;',
369
        'psi;',
370
        'quot;',
371
        'quot',
372
        'rArr;',
373
        'radic;',
374
        'rang;',
375
        'raquo;',
376
        'raquo',
377
        'rarr;',
378
        'rceil;',
379
        'rdquo;',
380
        'real;',
381
        'reg;',
382
        'reg',
383
        'rfloor;',
384
        'rho;',
385
        'rlm;',
386
        'rsaquo;',
387
        'rsquo;',
388
        'sbquo;',
389
        'scaron;',
390
        'sdot;',
391
        'sect;',
392
        'sect',
393
        'shy;',
394
        'shy',
395
        'sigma;',
396
        'sigmaf;',
397
        'sim;',
398
        'spades;',
399
        'sub;',
400
        'sube;',
401
        'sum;',
402
        'sup1;',
403
        'sup1',
404
        'sup2;',
405
        'sup2',
406
        'sup3;',
407
        'sup3',
408
        'sup;',
409
        'supe;',
410
        'szlig;',
411
        'szlig',
412
        'tau;',
413
        'there4;',
414
        'theta;',
415
        'thetasym;',
416
        'thinsp;',
417
        'thorn;',
418
        'thorn',
419
        'tilde;',
420
        'times;',
421
        'times',
422
        'trade;',
423
        'uArr;',
424
        'uacute;',
425
        'uacute',
426
        'uarr;',
427
        'ucirc;',
428
        'ucirc',
429
        'ugrave;',
430
        'ugrave',
431
        'uml;',
432
        'uml',
433
        'upsih;',
434
        'upsilon;',
435
        'uuml;',
436
        'uuml',
437
        'weierp;',
438
        'xi;',
439
        'yacute;',
440
        'yacute',
441
        'yen;',
442
        'yen',
443
        'yuml;',
444
        'yuml',
445
        'zeta;',
446
        'zwj;',
447
        'zwnj;'
448
    );
449
450
    const PCDATA = 0;
451
    const RCDATA = 1;
452
    const CDATA = 2;
453
    const PLAINTEXT = 3;
454
455
    const DOCTYPE = 0;
456
    const STARTTAG = 1;
457
    const ENDTAG = 2;
458
    const COMMENT = 3;
459
    const CHARACTR = 4;
460
    const EOF = 5;
461
462
    /**
     * Stores the input, resets the tokenizer state and then runs the state
     * machine until a state handler sets the state to null.
     *
     * @param string $data input to tokenize
     */
    public function __construct($data)
    {
        $this->data = $data;
        $this->EOF = strlen($data);
        // Start one position before the input; the data state increments
        // the position before it reads a character.
        $this->char = -1;
        $this->content_model = self::PCDATA;
        $this->tree = new HTML5TreeConstructer;
        $this->state = 'data';

        // Each state is handled by the method named "<state>State".
        do {
            $handler = $this->state . 'State';
            $this->$handler();
        } while ($this->state !== null);
    }
476
477
    /**
     * Returns whatever the tree constructor's save() produces.
     */
    public function save()
    {
        $result = $this->tree->save();

        return $result;
    }
481
482
    /**
     * Returns the character at the current position, or false once the
     * position is at or beyond the end of the input.
     */
    private function char()
    {
        if ($this->char >= $this->EOF) {
            return false;
        }

        return $this->data[$this->char];
    }
488
489
    /**
     * Reads from the input without moving the current position.
     *
     * @param int $s zero-based start offset into the input
     * @param int $l number of bytes to read; 0 (the default) reads the
     *               single character at $s
     * @return string|null the character or substring, or null when the
     *                     requested range is not entirely inside the input
     */
    private function character($s, $l = 0)
    {
        // A negative offset is not a valid stream position; string indexing
        // and substr() would otherwise count backwards from the end.
        if ($s < 0) {
            return null;
        }

        if ($l === 0) {
            return ($s < $this->EOF) ? $this->data[$s] : null;
        }

        // A range that ends exactly on the last byte is still in bounds,
        // hence "<=" rather than "<".
        return ($s + $l <= $this->EOF) ? substr($this->data, $s, $l) : null;
    }
499
500
    /**
     * Returns the longest run of characters, beginning at $start, that all
     * belong to the given character class.
     *
     * @param string $char_class contents of a PCRE bracket expression,
     *                           e.g. 'A-Za-z' (inserted between '[' and ']')
     * @param int $start offset into the input at which the run must begin
     * @return string the matching run, or '' when the character at $start is
     *                not in the class or $start is past the end of the input
     */
    private function characters($char_class, $start)
    {
        $subject = (string)substr($this->data, $start);

        // Match only the leading run. The previous preg_replace() form left
        // the subject untouched when nothing matched, so a non-matching
        // position returned the entire remaining input instead of ''.
        if (preg_match('#^[' . $char_class . ']+#', $subject, $match) === 1) {
            return $match[0];
        }

        return '';
    }
504
505
    /**
     * Handles the tokenizer's "data" state: consumes one character and
     * either switches to another state or emits character tokens.
     *
     * Reads and updates $this->char, $this->state and $this->escape.
     */
    private function dataState()
    {
        // Consume the next input character
        $this->char++;
        $char = $this->char();

        // RCDATA and CDATA share the "<!--" ... "-->" escape handling below.
        $escapable = $this->content_model === self::RCDATA ||
            $this->content_model === self::CDATA;

        if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {
            /* U+0026 AMPERSAND (&)
            When the content model flag is set to one of the PCDATA or RCDATA
            states: switch to the entity data state. Otherwise: treat it as per
            the "anything else" entry below. */
            $this->state = 'entityData';

        } elseif ($char === '-') {
            /* If the content model flag is set to either the RCDATA state or
            the CDATA state, and the escape flag is false, and there are at
            least three characters before this one in the input stream, and the
            last four characters in the input stream, including this one, are
            "<!--", then set the escape flag to true.

            The four characters ending at the current position start at
            char - 3 (the old code read char - 4 .. char - 1, which excluded
            the current character).

            Note: the "anything else" branch below consumes text up to the
            next '<' or '&' in one go, so a hyphen inside such a run never
            reaches this check. */
            if ($escapable && $this->escape === false &&
                $this->char >= 3 && substr($this->data, $this->char - 3, 4) === '<!--'
            ) {
                $this->escape = true;
            }

            /* In any case, emit the input character as a character token. Stay
            in the data state. */
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => $char
                )
            );

            /* U+003C LESS-THAN SIGN (<) */
        } elseif ($char === '<' && ($this->content_model === self::PCDATA ||
                ($escapable && $this->escape === false))
        ) {
            /* When the content model flag is set to the PCDATA state: switch
            to the tag open state.

            When the content model flag is set to either the RCDATA state or
            the CDATA state and the escape flag is false: switch to the tag
            open state.

            Otherwise: treat it as per the "anything else" entry below. */
            $this->state = 'tagOpen';

            /* U+003E GREATER-THAN SIGN (>) */
        } elseif ($char === '>') {
            /* If the content model flag is set to either the RCDATA state or
            the CDATA state, and the escape flag is true, and the last three
            characters in the input stream including this one are "-->",
            set the escape flag to false.

            The three characters ending at the current position start at
            char - 2 (the old code read forward from the '>' itself, so the
            comparison could never succeed). */
            if ($escapable && $this->escape === true &&
                $this->char >= 2 && substr($this->data, $this->char - 2, 3) === '-->'
            ) {
                $this->escape = false;
            }

            /* In any case, emit the input character as a character token.
            Stay in the data state. */
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => $char
                )
            );

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Emit an end-of-file token. */
            $this->EOF();

        } elseif ($this->content_model === self::PLAINTEXT) {
            /* When the content model flag is set to the PLAINTEXT state
            THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of
            the text and emit it as a character token. */
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => substr($this->data, $this->char)
                )
            );

            $this->EOF();

        } else {
            /* Anything else
            THIS DIFFERS GREATLY FROM THE SPEC: Get as many characters as
            would otherwise also be treated as character tokens and emit them
            as a single character token. Stay in the data state. */
            $len = strcspn($this->data, '<&', $this->char);

            // The current character can itself be '<' or '&' (e.g. '&' while
            // the content model is CDATA). strcspn() then reports a run of
            // length 0, which used to emit an empty token and step the
            // position back by one, so this state was re-entered on the same
            // character forever. Always consume at least that character.
            if ($len === 0) {
                $len = 1;
            }

            $char = substr($this->data, $this->char, $len);
            $this->char += $len - 1;

            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => $char
                )
            );

            $this->state = 'data';
        }
    }
616
617
    /**
     * Handles the "entity data" state: tries to consume an entity, emits the
     * result (or a literal ampersand when entity() returns a falsy value)
     * as a character token, and returns to the data state.
     */
    private function entityDataState()
    {
        $consumed = $this->entity();

        // A falsy result means no entity was consumed: emit "&" itself.
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => $consumed ? $consumed : '&'
            )
        );

        $this->state = 'data';
    }
635
636
    /**
     * Handles the "tag open" state, entered after a "<" was seen in the data
     * state. The behaviour depends on the content model flag; content models
     * other than RCDATA, CDATA and PCDATA are not handled here and leave the
     * tokenizer untouched.
     */
    private function tagOpenState()
    {
        $model = $this->content_model;

        if ($model === self::RCDATA || $model === self::CDATA) {
            // Only a following "/" can start a close tag here; anything else
            // makes the "<" plain text.
            if ($this->character($this->char + 1) === '/') {
                $this->char++;
                $this->state = 'closeTagOpen';
                return;
            }

            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => '<'
                )
            );
            $this->state = 'data';
            return;
        }

        if ($model !== self::PCDATA) {
            return;
        }

        // PCDATA: consume the next input character and dispatch on it.
        $this->char++;
        $char = $this->char();

        if ($char === '!') {
            // U+0021 EXCLAMATION MARK: markup declaration (comment, doctype).
            $this->state = 'markupDeclarationOpen';
            return;
        }

        if ($char === '/') {
            // U+002F SOLIDUS: start of an end tag.
            $this->state = 'closeTagOpen';
            return;
        }

        if (preg_match('/^[A-Za-z]$/', $char)) {
            // A letter starts a new start tag token whose name begins with
            // the lowercased letter. The token is not emitted yet; later
            // states fill in the rest.
            $this->token = array(
                'name' => strtolower($char),
                'type' => self::STARTTAG,
                'attr' => array()
            );
            $this->state = 'tagName';
            return;
        }

        if ($char === '>') {
            // Parse error: "<>" is emitted as literal text.
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => '<>'
                )
            );
            $this->state = 'data';
            return;
        }

        if ($char === '?') {
            // Parse error: treated as a bogus comment.
            $this->state = 'bogusComment';
            return;
        }

        // Anything else is a parse error: emit the "<" as text and step back
        // so the data state reconsumes the current character.
        $this->emitToken(
            array(
                'type' => self::CHARACTR,
                'data' => '<'
            )
        );
        $this->char--;
        $this->state = 'data';
    }
727
728
    /**
     * Handles the "close tag open" state, entered after "</" was seen.
     *
     * In the RCDATA and CDATA content models a close tag is only accepted
     * when its name equals the nodeName of the element on top of the tree
     * constructor's stack and is followed by a tab, line feed, vertical tab,
     * form feed, space, ">" or "/". Otherwise "</" is emitted as text.
     */
    private function closeTagOpenState()
    {
        // Letters following the current position, lowercased.
        $candidate = strtolower($this->characters('A-Za-z', $this->char + 1));
        $matches_open_element = count($this->tree->stack) > 0
            && $candidate === end($this->tree->stack)->nodeName;

        $reject = false;
        if ($this->content_model === self::RCDATA || $this->content_model === self::CDATA) {
            if (!$matches_open_element) {
                $reject = true;
            } else {
                // Character directly after the candidate tag name.
                $follower = $this->character($this->char + 1 + strlen($candidate));
                $reject = !preg_match('/[\t\n\x0b\x0c >\/]/', $follower)
                    || $this->EOF === $this->char;
            }
        }

        if ($reject) {
            // Parse error: emit "</" as text and continue in the data state.
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => '</'
                )
            );
            $this->state = 'data';
            return;
        }

        // PCDATA, or the name matched: consume the next input character.
        $this->char++;
        $char = $this->char();

        if (preg_match('/^[A-Za-z]$/', $char)) {
            // A letter starts a new end tag token whose name begins with the
            // lowercased letter; it is emitted by a later state.
            $this->token = array(
                'name' => strtolower($char),
                'type' => self::ENDTAG
            );
            $this->state = 'tagName';
            return;
        }

        if ($char === '>') {
            // Parse error ("</>"): nothing is emitted.
            $this->state = 'data';
            return;
        }

        if ($this->char === $this->EOF) {
            // Parse error: emit "</" as text and step back so the data state
            // reconsumes the end of input.
            $this->emitToken(
                array(
                    'type' => self::CHARACTR,
                    'data' => '</'
                )
            );
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Parse error: anything else is treated as a bogus comment.
        $this->state = 'bogusComment';
    }
808
809
    /**
     * Handles the "tag name" state: extends the current tag token's name
     * one character at a time until whitespace, "/", ">" or the end of the
     * input is reached.
     */
    private function tagNameState()
    {
        // Consume the next input character.
        $this->char++;
        $char = $this->character($this->char);

        // Tab, LF, vertical tab, form feed, space or "/" end the name and
        // lead to the attribute list (a "/" is a parse error unless it is a
        // permitted slash).
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $char) || $char === '/') {
            $this->state = 'beforeAttributeName';
            return;
        }

        if ($char === '>') {
            // End of tag: emit the current tag token.
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        if ($this->char === $this->EOF) {
            // Parse error: emit the current tag token, then step back so the
            // data state reconsumes the end of input.
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else is appended, lowercased, to the tag name.
        $this->token['name'] .= strtolower($char);
        $this->state = 'tagName';
    }
853
854
    /**
     * Handles the "before attribute name" state: skips whitespace and "/"
     * inside a tag, emits the tag on ">" or end of input, and otherwise
     * starts a new attribute on the current tag token.
     */
    private function beforeAttributeNameState()
    {
        // Consume the next input character.
        $this->char++;
        $char = $this->character($this->char);

        // Tab, LF, vertical tab, form feed, space or "/": stay in this state
        // (a "/" is a parse error unless it is a permitted slash).
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $char) || $char === '/') {
            $this->state = 'beforeAttributeName';
            return;
        }

        if ($char === '>') {
            // End of tag: emit the current tag token.
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        if ($this->char === $this->EOF) {
            // Parse error: emit the current tag token, then step back so the
            // data state reconsumes the end of input.
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else starts a new attribute whose name begins with the
        // lowercased character. Its value is stored as null at this point.
        $this->token['attr'][] = array(
            'name' => strtolower($char),
            'value' => null
        );
        $this->state = 'attributeName';
    }
903
904
    /**
     * Handles the "attribute name" state: extends the name of the most
     * recently started attribute of the current tag token until whitespace,
     * "=", ">", "/" or the end of the input is reached.
     */
    private function attributeNameState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
            /* U+0009 CHARACTER TABULATION
            U+000A LINE FEED (LF)
            U+000B LINE TABULATION
            U+000C FORM FEED (FF)
            U+0020 SPACE
            Switch to the after attribute name state. */
            $this->state = 'afterAttributeName';

        } elseif ($char === '=') {
            /* U+003D EQUALS SIGN (=)
            Switch to the before attribute value state. */
            $this->state = 'beforeAttributeValue';

        } elseif ($char === '>') {
            /* U+003E GREATER-THAN SIGN (>)
            Emit the current tag token. Switch to the data state. */
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif ($char === '/') {
            /* U+002F SOLIDUS (/)
            Parse error unless this is a permitted slash. Switch to the before
            attribute name state.

            This branch used to be skipped when the slash was directly
            followed by ">", so the slash fell through to "anything else" and
            was appended to the attribute name (<a href/> produced an
            attribute called "href/"). The slash now always ends the name, as
            it does in the tag name and before attribute name states. */
            $this->state = 'beforeAttributeName';

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the EOF
            character in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } else {
            /* Anything else
            Append the current input character to the current attribute's name.
            Stay in the attribute name state. */
            $last = count($this->token['attr']) - 1;
            $this->token['attr'][$last]['name'] .= strtolower($char);

            $this->state = 'attributeName';
        }
    }
955
956
    /**
     * After attribute name state of the tokenizer.
     *
     * Consumes one input character that follows a completed attribute name
     * and either skips it, finishes the tag, or starts the next attribute.
     */
    private function afterAttributeNameState()
    {
        $this->char++;
        $next = $this->character($this->char);

        // Tab, LF, VT, FF or space: nothing to do, remain in this state.
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
            $this->state = 'afterAttributeName';
            return;
        }

        // "=": the attribute just named gets a value.
        if ($next === '=') {
            $this->state = 'beforeAttributeValue';
            return;
        }

        // ">": emit the tag token and return to the data state.
        if ($next === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // "/" that is not directly followed by ">": look for another
        // attribute name.
        if ($next === '/' && $this->character($this->char + 1) !== '>') {
            $this->state = 'beforeAttributeName';
            return;
        }

        // EOF: emit the tag as it stands and step back one position so the
        // data state sees the EOF again.
        if ($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else is the first character of a new attribute. Its name
        // is stored lower-cased and its value starts out as null.
        $attribute = array(
            'name' => strtolower($next),
            'value' => null
        );
        $this->token['attr'][] = $attribute;
        $this->state = 'attributeName';
    }
1010
1011
    /**
     * Before attribute value state of the tokenizer.
     *
     * Consumes one input character after an "=" and selects the quoting
     * style of the attribute value that follows.
     */
    private function beforeAttributeValueState()
    {
        $this->char++;
        $next = $this->character($this->char);

        // Tab, LF, VT, FF or space: keep waiting for the value.
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
            $this->state = 'beforeAttributeValue';
            return;
        }

        // Opening double quote.
        if ($next === '"') {
            $this->state = 'attributeValueDoubleQuoted';
            return;
        }

        // "&": step back so the unquoted value state consumes the ampersand
        // itself.
        if ($next === '&') {
            $this->char--;
            $this->state = 'attributeValueUnquoted';
            return;
        }

        // Opening single quote.
        if ($next === '\'') {
            $this->state = 'attributeValueSingleQuoted';
            return;
        }

        // ">": the tag ends without a value; emit it.
        if ($next === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // Anything else is the first character of an unquoted value.
        $newest = count($this->token['attr']) - 1;
        $this->token['attr'][$newest]['value'] .= $next;
        $this->state = 'attributeValueUnquoted';
    }
1059
1060
    /**
     * Attribute value (double-quoted) state of the tokenizer.
     *
     * Consumes one input character inside a double-quoted attribute value.
     */
    private function attributeValueDoubleQuotedState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if ($char === '"') {
            /* U+0022 QUOTATION MARK (")
            The value is closed. Switch to the before attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($char === '&') {
            /* U+0026 AMPERSAND (&)
            Let the entity handler append either the decoded entity or a
            literal ampersand to the value. The handler declares no
            parameters, so it is called without arguments. */
            $this->entityInAttributeValueState();

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the character
            in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } else {
            /* Anything else
            Append the current input character to the current attribute's value.
            Stay in the attribute value (double-quoted) state. */
            $last = count($this->token['attr']) - 1;
            $this->token['attr'][$last]['value'] .= $char;

            $this->state = 'attributeValueDoubleQuoted';
        }
    }
1095
1096
    /**
     * Attribute value (single-quoted) state of the tokenizer.
     *
     * Consumes one input character inside a single-quoted attribute value.
     */
    private function attributeValueSingleQuotedState()
    {
        // Consume the next input character:
        $this->char++;
        $char = $this->character($this->char);

        if ($char === '\'') {
            /* U+0027 APOSTROPHE (')
            The value is closed. Switch to the before attribute name state. */
            $this->state = 'beforeAttributeName';

        } elseif ($char === '&') {
            /* U+0026 AMPERSAND (&)
            Let the entity handler append either the decoded entity or a
            literal ampersand to the value. The handler declares no
            parameters, so it is called without arguments. */
            $this->entityInAttributeValueState();

        } elseif ($this->char === $this->EOF) {
            /* EOF
            Parse error. Emit the current tag token. Reconsume the character
            in the data state. */
            $this->emitToken($this->token);

            $this->char--;
            $this->state = 'data';

        } else {
            /* Anything else
            Append the current input character to the current attribute's value.
            Stay in the attribute value (single-quoted) state. */
            $last = count($this->token['attr']) - 1;
            $this->token['attr'][$last]['value'] .= $char;

            $this->state = 'attributeValueSingleQuoted';
        }
    }
1131
1132
    /**
     * Attribute value (unquoted) state of the tokenizer.
     *
     * Consumes one input character of an attribute value that is not
     * wrapped in quotes.
     */
    private function attributeValueUnquotedState()
    {
        $this->char++;
        $next = $this->character($this->char);

        // Tab, LF, VT, FF or space ends the value; look for the next
        // attribute name.
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
            $this->state = 'beforeAttributeName';
            return;
        }

        // "&": hand over to the entity handler, which appends to the value.
        if ($next === '&') {
            $this->entityInAttributeValueState();
            return;
        }

        // ">": the tag is complete; emit it and return to the data state.
        if ($next === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // Anything else is part of the value.
        $newest = count($this->token['attr']) - 1;
        $this->token['attr'][$newest]['value'] .= $next;
        $this->state = 'attributeValueUnquoted';
    }
1168
1169
    /**
     * Handles an ampersand met inside an attribute value.
     *
     * Tries to consume an entity and appends the result to the value of the
     * most recently added attribute of the current tag token. When no
     * entity could be consumed, a literal "&" is appended instead.
     */
    private function entityInAttributeValueState()
    {
        // Attempt to consume an entity.
        $entity = $this->entity();

        // entity() signals failure with boolean false. Compare strictly so
        // that a successfully decoded "0" (which is falsy in PHP) is not
        // mistaken for "nothing returned" and replaced by an ampersand.
        $char = ($entity === false)
            ? '&'
            : $entity;

        $last = count($this->token['attr']) - 1;
        $this->token['attr'][$last]['value'] .= $char;
    }
1184
1185
    /**
     * Bogus comment state of the tokenizer.
     *
     * Collects the run of characters starting at the current position that
     * are not ">" and emits it as a comment token, then returns to the data
     * state.
     */
    private function bogusCommentState()
    {
        // Everything from the current position up to (not including) the
        // first ">" becomes the comment text.
        $comment = $this->characters('^>', $this->char);

        $token = array(
            'data' => $comment,
            'type' => self::COMMENT
        );
        $this->emitToken($token);

        // Skip past the text that was just emitted.
        $this->char += strlen($comment);

        $this->state = 'data';

        // When that lands exactly on EOF, step back one position so the
        // data state reconsumes the EOF.
        if ($this->char === $this->EOF) {
            $this->char = $this->EOF - 1;
        }
    }
1213
1214
    /**
     * Markup declaration open state of the tokenizer.
     *
     * Looks at the characters after the current position to tell a
     * comment from a DOCTYPE from anything else (treated as a bogus comment).
     */
    private function markupDeclarationOpenState()
    {
        // Two hyphens open a comment: consume them, switch to the comment
        // state and start a comment token with no data yet.
        if ($this->character($this->char + 1, 2) === '--') {
            $this->char += 2;
            $this->state = 'comment';
            $this->token = array(
                'data' => null,
                'type' => self::COMMENT
            );
            return;
        }

        // The seven characters "DOCTYPE", in any letter case, open a
        // DOCTYPE: consume them and switch to the DOCTYPE state.
        $keyword = strtolower($this->character($this->char + 1, 7));
        if ($keyword === 'doctype') {
            $this->char += 7;
            $this->state = 'doctype';
            return;
        }

        // Otherwise this is a parse error. Advance by one; the bogus comment
        // state starts collecting its text at the new position.
        $this->char++;
        $this->state = 'bogusComment';
    }
1242
1243
    /**
     * Comment state of the tokenizer.
     *
     * Consumes one input character inside a comment.
     */
    private function commentState()
    {
        $this->char++;
        $next = $this->char();

        // "-" may start the closing sequence.
        if ($next === '-') {
            $this->state = 'commentDash';
            return;
        }

        // EOF: parse error. Emit the comment collected so far and step back
        // one position so the data state sees the EOF again.
        if ($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else is comment text; the state does not change.
        $this->token['data'] .= $next;
    }
1269
1270
    /**
     * Comment dash state of the tokenizer.
     *
     * Consumes the input character that follows a single "-" inside a
     * comment.
     */
    private function commentDashState()
    {
        $this->char++;
        $next = $this->char();

        // A second "-": the comment may be about to end.
        if ($next === '-') {
            $this->state = 'commentEnd';
            return;
        }

        // EOF: parse error. Emit the comment collected so far and step back
        // one position so the data state sees the EOF again.
        if ($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else: the earlier "-" was ordinary text. Append it and
        // the current character, then resume the comment state.
        $this->token['data'] .= '-' . $next;
        $this->state = 'comment';
    }
1297
1298
    /**
     * Comment end state of the tokenizer.
     *
     * Consumes the input character that follows "--" inside a comment.
     */
    private function commentEndState()
    {
        $this->char++;
        $next = $this->char();

        // ">" completes the comment: emit it and return to the data state.
        if ($next === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // One more "-": record a hyphen as comment text; the state does not
        // change.
        if ($next === '-') {
            $this->token['data'] .= '-';
            return;
        }

        // EOF: emit the comment collected so far and step back one position
        // so the data state sees the EOF again.
        if ($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else: the "--" was ordinary text. Append it together with
        // the current character and resume the comment state.
        $this->token['data'] .= '--' . $next;
        $this->state = 'comment';
    }
1321
1322
    /**
     * DOCTYPE state of the tokenizer.
     *
     * Consumes the character after the "DOCTYPE" keyword. A whitespace
     * character is swallowed; anything else is left for the next state to
     * reconsume. Either way the tokenizer moves to the before DOCTYPE name
     * state.
     */
    private function doctypeState()
    {
        $this->char++;

        $is_space = preg_match('/^[\t\n\x0b\x0c ]$/', $this->char());
        if (!$is_space) {
            // Not whitespace: undo the advance so the character is read again.
            $this->char--;
        }

        $this->state = 'beforeDoctypeName';
    }
1336
1337
    /**
     * Before DOCTYPE name state of the tokenizer.
     *
     * Consumes one input character and either skips it, starts a DOCTYPE
     * token whose name begins with it, or emits a nameless DOCTYPE token.
     * Every token created here has its error flag set.
     */
    private function beforeDoctypeNameState()
    {
        $this->char++;
        $next = $this->char();

        // Tab, LF, VT, FF or space: nothing to do, the state is unchanged.
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
            return;
        }

        // A lower-case ASCII letter starts the name, stored upper-cased.
        if (preg_match('/^[a-z]$/', $next)) {
            $this->token = array(
                'name' => strtoupper($next),
                'type' => self::DOCTYPE,
                'error' => true
            );
            $this->state = 'doctypeName';
            return;
        }

        // ">" or EOF before any name: emit a DOCTYPE token without a name.
        $at_eof = ($this->char === $this->EOF);
        if ($next === '>' || $at_eof) {
            $this->emitToken(
                array(
                    'name' => null,
                    'type' => self::DOCTYPE,
                    'error' => true
                )
            );

            // Only the EOF case steps back, so that the data state sees the
            // EOF again; a ">" has been consumed for good.
            if ($next !== '>') {
                $this->char--;
            }
            $this->state = 'data';
            return;
        }

        // Any other character starts the name and is stored unchanged.
        $this->token = array(
            'name' => $next,
            'type' => self::DOCTYPE,
            'error' => true
        );
        $this->state = 'doctypeName';
    }
1388
1389
    /**
     * DOCTYPE name state of the tokenizer.
     *
     * Consumes one input character of the DOCTYPE name and afterwards
     * refreshes the token's error flag: the flag is false only while the
     * collected name is exactly "HTML".
     */
    private function doctypeNameState()
    {
        $this->char++;
        $next = $this->char();

        if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
            // Whitespace ends the name.
            // NOTE(review): this state name is capitalised differently from
            // the afterDoctypeNameState() method; presumably the dispatcher
            // resolves it through PHP's case-insensitive method lookup.
            // Confirm before normalising the spelling.
            $this->state = 'AfterDoctypeName';

        } elseif ($next === '>') {
            // End of the DOCTYPE: emit the token and return to the data state.
            $this->emitToken($this->token);
            $this->state = 'data';

        } elseif (preg_match('/^[a-z]$/', $next)) {
            // Lower-case ASCII letters are stored upper-cased.
            $this->token['name'] .= strtoupper($next);

        } elseif ($this->char === $this->EOF) {
            // EOF: emit the token and step back one position so the data
            // state sees the EOF again.
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';

        } else {
            // Every other character is appended as-is.
            $this->token['name'] .= $next;
        }

        // Runs after every branch, including the ones that already emitted.
        $this->token['error'] = ($this->token['name'] !== 'HTML');
    }
1418
1419
    /**
     * After DOCTYPE name state of the tokenizer.
     *
     * Consumes one input character that follows a completed DOCTYPE name.
     */
    private function afterDoctypeNameState()
    {
        $this->char++;
        $next = $this->char();

        // Tab, LF, VT, FF or space: nothing to do, the state is unchanged.
        if (preg_match('/^[\t\n\x0b\x0c ]$/', $next)) {
            return;
        }

        // ">": emit the DOCTYPE token and return to the data state.
        if ($next === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // EOF: emit the token and step back one position so the data state
        // sees the EOF again.
        if ($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
            return;
        }

        // Anything else after the name makes the DOCTYPE erroneous.
        $this->token['error'] = true;
        $this->state = 'bogusDoctype';
    }
1442
1443
    /**
     * Bogus DOCTYPE state of the tokenizer.
     *
     * Consumes one input character. Characters are discarded until a ">"
     * or EOF is reached, at which point the DOCTYPE token is emitted.
     */
    private function bogusDoctypeState()
    {
        $this->char++;
        $next = $this->char();

        // ">": emit the DOCTYPE token and return to the data state.
        if ($next === '>') {
            $this->emitToken($this->token);
            $this->state = 'data';
            return;
        }

        // EOF: emit the token and step back one position so the data state
        // sees the EOF again.
        if ($this->char === $this->EOF) {
            $this->emitToken($this->token);
            $this->char--;
            $this->state = 'data';
        }

        // Any other character is ignored and the state is unchanged.
    }
1462
1463
    /**
     * Attempts to consume an entity (character reference).
     *
     * On entry $this->char indexes the U+0026 AMPERSAND that introduced the
     * reference; the character at $this->char + 1 is the first character of
     * the reference itself. This definition is used when parsing entities
     * in text and in attributes.
     *
     * On success $this->char is left on the last character that belongs to
     * the reference (including a terminating ";" when present). On failure
     * it is restored to its value on entry.
     *
     * @return string|false The decoded text, or false when nothing that
     *                      looks like an entity follows the ampersand.
     */
    private function entity()
    {
        $start = $this->char;
        $entity = null;

        if ($this->character($start + 1) === '#') {
            // Numeric reference. The character AFTER the "#" (two past the
            // ampersand) selects hexadecimal ("x" / "X") or decimal digits.
            $marker = $this->character($start + 2);
            $is_hex = ($marker === 'x' || $marker === 'X');
            $char_class = $is_hex ? '0-9A-Fa-f' : '0-9';

            // Index of the first digit: past "&#", and past the "x" if any.
            $digits_at = $start + ($is_hex ? 3 : 2);

            // Consume as many characters as match the range given above.
            $digits = $this->characters($char_class, $digits_at);

            // Accept the run only if it consists purely of digits from the
            // selected range, whatever characters() yields when the first
            // character does not match.
            if (is_string($digits)
                && preg_match('/^[' . $char_class . ']+$/D', $digits)
            ) {
                $entity = '#' . ($is_hex ? 'x' : '') . $digits;

                // Leave the pointer on the last digit ...
                $this->char = $digits_at + strlen($digits) - 1;

                // ... and also swallow the terminating semicolon, if any.
                if ($this->character($this->char + 1) === ';') {
                    $this->char++;
                }
            }
        } else {
            // Named reference. Grab the run of characters that may form a
            // name, then test its prefixes, shortest first, against the
            // entities table.
            // NOTE(review): stopping at the first (shortest) hit differs from
            // the "maximum number of characters possible" wording of the
            // original comment; kept as-is, confirm against the table.
            $candidate = $this->characters('0-9A-Za-z;', $start + 1);
            $len = strlen($candidate);

            for ($c = 1; $c <= $len; $c++) {
                $id = substr($candidate, 0, $c);
                $this->char++;

                if (in_array($id, $this->entities, true)) {
                    // The matched id does not end in ";" but the input
                    // continues with one: consume that semicolon as well.
                    if ($candidate[$c - 1] !== ';'
                        && $c < $len
                        && $candidate[$c] === ';'
                    ) {
                        $this->char++;
                    }
                    $entity = $id;
                    break;
                }
            }
        }

        if ($entity === null) {
            // If no match can be made, then this is a parse error. No
            // characters are consumed, and nothing is returned.
            $this->char = $start;
            return false;
        }

        // Decode the reference. A matched id may itself end in ";", so strip
        // it before appending exactly one terminator.
        return html_entity_decode('&' . rtrim($entity, ';') . ';', ENT_QUOTES, 'UTF-8');
    }
1553
1554
    /**
     * Passes a token to the tree constructor and updates the tokenizer's
     * content model from the outcome.
     *
     * @param array $token Token to emit; its 'type' entry is read here.
     */
    private function emitToken($token)
    {
        $result = $this->tree->emitToken($token);

        // An integer result from the tree constructor becomes the new
        // content model.
        if (is_int($result)) {
            $this->content_model = $result;
            return;
        }

        // Otherwise an end tag resets the content model to PCDATA.
        if ($token['type'] === self::ENDTAG) {
            $this->content_model = self::PCDATA;
        }
    }
1565
1566
    /**
     * Ends tokenization: clears the current state and sends an EOF token
     * straight to the tree constructor.
     */
    private function EOF()
    {
        $this->state = null;

        $eof_token = array(
            'type' => self::EOF
        );
        $this->tree->emitToken($eof_token);
    }
1575
}
1576
1577
class HTML5TreeConstructer
0 ignored issues
show
Coding Style Compatibility introduced by
PSR1 recommends that each class should be in its own file to aid autoloaders.

Having each class in a dedicated file usually plays nice with PSR autoloaders and is therefore a well established practice. If you use other autoloaders, you might not want to follow this rule.

Loading history...
1578
{
1579
    public $stack = array();
1580
1581
    private $phase;
1582
    private $mode;
1583
    private $dom;
1584
    private $foster_parent = null;
1585
    private $a_formatting = array();
1586
1587
    private $head_pointer = null;
1588
    private $form_pointer = null;
1589
1590
    private $scoping = array('button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th');
1591
    private $formatting = array(
1592
        'a',
1593
        'b',
1594
        'big',
1595
        'em',
1596
        'font',
1597
        'i',
1598
        'nobr',
1599
        's',
1600
        'small',
1601
        'strike',
1602
        'strong',
1603
        'tt',
1604
        'u'
1605
    );
1606
    private $special = array(
1607
        'address',
1608
        'area',
1609
        'base',
1610
        'basefont',
1611
        'bgsound',
1612
        'blockquote',
1613
        'body',
1614
        'br',
1615
        'center',
1616
        'col',
1617
        'colgroup',
1618
        'dd',
1619
        'dir',
1620
        'div',
1621
        'dl',
1622
        'dt',
1623
        'embed',
1624
        'fieldset',
1625
        'form',
1626
        'frame',
1627
        'frameset',
1628
        'h1',
1629
        'h2',
1630
        'h3',
1631
        'h4',
1632
        'h5',
1633
        'h6',
1634
        'head',
1635
        'hr',
1636
        'iframe',
1637
        'image',
1638
        'img',
1639
        'input',
1640
        'isindex',
1641
        'li',
1642
        'link',
1643
        'listing',
1644
        'menu',
1645
        'meta',
1646
        'noembed',
1647
        'noframes',
1648
        'noscript',
1649
        'ol',
1650
        'optgroup',
1651
        'option',
1652
        'p',
1653
        'param',
1654
        'plaintext',
1655
        'pre',
1656
        'script',
1657
        'select',
1658
        'spacer',
1659
        'style',
1660
        'tbody',
1661
        'textarea',
1662
        'tfoot',
1663
        'thead',
1664
        'title',
1665
        'tr',
1666
        'ul',
1667
        'wbr'
1668
    );
1669
1670
    // The different phases.
1671
    const INIT_PHASE = 0;
1672
    const ROOT_PHASE = 1;
1673
    const MAIN_PHASE = 2;
1674
    const END_PHASE = 3;
1675
1676
    // The different insertion modes for the main phase.
1677
    const BEFOR_HEAD = 0;
1678
    const IN_HEAD = 1;
1679
    const AFTER_HEAD = 2;
1680
    const IN_BODY = 3;
1681
    const IN_TABLE = 4;
1682
    const IN_CAPTION = 5;
1683
    const IN_CGROUP = 6;
1684
    const IN_TBODY = 7;
1685
    const IN_ROW = 8;
1686
    const IN_CELL = 9;
1687
    const IN_SELECT = 10;
1688
    const AFTER_BODY = 11;
1689
    const IN_FRAME = 12;
1690
    const AFTR_FRAME = 13;
1691
1692
    // The different types of elements.
1693
    const SPECIAL = 0;
1694
    const SCOPING = 1;
1695
    const FORMATTING = 2;
1696
    const PHRASING = 3;
1697
1698
    const MARKER = 0;
1699
1700
    public function __construct()
1701
    {
1702
        $this->phase = self::INIT_PHASE;
1703
        $this->mode = self::BEFOR_HEAD;
1704
        $this->dom = new DOMDocument;
1705
1706
        $this->dom->encoding = 'UTF-8';
1707
        $this->dom->preserveWhiteSpace = true;
1708
        $this->dom->substituteEntities = true;
1709
        $this->dom->strictErrorChecking = false;
1710
    }
1711
1712
    // Process tag tokens
1713
    /**
     * Route a token to the handler of the current tree-construction phase.
     *
     * @param array $token Token to process; the handlers read keys such as
     *                     'type', 'name', 'data' and 'attr' from it.
     * @return mixed The value returned by the phase handler, or null when
     *               $this->phase matches none of the known phases.
     */
    public function emitToken($token)
    {
        // Every case returns, so no break statements are needed.
        switch ($this->phase) {
            case self::INIT_PHASE:
                return $this->initPhase($token);
            case self::ROOT_PHASE:
                return $this->rootElementPhase($token);
            case self::MAIN_PHASE:
                return $this->mainPhase($token);
            case self::END_PHASE:
                return $this->trailingEndPhase($token);
        }

        // Unknown phase: the token is dropped and null is returned, exactly
        // as when the switch used to fall off the end of the method.
        return null;
    }
1730
1731
    /**
     * Handle a token while the tree constructor is in the initial phase.
     *
     * @param array $token Token emitted by the tokenisation stage.
     * @return mixed The result of rootElementPhase() when the token is
     *               forwarded to the root element phase, null otherwise.
     */
    private function initPhase($token)
    {
        /* Initially, the tree construction stage must handle each token
        emitted from the tokenisation stage as follows: */

        /* A DOCTYPE token that is marked as being in error
        A comment token
        A start tag token
        An end tag token
        A character token that is not one of one of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
            or U+0020 SPACE
        An end-of-file token */
        if ((isset($token['error']) && $token['error']) ||
            $token['type'] === HTML5::COMMENT ||
            $token['type'] === HTML5::STARTTAG ||
            $token['type'] === HTML5::ENDTAG ||
            $token['type'] === HTML5::EOF ||
            ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
                !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))
        ) {
            /* This specification does not define how to handle this case. In
            particular, user agents may ignore the entirety of this specification
            altogether for such documents, and instead invoke special parse modes
            with a greater emphasis on backwards compatibility. */

            $this->phase = self::ROOT_PHASE;
            return $this->rootElementPhase($token);

            /* A DOCTYPE token marked as being correct */
        } elseif (isset($token['error']) && !$token['error']) {
            /* The specification asks for a DocumentType node to be appended to
            the Document node here. This implementation has never attached one:
            it used to instantiate a DOMDocumentType (passing arguments that
            class does not accept) and then discard it. That dead object has
            been removed; the resulting document is unchanged. */

            /* Then, switch to the root element phase of the tree construction
            stage. */
            $this->phase = self::ROOT_PHASE;

            /* A character token that is one of one of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
            or U+0020 SPACE */
        } elseif (isset($token['data']) && preg_match(
                '/^[\t\n\x0b\x0c ]+$/',
                $token['data']
            )
        ) {
            /* Append that character  to the Document node. */
            $text = $this->dom->createTextNode($token['data']);
            $this->dom->appendChild($text);
        }
    }
1785
1786
    /**
     * Handle a token in the root element phase, i.e. after the initial phase
     * and before an html element exists.
     *
     * @param array $token Token emitted by the tokenisation stage.
     * @return mixed The result of mainPhase() when the token triggers creation
     *               of the html element and is reprocessed, null otherwise.
     */
    private function rootElementPhase($token)
    {
        $type = $token['type'];

        /* A DOCTYPE token: parse error, ignore the token. */
        if ($type === HTML5::DOCTYPE) {
            return null;
        }

        /* A comment token: append a Comment node to the Document object with
        the data attribute set to the data given in the comment token. */
        if ($type === HTML5::COMMENT) {
            $this->dom->appendChild($this->dom->createComment($token['data']));
            return null;
        }

        $isCharacterToken = ($type === HTML5::CHARACTR);

        /* A character token made only of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE: append that character to the Document node. */
        if ($isCharacterToken && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            $this->dom->appendChild($this->dom->createTextNode($token['data']));
            return null;
        }

        /* Any character token that gets this far is not whitespace-only, so
        it joins start tags, end tags and end-of-file as a token that forces
        the root element into existence. */
        $needsRootElement = $isCharacterToken
            || $type === HTML5::STARTTAG
            || $type === HTML5::ENDTAG
            || $type === HTML5::EOF;

        if (!$needsRootElement) {
            return null;
        }

        /* Create an HTMLElement node with the tag name html, append it to the
        Document object and make it the first entry of the stack of open
        elements. */
        $root = $this->dom->createElement('html');
        $this->dom->appendChild($root);
        $this->stack[] = $root;

        /* Switch to the main phase and reprocess the current token. */
        $this->phase = self::MAIN_PHASE;
        return $this->mainPhase($token);
    }
1835
1836
    /**
     * Handle a token in the main phase.
     *
     * DOCTYPE tokens, "html" start tags and end-of-file are dealt with here;
     * every other token is forwarded to the handler of the current insertion
     * mode ($this->mode).
     *
     * @param array $token Token emitted by the tokenisation stage.
     * @return mixed The value returned by the insertion-mode handler, or null
     *               when the token is handled here or the mode is unknown.
     */
    private function mainPhase($token)
    {
        /* Tokens in the main phase must be handled as follows: */

        /* A DOCTYPE token */
        if ($token['type'] === HTML5::DOCTYPE) {
            // Parse error. Ignore the token.

            /* A start tag token with the tag name "html" */
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
            /* If this start tag token was not the first start tag token, then
            it is a parse error. */

            /* For each attribute on the token, check to see if the attribute
            is already present on the top element of the stack of open elements.
            If it is not, add the attribute and its corresponding value to that
            element. */
            foreach ($token['attr'] as $attr) {
                if (!$this->stack[0]->hasAttribute($attr['name'])) {
                    $this->stack[0]->setAttribute($attr['name'], $attr['value']);
                }
            }

            /* An end-of-file token */
        } elseif ($token['type'] === HTML5::EOF) {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            /* Anything else. */
        } else {
            /* Depends on the insertion mode. Every case returns, so no break
            statements are needed.

            The former "case self::END_PHASE" has been dropped: END_PHASE is a
            phase constant, not an insertion mode, and its value (3) equals
            IN_BODY, whose case comes first, so it could never be selected. */
            switch ($this->mode) {
                case self::BEFOR_HEAD:
                    return $this->beforeHead($token);
                case self::IN_HEAD:
                    return $this->inHead($token);
                case self::AFTER_HEAD:
                    return $this->afterHead($token);
                case self::IN_BODY:
                    return $this->inBody($token);
                case self::IN_TABLE:
                    return $this->inTable($token);
                case self::IN_CAPTION:
                    return $this->inCaption($token);
                case self::IN_CGROUP:
                    return $this->inColumnGroup($token);
                case self::IN_TBODY:
                    return $this->inTableBody($token);
                case self::IN_ROW:
                    return $this->inRow($token);
                case self::IN_CELL:
                    return $this->inCell($token);
                case self::IN_SELECT:
                    return $this->inSelect($token);
                case self::AFTER_BODY:
                    return $this->afterBody($token);
                case self::IN_FRAME:
                    return $this->inFrameset($token);
                case self::AFTR_FRAME:
                    return $this->afterFrameset($token);
            }
        }
    }
1916
1917
    /**
     * Handle a token in the "before head" insertion mode.
     *
     * @param array $token Token emitted by the tokenisation stage.
     * @return mixed The result of inHead() when a head element is implied and
     *               the token is reprocessed, null otherwise.
     */
    private function beforeHead($token)
    {
        /* Handle the token as follows: */

        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if ($token['type'] === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

            /* A comment token */
        } elseif ($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);

            /* A start tag token with the tag name "head" */
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') {
            /* Create an element for the token, append the new element to the
            current node and push it onto the stack of open elements. */
            $element = $this->insertElement($token);

            /* Set the head element pointer to this new element node. */
            $this->head_pointer = $element;

            /* Change the insertion mode to "in head". */
            $this->mode = self::IN_HEAD;

            /* A start tag token whose tag name is one of: "base", "link", "meta",
            "script", "style", "title". Or an end tag with the tag name "html".
            Or a character token that is not one of U+0009 CHARACTER TABULATION,
            U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
            or U+0020 SPACE. Or any other start tag token */
        } elseif ($token['type'] === HTML5::STARTTAG ||
            ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') ||
            /* Same whitespace pattern (with the "+" quantifier) as the first
            branch, so the two tests are exact complements of each other. */
            ($token['type'] === HTML5::CHARACTR && !preg_match(
                    '/^[\t\n\x0b\x0c ]+$/',
                    $token['data']
                ))
        ) {
            /* Act as if a start tag token with the tag name "head" and no
            attributes had been seen, then reprocess the current token. */
            $this->beforeHead(
                array(
                    'name' => 'head',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );

            return $this->inHead($token);

            /* Any other end tag */
        } elseif ($token['type'] === HTML5::ENDTAG) {
            /* Parse error. Ignore the token. */
        }
    }
1977
1978
    /**
     * Handle a token in the "in head" insertion mode.
     *
     * The head element pointer may be null (the branches below call this the
     * innerHTML case); every use of it is therefore guarded.
     *
     * @param array $token Token emitted by the tokenisation stage.
     * @return mixed HTML5::PCDATA, HTML5::RCDATA or HTML5::CDATA when the
     *               tokeniser's content model flag has to change, the result
     *               of afterHead() when the token is reprocessed there, null
     *               otherwise.
     */
    private function inHead($token)
    {
        /* Handle the token as follows: */

        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE.

        THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
        or script element, append the character to the current node regardless
        of its content. */
        if (($token['type'] === HTML5::CHARACTR &&
                preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
                $token['type'] === HTML5::CHARACTR && in_array(
                    end($this->stack)->nodeName,
                    array('title', 'style', 'script')
                ))
        ) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

            /* A comment token */
        } elseif ($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data attribute
            set to the data given in the comment token. */
            $this->insertComment($token['data']);

            /* An end tag with the tag name "title", "style" or "script" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            in_array($token['name'], array('title', 'style', 'script'))
        ) {
            /* Pop the current node off the stack of open elements and hand
            the PCDATA content model flag back to the tokeniser. */
            array_pop($this->stack);
            return HTML5::PCDATA;

            /* A start tag with the tag name "title" */
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
            /* Create an element for the token and append the new element to the
            node pointed to by the head element pointer, or, if that is null
            (innerHTML case), to the current node. */
            if ($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);

            } else {
                $this->insertElement($token);
            }

            /* Switch the tokeniser's content model flag  to the RCDATA state. */
            return HTML5::RCDATA;

            /* A start tag with the tag name "style" */
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
            /* Create an element for the token and append the new element to the
            node pointed to by the head element pointer, or, if that is null
            (innerHTML case), to the current node. */
            if ($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);

            } else {
                $this->insertElement($token);
            }

            /* Switch the tokeniser's content model flag  to the CDATA state. */
            return HTML5::CDATA;

            /* A start tag with the tag name "script" */
        } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
            /* Create an element for the token and append it to the node pointed
            to by the head element pointer, or, if that is null (innerHTML
            case), to the current node. The null case used to be a fatal
            error; it is now handled like the "style" branch above. */
            if ($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);

            } else {
                $this->insertElement($token);
            }

            /* Switch the tokeniser's content model flag  to the CDATA state. */
            return HTML5::CDATA;

            /* A start tag with the tag name "base", "link", or "meta" */
        } elseif ($token['type'] === HTML5::STARTTAG && in_array(
                $token['name'],
                array('base', 'link', 'meta')
            )
        ) {
            /* Create an element for the token and append the new element to the
            node pointed to by the head element pointer, or, if that is null
            (innerHTML case), to the current node. */
            if ($this->head_pointer !== null) {
                $element = $this->insertElement($token, false);
                $this->head_pointer->appendChild($element);
                /* NOTE(review): presumably insertElement() pushed the element
                onto the stack of open elements and this takes it off again;
                confirm against insertElement(). */
                array_pop($this->stack);

            } else {
                $this->insertElement($token);
            }

            /* An end tag with the tag name "head" */
        } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
            /* If the current node is a head element, pop the current node off
            the stack of open elements. Otherwise (including when there is no
            head element pointer at all), this is a parse error and nothing is
            popped. */
            if ($this->head_pointer !== null &&
                $this->head_pointer->isSameNode(end($this->stack))
            ) {
                array_pop($this->stack);
            }

            /* Change the insertion mode to "after head". */
            $this->mode = self::AFTER_HEAD;

            /* A start tag with the tag name "head" or an end tag except "html". */
        } elseif (($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
            ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')
        ) {
            // Parse error. Ignore the token.

            /* Anything else */
        } else {
            /* If the current node is a head element, act as if an end tag
            token with the tag name "head" had been seen. */
            if ($this->head_pointer !== null &&
                $this->head_pointer->isSameNode(end($this->stack))
            ) {
                $this->inHead(
                    array(
                        'name' => 'head',
                        'type' => HTML5::ENDTAG
                    )
                );

                /* Otherwise, change the insertion mode to "after head". */
            } else {
                $this->mode = self::AFTER_HEAD;
            }

            /* Then, reprocess the current token. */
            return $this->afterHead($token);
        }
    }
2112
2113
    /**
     * Processes one token while the insertion mode is "after head".
     *
     * Whitespace-only character tokens, comments, and <body>/<frameset>
     * start tags are consumed here. Head-only start tags are handed back
     * to inHead(); every other token implies a <body> start tag and is
     * then reprocessed by inBody().
     *
     * @param array $token Token array; 'type' is always read, while 'data'
     *                     and 'name' are read only for the token types
     *                     that carry them.
     * @return mixed Null when the token is consumed here, otherwise the
     *               result of inHead() or inBody() for the reprocessed token.
     */
    private function afterHead($token)
    {
        $type = $token['type'];

        /* Character token made up solely of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE: append the characters to the current node. */
        if ($type === HTML5::CHARACTR && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            $this->insertText($token['data']);
            return;
        }

        /* Comment token: append a Comment node to the current node, using
        the comment token's data. */
        if ($type === HTML5::COMMENT) {
            $this->insertComment($token['data']);
            return;
        }

        if ($type === HTML5::STARTTAG) {
            $tag_name = $token['name'];

            /* <body>: insert the element and switch to "in body". */
            if ($tag_name === 'body') {
                $this->insertElement($token);
                $this->mode = self::IN_BODY;
                return;
            }

            /* <frameset>: insert the element and switch to "in frameset". */
            if ($tag_name === 'frameset') {
                $this->insertElement($token);
                $this->mode = self::IN_FRAME;
                return;
            }

            /* "base", "link", "meta", "script", "style", "title": parse
            error. Go back to "in head" and reprocess the token there. */
            $head_only_tags = array('base', 'link', 'meta', 'script', 'style', 'title');

            if (in_array($tag_name, $head_only_tags)) {
                $this->mode = self::IN_HEAD;
                return $this->inHead($token);
            }
        }

        /* Anything else: behave as though an attribute-less <body> start
        tag had been seen first, then reprocess the current token. */
        $implied_body = array(
            'name' => 'body',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        );
        $this->afterHead($implied_body);

        return $this->inBody($token);
    }
2175
2176
    private function inBody($token)
2177
    {
2178
        /* Handle the token as follows: */
2179
2180
        switch ($token['type']) {
2181
            /* A character token */
2182
            case HTML5::CHARACTR:
2183
                /* Reconstruct the active formatting elements, if any. */
2184
                $this->reconstructActiveFormattingElements();
2185
2186
                /* Append the token's character to the current node. */
2187
                $this->insertText($token['data']);
2188
                break;
2189
2190
            /* A comment token */
2191
            case HTML5::COMMENT:
2192
                /* Append a Comment node to the current node with the data
2193
                attribute set to the data given in the comment token. */
2194
                $this->insertComment($token['data']);
2195
                break;
2196
2197
            case HTML5::STARTTAG:
2198
                switch ($token['name']) {
2199
                    /* A start tag token whose tag name is one of: "script",
2200
                    "style" */
2201
                    case 'script':
2202
                    case 'style':
2203
                        /* Process the token as if the insertion mode had been "in
2204
                        head". */
2205
                        return $this->inHead($token);
2206
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded, for example, by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2207
2208
                    /* A start tag token whose tag name is one of: "base", "link",
2209
                    "meta", "title" */
2210
                    case 'base':
2211
                    case 'link':
2212
                    case 'meta':
2213
                    case 'title':
2214
                        /* Parse error. Process the token as if the insertion mode
2215
                        had    been "in head". */
2216
                        return $this->inHead($token);
2217
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded, for example, by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2218
2219
                    /* A start tag token with the tag name "body" */
2220
                    case 'body':
2221
                        /* Parse error. If the second element on the stack of open
2222
                        elements is not a body element, or, if the stack of open
2223
                        elements has only one node on it, then ignore the token.
2224
                        (innerHTML case) */
2225
                        if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2226
                            // Ignore
2227
2228
                            /* Otherwise, for each attribute on the token, check to see
2229
                            if the attribute is already present on the body element (the
2230
                            second element)    on the stack of open elements. If it is not,
2231
                            add the attribute and its corresponding value to that
2232
                            element. */
2233
                        } else {
2234
                            foreach ($token['attr'] as $attr) {
2235
                                if (!$this->stack[1]->hasAttribute($attr['name'])) {
2236
                                    $this->stack[1]->setAttribute($attr['name'], $attr['value']);
2237
                                }
2238
                            }
2239
                        }
2240
                        break;
2241
2242
                    /* A start tag whose tag name is one of: "address",
2243
                    "blockquote", "center", "dir", "div", "dl", "fieldset",
2244
                    "listing", "menu", "ol", "p", "ul" */
2245
                    case 'address':
2246
                    case 'blockquote':
2247
                    case 'center':
2248
                    case 'dir':
2249
                    case 'div':
2250
                    case 'dl':
2251
                    case 'fieldset':
2252
                    case 'listing':
2253
                    case 'menu':
2254
                    case 'ol':
2255
                    case 'p':
2256
                    case 'ul':
2257
                        /* If the stack of open elements has a p element in scope,
2258
                        then act as if an end tag with the tag name p had been
2259
                        seen. */
2260
                        if ($this->elementInScope('p')) {
2261
                            $this->emitToken(
2262
                                array(
2263
                                    'name' => 'p',
2264
                                    'type' => HTML5::ENDTAG
2265
                                )
2266
                            );
2267
                        }
2268
2269
                        /* Insert an HTML element for the token. */
2270
                        $this->insertElement($token);
2271
                        break;
2272
2273
                    /* A start tag whose tag name is "form" */
2274
                    case 'form':
2275
                        /* If the form element pointer is not null, ignore the
2276
                        token with a parse error. */
2277
                        if ($this->form_pointer !== null) {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2278
                            // Ignore.
2279
2280
                            /* Otherwise: */
2281
                        } else {
2282
                            /* If the stack of open elements has a p element in
2283
                            scope, then act as if an end tag with the tag name p
2284
                            had been seen. */
2285
                            if ($this->elementInScope('p')) {
2286
                                $this->emitToken(
2287
                                    array(
2288
                                        'name' => 'p',
2289
                                        'type' => HTML5::ENDTAG
2290
                                    )
2291
                                );
2292
                            }
2293
2294
                            /* Insert an HTML element for the token, and set the
2295
                            form element pointer to point to the element created. */
2296
                            $element = $this->insertElement($token);
2297
                            $this->form_pointer = $element;
2298
                        }
2299
                        break;
2300
2301
                    /* A start tag whose tag name is "li", "dd" or "dt" */
2302
                    case 'li':
2303
                    case 'dd':
2304
                    case 'dt':
2305
                        /* If the stack of open elements has a p  element in scope,
2306
                        then act as if an end tag with the tag name p had been
2307
                        seen. */
2308
                        if ($this->elementInScope('p')) {
2309
                            $this->emitToken(
2310
                                array(
2311
                                    'name' => 'p',
2312
                                    'type' => HTML5::ENDTAG
2313
                                )
2314
                            );
2315
                        }
2316
2317
                        $stack_length = count($this->stack) - 1;
2318
2319
                        for ($n = $stack_length; 0 <= $n; $n--) {
2320
                            /* 1. Initialise node to be the current node (the
2321
                            bottommost node of the stack). */
2322
                            $stop = false;
2323
                            $node = $this->stack[$n];
2324
                            $cat = $this->getElementCategory($node->tagName);
2325
2326
                            /* 2. If node is an li, dd or dt element, then pop all
2327
                            the    nodes from the current node up to node, including
2328
                            node, then stop this algorithm. */
2329
                            if ($token['name'] === $node->tagName || ($token['name'] !== 'li'
2330
                                    && ($node->tagName === 'dd' || $node->tagName === 'dt'))
2331
                            ) {
2332
                                for ($x = $stack_length; $x >= $n; $x--) {
2333
                                    array_pop($this->stack);
2334
                                }
2335
2336
                                break;
2337
                            }
2338
2339
                            /* 3. If node is not in the formatting category, and is
2340
                            not    in the phrasing category, and is not an address or
2341
                            div element, then stop this algorithm. */
2342
                            if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&
2343
                                $node->tagName !== 'address' && $node->tagName !== 'div'
2344
                            ) {
2345
                                break;
2346
                            }
2347
                        }
2348
2349
                        /* Finally, insert an HTML element with the same tag
2350
                        name as the    token's. */
2351
                        $this->insertElement($token);
2352
                        break;
2353
2354
                    /* A start tag token whose tag name is "plaintext" */
2355
                    case 'plaintext':
2356
                        /* If the stack of open elements has a p  element in scope,
2357
                        then act as if an end tag with the tag name p had been
2358
                        seen. */
2359
                        if ($this->elementInScope('p')) {
2360
                            $this->emitToken(
2361
                                array(
2362
                                    'name' => 'p',
2363
                                    'type' => HTML5::ENDTAG
2364
                                )
2365
                            );
2366
                        }
2367
2368
                        /* Insert an HTML element for the token. */
2369
                        $this->insertElement($token);
2370
2371
                        return HTML5::PLAINTEXT;
2372
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded, for example, by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2373
2374
                    /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
2375
                    "h5", "h6" */
2376
                    case 'h1':
2377
                    case 'h2':
2378
                    case 'h3':
2379
                    case 'h4':
2380
                    case 'h5':
2381
                    case 'h6':
2382
                        /* If the stack of open elements has a p  element in scope,
2383
                        then act as if an end tag with the tag name p had been seen. */
2384
                        if ($this->elementInScope('p')) {
2385
                            $this->emitToken(
2386
                                array(
2387
                                    'name' => 'p',
2388
                                    'type' => HTML5::ENDTAG
2389
                                )
2390
                            );
2391
                        }
2392
2393
                        /* If the stack of open elements has in scope an element whose
2394
                        tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
2395
                        this is a parse error; pop elements from the stack until an
2396
                        element with one of those tag names has been popped from the
2397
                        stack. */
2398
                        while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {
2399
                            array_pop($this->stack);
2400
                        }
2401
2402
                        /* Insert an HTML element for the token. */
2403
                        $this->insertElement($token);
2404
                        break;
2405
2406
                    /* A start tag whose tag name is "a" */
2407
                    case 'a':
2408
                        /* If the list of active formatting elements contains
2409
                        an element whose tag name is "a" between the end of the
2410
                        list and the last marker on the list (or the start of
2411
                        the list if there is no marker on the list), then this
2412
                        is a parse error; act as if an end tag with the tag name
2413
                        "a" had been seen, then remove that element from the list
2414
                        of active formatting elements and the stack of open
2415
                        elements if the end tag didn't already remove it (it
2416
                        might not have if the element is not in table scope). */
2417
                        $leng = count($this->a_formatting);
2418
2419
                        for ($n = $leng - 1; $n >= 0; $n--) {
2420
                            if ($this->a_formatting[$n] === self::MARKER) {
2421
                                break;
2422
2423
                            } elseif ($this->a_formatting[$n]->nodeName === 'a') {
2424
                                $this->emitToken(
2425
                                    array(
2426
                                        'name' => 'a',
2427
                                        'type' => HTML5::ENDTAG
2428
                                    )
2429
                                );
2430
                                break;
2431
                            }
2432
                        }
2433
2434
                        /* Reconstruct the active formatting elements, if any. */
2435
                        $this->reconstructActiveFormattingElements();
2436
2437
                        /* Insert an HTML element for the token. */
2438
                        $el = $this->insertElement($token);
2439
2440
                        /* Add that element to the list of active formatting
2441
                        elements. */
2442
                        $this->a_formatting[] = $el;
2443
                        break;
2444
2445
                    /* A start tag whose tag name is one of: "b", "big", "em", "font",
2446
                    "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
2447
                    case 'b':
2448
                    case 'big':
2449
                    case 'em':
2450
                    case 'font':
2451
                    case 'i':
2452
                    case 'nobr':
2453
                    case 's':
2454
                    case 'small':
2455
                    case 'strike':
2456
                    case 'strong':
2457
                    case 'tt':
2458
                    case 'u':
2459
                        /* Reconstruct the active formatting elements, if any. */
2460
                        $this->reconstructActiveFormattingElements();
2461
2462
                        /* Insert an HTML element for the token. */
2463
                        $el = $this->insertElement($token);
2464
2465
                        /* Add that element to the list of active formatting
2466
                        elements. */
2467
                        $this->a_formatting[] = $el;
2468
                        break;
2469
2470
                    /* A start tag token whose tag name is "button" */
2471
                    case 'button':
2472
                        /* If the stack of open elements has a button element in scope,
2473
                        then this is a parse error; act as if an end tag with the tag
2474
                        name "button" had been seen, then reprocess the token. (We don't
2475
                        do that. Unnecessary.) */
2476
                        if ($this->elementInScope('button')) {
2477
                            $this->inBody(
2478
                                array(
2479
                                    'name' => 'button',
2480
                                    'type' => HTML5::ENDTAG
2481
                                )
2482
                            );
2483
                        }
2484
2485
                        /* Reconstruct the active formatting elements, if any. */
2486
                        $this->reconstructActiveFormattingElements();
2487
2488
                        /* Insert an HTML element for the token. */
2489
                        $this->insertElement($token);
2490
2491
                        /* Insert a marker at the end of the list of active
2492
                        formatting elements. */
2493
                        $this->a_formatting[] = self::MARKER;
2494
                        break;
2495
2496
                    /* A start tag token whose tag name is one of: "marquee", "object" */
2497
                    case 'marquee':
2498
                    case 'object':
2499
                        /* Reconstruct the active formatting elements, if any. */
2500
                        $this->reconstructActiveFormattingElements();
2501
2502
                        /* Insert an HTML element for the token. */
2503
                        $this->insertElement($token);
2504
2505
                        /* Insert a marker at the end of the list of active
2506
                        formatting elements. */
2507
                        $this->a_formatting[] = self::MARKER;
2508
                        break;
2509
2510
                    /* A start tag token whose tag name is "xmp" */
2511
                    case 'xmp':
2512
                        /* Reconstruct the active formatting elements, if any. */
2513
                        $this->reconstructActiveFormattingElements();
2514
2515
                        /* Insert an HTML element for the token. */
2516
                        $this->insertElement($token);
2517
2518
                        /* Switch the content model flag to the CDATA state. */
2519
                        return HTML5::CDATA;
2520
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded, for example, by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2521
2522
                    /* A start tag whose tag name is "table" */
2523
                    case 'table':
2524
                        /* If the stack of open elements has a p element in scope,
2525
                        then act as if an end tag with the tag name p had been seen. */
2526
                        if ($this->elementInScope('p')) {
2527
                            $this->emitToken(
2528
                                array(
2529
                                    'name' => 'p',
2530
                                    'type' => HTML5::ENDTAG
2531
                                )
2532
                            );
2533
                        }
2534
2535
                        /* Insert an HTML element for the token. */
2536
                        $this->insertElement($token);
2537
2538
                        /* Change the insertion mode to "in table". */
2539
                        $this->mode = self::IN_TABLE;
2540
                        break;
2541
2542
                    /* A start tag whose tag name is one of: "area", "basefont",
2543
                    "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
2544
                    case 'area':
2545
                    case 'basefont':
2546
                    case 'bgsound':
2547
                    case 'br':
2548
                    case 'embed':
2549
                    case 'img':
2550
                    case 'param':
2551
                    case 'spacer':
2552
                    case 'wbr':
2553
                        /* Reconstruct the active formatting elements, if any. */
2554
                        $this->reconstructActiveFormattingElements();
2555
2556
                        /* Insert an HTML element for the token. */
2557
                        $this->insertElement($token);
2558
2559
                        /* Immediately pop the current node off the stack of open elements. */
2560
                        array_pop($this->stack);
2561
                        break;
2562
2563
                    /* A start tag whose tag name is "hr" */
2564
                    case 'hr':
2565
                        /* If the stack of open elements has a p element in scope,
2566
                        then act as if an end tag with the tag name p had been seen. */
2567
                        if ($this->elementInScope('p')) {
2568
                            $this->emitToken(
2569
                                array(
2570
                                    'name' => 'p',
2571
                                    'type' => HTML5::ENDTAG
2572
                                )
2573
                            );
2574
                        }
2575
2576
                        /* Insert an HTML element for the token. */
2577
                        $this->insertElement($token);
2578
2579
                        /* Immediately pop the current node off the stack of open elements. */
2580
                        array_pop($this->stack);
2581
                        break;
2582
2583
                    /* A start tag whose tag name is "image" */
2584
                    case 'image':
2585
                        /* Parse error. Change the token's tag name to "img" and
2586
                        reprocess it. (Don't ask.) */
2587
                        $token['name'] = 'img';
2588
                        return $this->inBody($token);
2589
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded, for example, by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2590
2591
                    /* A start tag whose tag name is "input" */
2592
                    case 'input':
2593
                        /* Reconstruct the active formatting elements, if any. */
2594
                        $this->reconstructActiveFormattingElements();
2595
2596
                        /* Insert an input element for the token. */
2597
                        $element = $this->insertElement($token, false);
2598
2599
                        /* If the form element pointer is not null, then associate the
2600
                        input element with the form element pointed to by the form
2601
                        element pointer. */
2602
                        $this->form_pointer !== null
2603
                            ? $this->form_pointer->appendChild($element)
2604
                            : end($this->stack)->appendChild($element);
2605
2606
                        /* Pop that input element off the stack of open elements. */
2607
                        array_pop($this->stack);
2608
                        break;
2609
2610
                    /* A start tag whose tag name is "isindex" */
2611
                    case 'isindex':
2612
                        /* Parse error. */
2613
                        // w/e
2614
2615
                        /* If the form element pointer is not null,
2616
                        then ignore the token. */
2617
                        if ($this->form_pointer === null) {
2618
                            /* Act as if a start tag token with the tag name "form" had
2619
                            been seen. */
2620
                            $this->inBody(
2621
                                array(
2622
                                    'name' => 'body',
2623
                                    'type' => HTML5::STARTTAG,
2624
                                    'attr' => array()
2625
                                )
2626
                            );
2627
2628
                            /* Act as if a start tag token with the tag name "hr" had
2629
                            been seen. */
2630
                            $this->inBody(
2631
                                array(
2632
                                    'name' => 'hr',
2633
                                    'type' => HTML5::STARTTAG,
2634
                                    'attr' => array()
2635
                                )
2636
                            );
2637
2638
                            /* Act as if a start tag token with the tag name "p" had
2639
                            been seen. */
2640
                            $this->inBody(
2641
                                array(
2642
                                    'name' => 'p',
2643
                                    'type' => HTML5::STARTTAG,
2644
                                    'attr' => array()
2645
                                )
2646
                            );
2647
2648
                            /* Act as if a start tag token with the tag name "label"
2649
                            had been seen. */
2650
                            $this->inBody(
2651
                                array(
2652
                                    'name' => 'label',
2653
                                    'type' => HTML5::STARTTAG,
2654
                                    'attr' => array()
2655
                                )
2656
                            );
2657
2658
                            /* Act as if a stream of character tokens had been seen. */
2659
                            $this->insertText(
2660
                                'This is a searchable index. ' .
2661
                                'Insert your search keywords here: '
2662
                            );
2663
2664
                            /* Act as if a start tag token with the tag name "input"
2665
                            had been seen, with all the attributes from the "isindex"
2666
                            token, except with the "name" attribute set to the value
2667
                            "isindex" (ignoring any explicit "name" attribute). */
2668
                            $attr = $token['attr'];
2669
                            $attr[] = array('name' => 'name', 'value' => 'isindex');
2670
2671
                            $this->inBody(
2672
                                array(
2673
                                    'name' => 'input',
2674
                                    'type' => HTML5::STARTTAG,
2675
                                    'attr' => $attr
2676
                                )
2677
                            );
2678
2679
                            /* Act as if a stream of character tokens had been seen
2680
                            (see below for what they should say). */
2681
                            $this->insertText(
2682
                                'This is a searchable index. ' .
2683
                                'Insert your search keywords here: '
2684
                            );
2685
2686
                            /* Act as if an end tag token with the tag name "label"
2687
                            had been seen. */
2688
                            $this->inBody(
2689
                                array(
2690
                                    'name' => 'label',
2691
                                    'type' => HTML5::ENDTAG
2692
                                )
2693
                            );
2694
2695
                            /* Act as if an end tag token with the tag name "p" had
2696
                            been seen. */
2697
                            $this->inBody(
2698
                                array(
2699
                                    'name' => 'p',
2700
                                    'type' => HTML5::ENDTAG
2701
                                )
2702
                            );
2703
2704
                            /* Act as if a start tag token with the tag name "hr" had
2705
                            been seen. */
2706
                            $this->inBody(
2707
                                array(
2708
                                    'name' => 'hr',
2709
                                    'type' => HTML5::ENDTAG
2710
                                )
2711
                            );
2712
2713
                            /* Act as if an end tag token with the tag name "form" had
2714
                            been seen. */
2715
                            $this->inBody(
2716
                                array(
2717
                                    'name' => 'form',
2718
                                    'type' => HTML5::ENDTAG
2719
                                )
2720
                            );
2721
                        }
2722
                        break;
2723
2724
                    /* A start tag whose tag name is "textarea" */
2725
                    case 'textarea':
2726
                        $this->insertElement($token);
2727
2728
                        /* Switch the tokeniser's content model flag to the
2729
                        RCDATA state. */
2730
                        return HTML5::RCDATA;
2731
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded, for example, by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2732
2733
                    /* A start tag whose tag name is one of: "iframe", "noembed",
2734
                    "noframes" */
2735
                    case 'iframe':
2736
                    case 'noembed':
2737
                    case 'noframes':
2738
                        $this->insertElement($token);
2739
2740
                        /* Switch the tokeniser's content model flag to the CDATA state. */
2741
                        return HTML5::CDATA;
2742
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded, for example, by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2743
2744
                    /* A start tag whose tag name is "select" */
2745
                    case 'select':
2746
                        /* Reconstruct the active formatting elements, if any. */
2747
                        $this->reconstructActiveFormattingElements();
2748
2749
                        /* Insert an HTML element for the token. */
2750
                        $this->insertElement($token);
2751
2752
                        /* Change the insertion mode to "in select". */
2753
                        $this->mode = self::IN_SELECT;
2754
                        break;
2755
2756
                    /* A start or end tag whose tag name is one of: "caption", "col",
2757
                    "colgroup", "frame", "frameset", "head", "option", "optgroup",
2758
                    "tbody", "td", "tfoot", "th", "thead", "tr". */
2759
                    case 'caption':
2760
                    case 'col':
2761
                    case 'colgroup':
2762
                    case 'frame':
2763
                    case 'frameset':
2764
                    case 'head':
2765
                    case 'option':
2766
                    case 'optgroup':
2767
                    case 'tbody':
2768
                    case 'td':
2769
                    case 'tfoot':
2770
                    case 'th':
2771
                    case 'thead':
2772
                    case 'tr':
2773
                        // Parse error. Ignore the token.
2774
                        break;
2775
2776
                    /* A start or end tag whose tag name is one of: "event-source",
2777
                    "section", "nav", "article", "aside", "header", "footer",
2778
                    "datagrid", "command" */
2779
                    case 'event-source':
2780
                    case 'section':
2781
                    case 'nav':
2782
                    case 'article':
2783
                    case 'aside':
2784
                    case 'header':
2785
                    case 'footer':
2786
                    case 'datagrid':
2787
                    case 'command':
2788
                        // Work in progress!
2789
                        break;
2790
2791
                    /* A start tag token not covered by the previous entries */
2792
                    default:
2793
                        /* Reconstruct the active formatting elements, if any. */
2794
                        $this->reconstructActiveFormattingElements();
2795
2796
                        $this->insertElement($token, true, true);
2797
                        break;
2798
                }
2799
                break;
2800
2801
            case HTML5::ENDTAG:
2802
                switch ($token['name']) {
2803
                    /* An end tag with the tag name "body" */
2804
                    case 'body':
2805
                        /* If the second element in the stack of open elements is
2806
                        not a body element, this is a parse error. Ignore the token.
2807
                        (innerHTML case) */
2808
                        if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2809
                            // Ignore.
2810
2811
                            /* If the current node is not the body element, then this
2812
                            is a parse error. */
2813
                        } elseif (end($this->stack)->nodeName !== 'body') {
0 ignored issues
show
Unused Code introduced by
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
2814
                            // Parse error.
2815
                        }
2816
2817
                        /* Change the insertion mode to "after body". */
2818
                        $this->mode = self::AFTER_BODY;
2819
                        break;
2820
2821
                    /* An end tag with the tag name "html" */
2822
                    case 'html':
2823
                        /* Act as if an end tag with tag name "body" had been seen,
2824
                        then, if that token wasn't ignored, reprocess the current
2825
                        token. */
2826
                        $this->inBody(
2827
                            array(
2828
                                'name' => 'body',
2829
                                'type' => HTML5::ENDTAG
2830
                            )
2831
                        );
2832
2833
                        return $this->afterBody($token);
2834
                        break;
0 ignored issues
show
Unused Code introduced by
break is not strictly necessary here and could be removed.

The break statement is not necessary if it is preceded, for example, by a return statement:

switch ($x) {
    case 1:
        return 'foo';
        break; // This break is not necessary and can be left off.
}

If you would like to keep this construct to be consistent with other case statements, you can safely mark this issue as a false-positive.

Loading history...
2835
2836
                    /* An end tag whose tag name is one of: "address", "blockquote",
2837
                    "center", "dir", "div", "dl", "fieldset", "listing", "menu",
2838
                    "ol", "pre", "ul" */
2839
                    case 'address':
2840
                    case 'blockquote':
2841
                    case 'center':
2842
                    case 'dir':
2843
                    case 'div':
2844
                    case 'dl':
2845
                    case 'fieldset':
2846
                    case 'listing':
2847
                    case 'menu':
2848
                    case 'ol':
2849
                    case 'pre':
2850
                    case 'ul':
2851
                        /* If the stack of open elements has an element in scope
2852
                        with the same tag name as that of the token, then generate
2853
                        implied end tags. */
2854
                        if ($this->elementInScope($token['name'])) {
2855
                            $this->generateImpliedEndTags();
2856
2857
                            /* Now, if the current node is not an element with
2858
                            the same tag name as that of the token, then this
2859
                            is a parse error. */
2860
                            // w/e
2861
2862
                            /* If the stack of open elements has an element in
2863
                            scope with the same tag name as that of the token,
2864
                            then pop elements from this stack until an element
2865
                            with that tag name has been popped from the stack. */
2866
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2867
                                if ($this->stack[$n]->nodeName === $token['name']) {
2868
                                    $n = -1;
2869
                                }
2870
2871
                                array_pop($this->stack);
2872
                            }
2873
                        }
2874
                        break;
2875
2876
                    /* An end tag whose tag name is "form" */
2877
                    case 'form':
2878
                        /* If the stack of open elements has an element in scope
2879
                        with the same tag name as that of the token, then generate
2880
                        implied    end tags. */
2881
                        if ($this->elementInScope($token['name'])) {
2882
                            $this->generateImpliedEndTags();
2883
2884
                        }
2885
2886
                        if (end($this->stack)->nodeName !== $token['name']) {
0 ignored issues
show
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
2887
                            /* Now, if the current node is not an element with the
2888
                            same tag name as that of the token, then this is a parse
2889
                            error. */
2890
                            // w/e
2891
2892
                        } else {
2893
                            /* Otherwise, if the current node is an element with
2894
                            the same tag name as that of the token pop that element
2895
                            from the stack. */
2896
                            array_pop($this->stack);
2897
                        }
2898
2899
                        /* In any case, set the form element pointer to null. */
2900
                        $this->form_pointer = null;
2901
                        break;
2902
2903
                    /* An end tag whose tag name is "p" */
2904
                    case 'p':
2905
                        /* If the stack of open elements has a p element in scope,
2906
                        then generate implied end tags, except for p elements. */
2907
                        if ($this->elementInScope('p')) {
2908
                            $this->generateImpliedEndTags(array('p'));
2909
2910
                            /* If the current node is not a p element, then this is
2911
                            a parse error. */
2912
                            // k
2913
2914
                            /* If the stack of open elements has a p element in
2915
                            scope, then pop elements from this stack until the stack
2916
                            no longer has a p element in scope. */
2917
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2918
                                if ($this->elementInScope('p')) {
2919
                                    array_pop($this->stack);
2920
2921
                                } else {
2922
                                    break;
2923
                                }
2924
                            }
2925
                        }
2926
                        break;
2927
2928
                    /* An end tag whose tag name is "dd", "dt", or "li" */
2929
                    case 'dd':
2930
                    case 'dt':
2931
                    case 'li':
2932
                        /* If the stack of open elements has an element in scope
2933
                        whose tag name matches the tag name of the token, then
2934
                        generate implied end tags, except for elements with the
2935
                        same tag name as the token. */
2936
                        if ($this->elementInScope($token['name'])) {
2937
                            $this->generateImpliedEndTags(array($token['name']));
2938
2939
                            /* If the current node is not an element with the same
2940
                            tag name as the token, then this is a parse error. */
2941
                            // w/e
2942
2943
                            /* If the stack of open elements has an element in scope
2944
                            whose tag name matches the tag name of the token, then
2945
                            pop elements from this stack until an element with that
2946
                            tag name has been popped from the stack. */
2947
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
2948
                                if ($this->stack[$n]->nodeName === $token['name']) {
2949
                                    $n = -1;
2950
                                }
2951
2952
                                array_pop($this->stack);
2953
                            }
2954
                        }
2955
                        break;
2956
2957
                    /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
2958
                    "h5", "h6" */
2959
                    case 'h1':
2960
                    case 'h2':
2961
                    case 'h3':
2962
                    case 'h4':
2963
                    case 'h5':
2964
                    case 'h6':
2965
                        $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
2966
2967
                        /* If the stack of open elements has in scope an element whose
2968
                        tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
2969
                        generate implied end tags. */
2970
                        if ($this->elementInScope($elements)) {
2971
                            $this->generateImpliedEndTags();
2972
2973
                            /* Now, if the current node is not an element with the same
2974
                            tag name as that of the token, then this is a parse error. */
2975
                            // w/e
2976
2977
                            /* If the stack of open elements has in scope an element
2978
                            whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
2979
                            "h6", then pop elements from the stack until an element
2980
                            with one of those tag names has been popped from the stack. */
2981
                            while ($this->elementInScope($elements)) {
2982
                                array_pop($this->stack);
2983
                            }
2984
                        }
2985
                        break;
2986
2987
                    /* An end tag whose tag name is one of: "a", "b", "big", "em",
2988
                    "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
2989
                    case 'a':
2990
                    case 'b':
2991
                    case 'big':
2992
                    case 'em':
2993
                    case 'font':
2994
                    case 'i':
2995
                    case 'nobr':
2996
                    case 's':
2997
                    case 'small':
2998
                    case 'strike':
2999
                    case 'strong':
3000
                    case 'tt':
3001
                    case 'u':
3002
                        /* 1. Let the formatting element be the last element in
3003
                        the list of active formatting elements that:
3004
                            * is between the end of the list and the last scope
3005
                            marker in the list, if any, or the start of the list
3006
                            otherwise, and
3007
                            * has the same tag name as the token.
3008
                        */
3009
                        while (true) {
3010
                            for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
3011
                                if ($this->a_formatting[$a] === self::MARKER) {
3012
                                    break;
3013
3014
                                } elseif ($this->a_formatting[$a]->tagName === $token['name']) {
3015
                                    $formatting_element = $this->a_formatting[$a];
3016
                                    $in_stack = in_array($formatting_element, $this->stack, true);
3017
                                    $fe_af_pos = $a;
3018
                                    break;
3019
                                }
3020
                            }
3021
3022
                            /* If there is no such node, or, if that node is
3023
                            also in the stack of open elements but the element
3024
                            is not in scope, then this is a parse error. Abort
3025
                            these steps. The token is ignored. */
3026
                            if (!isset($formatting_element) || ($in_stack &&
3027
                                    !$this->elementInScope($token['name']))
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($token['name']) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false and null as possible values, it is generally good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
3028
                            ) {
3029
                                break;
3030
3031
                                /* Otherwise, if there is such a node, but that node
3032
                                is not in the stack of open elements, then this is a
3033
                                parse error; remove the element from the list, and
3034
                                abort these steps. */
3035
                            } elseif (isset($formatting_element) && !$in_stack) {
3036
                                unset($this->a_formatting[$fe_af_pos]);
3037
                                $this->a_formatting = array_merge($this->a_formatting);
3038
                                break;
3039
                            }
3040
3041
                            /* 2. Let the furthest block be the topmost node in the
3042
                            stack of open elements that is lower in the stack
3043
                            than the formatting element, and is not an element in
3044
                            the phrasing or formatting categories. There might
3045
                            not be one. */
3046
                            $fe_s_pos = array_search($formatting_element, $this->stack, true);
3047
                            $length = count($this->stack);
3048
3049
                            for ($s = $fe_s_pos + 1; $s < $length; $s++) {
3050
                                $category = $this->getElementCategory($this->stack[$s]->nodeName);
3051
3052
                                if ($category !== self::PHRASING && $category !== self::FORMATTING) {
3053
                                    $furthest_block = $this->stack[$s];
3054
                                }
3055
                            }
3056
3057
                            /* 3. If there is no furthest block, then the UA must
3058
                            skip the subsequent steps and instead just pop all
3059
                            the nodes from the bottom of the stack of open
3060
                            elements, from the current node up to the formatting
3061
                            element, and remove the formatting element from the
3062
                            list of active formatting elements. */
3063
                            if (!isset($furthest_block)) {
3064
                                for ($n = $length - 1; $n >= $fe_s_pos; $n--) {
3065
                                    array_pop($this->stack);
3066
                                }
3067
3068
                                unset($this->a_formatting[$fe_af_pos]);
3069
                                $this->a_formatting = array_merge($this->a_formatting);
3070
                                break;
3071
                            }
3072
3073
                            /* 4. Let the common ancestor be the element
3074
                            immediately above the formatting element in the stack
3075
                            of open elements. */
3076
                            $common_ancestor = $this->stack[$fe_s_pos - 1];
3077
3078
                            /* 5. If the furthest block has a parent node, then
3079
                            remove the furthest block from its parent node. */
3080
                            if ($furthest_block->parentNode !== null) {
3081
                                $furthest_block->parentNode->removeChild($furthest_block);
3082
                            }
3083
3084
                            /* 6. Let a bookmark note the position of the
3085
                            formatting element in the list of active formatting
3086
                            elements relative to the elements on either side
3087
                            of it in the list. */
3088
                            $bookmark = $fe_af_pos;
3089
3090
                            /* 7. Let node and last node  be the furthest block.
3091
                            Follow these steps: */
3092
                            $node = $furthest_block;
3093
                            $last_node = $furthest_block;
3094
3095
                            while (true) {
3096
                                for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
3097
                                    /* 7.1 Let node be the element immediately
3098
                                    prior to node in the stack of open elements. */
3099
                                    $node = $this->stack[$n];
3100
3101
                                    /* 7.2 If node is not in the list of active
3102
                                    formatting elements, then remove node from
3103
                                    the stack of open elements and then go back
3104
                                    to step 1. */
3105
                                    if (!in_array($node, $this->a_formatting, true)) {
3106
                                        unset($this->stack[$n]);
3107
                                        $this->stack = array_merge($this->stack);
3108
3109
                                    } else {
3110
                                        break;
3111
                                    }
3112
                                }
3113
3114
                                /* 7.3 Otherwise, if node is the formatting
3115
                                element, then go to the next step in the overall
3116
                                algorithm. */
3117
                                if ($node === $formatting_element) {
3118
                                    break;
3119
3120
                                    /* 7.4 Otherwise, if last node is the furthest
3121
                                    block, then move the aforementioned bookmark to
3122
                                    be immediately after the node in the list of
3123
                                    active formatting elements. */
3124
                                } elseif ($last_node === $furthest_block) {
3125
                                    $bookmark = array_search($node, $this->a_formatting, true) + 1;
3126
                                }
3127
3128
                                /* 7.5 If node has any children, perform a
3129
                                shallow clone of node, replace the entry for
3130
                                node in the list of active formatting elements
3131
                                with an entry for the clone, replace the entry
3132
                                for node in the stack of open elements with an
3133
                                entry for the clone, and let node be the clone. */
3134
                                if ($node->hasChildNodes()) {
3135
                                    $clone = $node->cloneNode();
3136
                                    $s_pos = array_search($node, $this->stack, true);
3137
                                    $a_pos = array_search($node, $this->a_formatting, true);
3138
3139
                                    $this->stack[$s_pos] = $clone;
3140
                                    $this->a_formatting[$a_pos] = $clone;
3141
                                    $node = $clone;
3142
                                }
3143
3144
                                /* 7.6 Insert last node into node, first removing
3145
                                it from its previous parent node if any. */
3146
                                if ($last_node->parentNode !== null) {
3147
                                    $last_node->parentNode->removeChild($last_node);
3148
                                }
3149
3150
                                $node->appendChild($last_node);
3151
3152
                                /* 7.7 Let last node be node. */
3153
                                $last_node = $node;
3154
                            }
3155
3156
                            /* 8. Insert whatever last node ended up being in
3157
                            the previous step into the common ancestor node,
3158
                            first removing it from its previous parent node if
3159
                            any. */
3160
                            if ($last_node->parentNode !== null) {
3161
                                $last_node->parentNode->removeChild($last_node);
3162
                            }
3163
3164
                            $common_ancestor->appendChild($last_node);
3165
3166
                            /* 9. Perform a shallow clone of the formatting
3167
                            element. */
3168
                            $clone = $formatting_element->cloneNode();
3169
3170
                            /* 10. Take all of the child nodes of the furthest
3171
                            block and append them to the clone created in the
3172
                            last step. */
3173
                            while ($furthest_block->hasChildNodes()) {
3174
                                $child = $furthest_block->firstChild;
3175
                                $furthest_block->removeChild($child);
3176
                                $clone->appendChild($child);
3177
                            }
3178
3179
                            /* 11. Append that clone to the furthest block. */
3180
                            $furthest_block->appendChild($clone);
3181
3182
                            /* 12. Remove the formatting element from the list
3183
                            of active formatting elements, and insert the clone
3184
                            into the list of active formatting elements at the
3185
                            position of the aforementioned bookmark. */
3186
                            $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
3187
                            unset($this->a_formatting[$fe_af_pos]);
3188
                            $this->a_formatting = array_merge($this->a_formatting);
3189
3190
                            $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
3191
                            $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));
3192
                            $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
3193
3194
                            /* 13. Remove the formatting element from the stack
3195
                            of open elements, and insert the clone into the stack
3196
                            of open elements immediately after (i.e. in a more
3197
                            deeply nested position than) the position of the
3198
                            furthest block in that stack. */
3199
                            $fe_s_pos = array_search($formatting_element, $this->stack, true);
3200
                            $fb_s_pos = array_search($furthest_block, $this->stack, true);
3201
                            unset($this->stack[$fe_s_pos]);
3202
3203
                            $s_part1 = array_slice($this->stack, 0, $fb_s_pos);
3204
                            $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));
3205
                            $this->stack = array_merge($s_part1, array($clone), $s_part2);
3206
3207
                            /* 14. Jump back to step 1 in this series of steps. */
3208
                            unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
3209
                        }
3210
                        break;
3211
3212
                    /* An end tag token whose tag name is one of: "button",
3213
                    "marquee", "object" */
3214
                    case 'button':
3215
                    case 'marquee':
3216
                    case 'object':
3217
                        /* If the stack of open elements has an element in scope whose
3218
                        tag name matches the tag name of the token, then generate implied
3219
                        tags. */
3220
                        if ($this->elementInScope($token['name'])) {
3221
                            $this->generateImpliedEndTags();
3222
3223
                            /* Now, if the current node is not an element with the same
3224
                            tag name as the token, then this is a parse error. */
3225
                            // k
3226
3227
                            /* Now, if the stack of open elements has an element in scope
3228
                            whose tag name matches the tag name of the token, then pop
3229
                            elements from the stack until that element has been popped from
3230
                            the stack, and clear the list of active formatting elements up
3231
                            to the last marker. */
3232
                            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
3233
                                if ($this->stack[$n]->nodeName === $token['name']) {
3234
                                    $n = -1;
3235
                                }
3236
3237
                                array_pop($this->stack);
3238
                            }
3239
3240
                            $marker = end(array_keys($this->a_formatting, self::MARKER, true));
0 ignored issues
show
Bug introduced by
array_keys($this->a_form...ng, self::MARKER, true) cannot be passed to end() as the parameter $array expects a reference.
Loading history...
3241
3242
                            for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
3243
                                array_pop($this->a_formatting);
3244
                            }
3245
                        }
3246
                        break;
3247
3248
                    /* Or an end tag whose tag name is one of: "area", "basefont",
3249
                    "bgsound", "br", "embed", "hr", "iframe", "image", "img",
3250
                    "input", "isindex", "noembed", "noframes", "param", "select",
3251
                    "spacer", "table", "textarea", "wbr" */
3252
                    case 'area':
3253
                    case 'basefont':
3254
                    case 'bgsound':
3255
                    case 'br':
3256
                    case 'embed':
3257
                    case 'hr':
3258
                    case 'iframe':
3259
                    case 'image':
3260
                    case 'img':
3261
                    case 'input':
3262
                    case 'isindex':
3263
                    case 'noembed':
3264
                    case 'noframes':
3265
                    case 'param':
3266
                    case 'select':
3267
                    case 'spacer':
3268
                    case 'table':
3269
                    case 'textarea':
3270
                    case 'wbr':
3271
                        // Parse error. Ignore the token.
3272
                        break;
3273
3274
                    /* An end tag token not covered by the previous entries */
3275
                    default:
3276
                        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
3277
                            /* Initialise node to be the current node (the bottommost
3278
                            node of the stack). */
3279
                            $node = end($this->stack);
3280
3281
                            /* If node has the same tag name as the end tag token,
3282
                            then: */
3283
                            if ($token['name'] === $node->nodeName) {
3284
                                /* Generate implied end tags. */
3285
                                $this->generateImpliedEndTags();
3286
3287
                                /* If the tag name of the end tag token does not
3288
                                match the tag name of the current node, this is a
3289
                                parse error. */
3290
                                // k
3291
3292
                                /* Pop all the nodes from the current node up to
3293
                                node, including node, then stop this algorithm. */
3294
                                for ($x = count($this->stack) - $n; $x >= $n; $x--) {
3295
                                    array_pop($this->stack);
3296
                                }
3297
3298
                            } else {
3299
                                $category = $this->getElementCategory($node);
3300
3301
                                if ($category !== self::SPECIAL && $category !== self::SCOPING) {
3302
                                    /* Otherwise, if node is in neither the formatting
3303
                                    category nor the phrasing category, then this is a
3304
                                    parse error. Stop this algorithm. The end tag token
3305
                                    is ignored. */
3306
                                    return false;
3307
                                }
3308
                            }
3309
                        }
3310
                        break;
3311
                }
3312
                break;
3313
        }
3314
    }
3315
3316
    /**
     * Processes one token while the insertion mode is "in table".
     *
     * Table-structure start tags (caption, colgroup, col, tbody/tfoot/thead,
     * td/th/tr, table) and the "table" end tag are handled here; everything
     * else is forwarded to inBody() after a foster parent has been chosen.
     *
     * @param array $token Token to process. 'type' is always read; 'name',
     *                     'data' and 'attr' are read depending on the type.
     * @return mixed false when a "table" start or end tag is ignored, the
     *               result of the handler the token was forwarded to for the
     *               td/th/tr and "table" start-tag branches, null otherwise.
     */
    private function inTable($token)
    {
        $clear = array('html', 'table');

        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE */
        if ($token['type'] === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Append the character to the current node. */
            $text = $this->dom->createTextNode($token['data']);
            end($this->stack)->appendChild($text);

            /* A comment token */
        } elseif ($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $comment = $this->dom->createComment($token['data']);
            end($this->stack)->appendChild($comment);

            /* A start tag whose tag name is "caption" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'caption'
        ) {
            /* Clear the stack back to a table context. */
            $this->clearStackToTableContext($clear);

            /* Insert a marker at the end of the list of active
            formatting elements. */
            $this->a_formatting[] = self::MARKER;

            /* Insert an HTML element for the token, then switch the
            insertion mode to "in caption". */
            $this->insertElement($token);
            $this->mode = self::IN_CAPTION;

            /* A start tag whose tag name is "colgroup" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'colgroup'
        ) {
            /* Clear the stack back to a table context. */
            $this->clearStackToTableContext($clear);

            /* Insert an HTML element for the token, then switch the
            insertion mode to "in column group". */
            $this->insertElement($token);
            $this->mode = self::IN_CGROUP;

            /* A start tag whose tag name is "col" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'col'
        ) {
            /* Act as if a start tag token with the tag name "colgroup" had
            been seen, then reprocess the current token. */
            $this->inTable(
                array(
                    'name' => 'colgroup',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );

            $this->inColumnGroup($token);

            /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)
        ) {
            /* Clear the stack back to a table context. */
            $this->clearStackToTableContext($clear);

            /* Insert an HTML element for the token, then switch the insertion
            mode to "in table body". */
            $this->insertElement($token);
            $this->mode = self::IN_TBODY;

            /* A start tag whose tag name is one of: "td", "th", "tr" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            in_array($token['name'], array('td', 'th', 'tr'), true)
        ) {
            /* Act as if a start tag token with the tag name "tbody" had been
            seen, then reprocess the current token. */
            $this->inTable(
                array(
                    'name' => 'tbody',
                    'type' => HTML5::STARTTAG,
                    'attr' => array()
                )
            );

            return $this->inTableBody($token);

            /* A start tag whose tag name is "table" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'table'
        ) {
            /* Parse error. Act as if an end tag token with the tag name "table"
            had been seen, then, if that token wasn't ignored, reprocess the
            current token. */
            $ignored = $this->inTable(
                array(
                    'name' => 'table',
                    'type' => HTML5::ENDTAG
                )
            ) === false;

            // The end-tag branch below returns false when it ignores the
            // token. In that case nothing was popped and the insertion mode
            // is unchanged, so reprocessing would hand the same start tag
            // straight back to this branch; ignore it instead.
            if ($ignored) {
                return false;
            }

            return $this->mainPhase($token);

            /* An end tag whose tag name is "table" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            $token['name'] === 'table'
        ) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) */
            if (!$this->elementInScope($token['name'], true)) {
                return false;

                /* Otherwise: */
            } else {
                /* Generate implied end tags. */
                $this->generateImpliedEndTags();

                /* Now, if the current node is not a table element, then this
                is a parse error. */
                // w/e

                /* Pop elements from this stack until a table element has been
                popped from the stack. */
                while (true) {
                    $current = end($this->stack)->nodeName;
                    array_pop($this->stack);

                    if ($current === 'table') {
                        break;
                    }
                }

                /* Reset the insertion mode appropriately. */
                $this->resetInsertionMode();
            }

            /* An end tag whose tag name is one of: "body", "caption", "col",
            "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                array(
                    'body',
                    'caption',
                    'col',
                    'colgroup',
                    'html',
                    'tbody',
                    'td',
                    'tfoot',
                    'th',
                    'thead',
                    'tr'
                ),
                true
            )
        ) {
            // Parse error. Ignore the token.

            /* Anything else */
        } else {
            /* Parse error. Process the token as if the insertion mode was "in
            body", with the following exception: */

            /* If the current node is a table, tbody, tfoot, thead, or tr
            element, then, whenever a node would be inserted into the current
            node, it must instead be inserted into the foster parent element. */
            if (in_array(
                end($this->stack)->nodeName,
                array('table', 'tbody', 'tfoot', 'thead', 'tr'),
                true
            )
            ) {
                /* The foster parent element is the parent element of the last
                table element in the stack of open elements, if there is a
                table element and it has such a parent element. If there is no
                table element in the stack of open elements (innerHTML case),
                then the foster parent element is the first element in the
                stack of open elements (the html  element). Otherwise, if there
                is a table element in the stack of open elements, but the last
                table element in the stack of open elements has no parent, or
                its parent node is not an element, then the foster parent
                element is the element before the last table element in the
                stack of open elements. */
                $table = null;
                $table_pos = -1;

                // Search from the current node towards the root so that the
                // last (most deeply nested) table wins.
                for ($n = count($this->stack) - 1; $n >= 0; $n--) {
                    if ($this->stack[$n]->nodeName === 'table') {
                        $table = $this->stack[$n];
                        $table_pos = $n;
                        break;
                    }
                }

                if ($table === null) {
                    $this->foster_parent = $this->stack[0];

                } elseif ($table->parentNode !== null &&
                    $table->parentNode->nodeType === XML_ELEMENT_NODE
                ) {
                    // Only an element parent qualifies; a missing or
                    // non-element parent falls through to the stack entry
                    // before the table.
                    $this->foster_parent = $table->parentNode;

                } else {
                    $this->foster_parent = $this->stack[$table_pos - 1];
                }
            }

            $this->inBody($token);
        }
    }
3527
3528
    /**
     * Processes one token while the insertion mode is "in caption".
     *
     * Closes the caption on a "caption" end tag, implicitly closes it when a
     * table-structure tag arrives, ignores stray table-related end tags, and
     * forwards everything else to inBody().
     *
     * @param array $token Token to process; 'type' and 'name' are read here.
     * @return mixed The result of inTable() when the token is reprocessed
     *               there, null otherwise.
     */
    private function inCaption($token)
    {
        /* An end tag whose tag name is "caption" */
        if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) Otherwise: */
            if ($this->elementInScope($token['name'], true)) {
                /* Generate implied end tags. */
                $this->generateImpliedEndTags();

                /* Now, if the current node is not a caption element, then this
                is a parse error. */
                // w/e

                /* Pop elements from this stack until a caption element has
                been popped from the stack. */
                while (true) {
                    $node = end($this->stack)->nodeName;
                    array_pop($this->stack);

                    if ($node === 'caption') {
                        break;
                    }
                }

                /* Clear the list of active formatting elements up to the last
                marker. */
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();

                /* Switch the insertion mode to "in table". */
                $this->mode = self::IN_TABLE;
            }

            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
            "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
            name is "table" */
        } elseif (($token['type'] === HTML5::STARTTAG && in_array(
                    $token['name'],
                    array(
                        'caption',
                        'col',
                        'colgroup',
                        'tbody',
                        'td',
                        'tfoot',
                        'th',
                        'thead',
                        'tr'
                    ),
                    true
                )) || ($token['type'] === HTML5::ENDTAG &&
                $token['name'] === 'table')
        ) {
            /* Parse error. Act as if an end tag with the tag name "caption"
            had been seen, then, if that token wasn't ignored, reprocess the
            current token. */
            // NOTE(review): the token is reprocessed unconditionally here,
            // because the "caption" end-tag branch above does not report
            // whether it ignored the token.
            $this->inCaption(
                array(
                    'name' => 'caption',
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->inTable($token);

            /* An end tag whose tag name is one of: "body", "col", "colgroup",
            "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                array(
                    'body',
                    'col',
                    'colgroup',
                    'html',
                    'tbody',
                    // "td" is named in the rule above; without it a stray
                    // </td> would be handed to inBody() instead of ignored.
                    'td',
                    'tfoot',
                    'th',
                    'thead',
                    'tr'
                ),
                true
            )
        ) {
            // Parse error. Ignore the token.

            /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in body". */
            $this->inBody($token);
        }
    }
3622
3623
    /**
     * Processes one token while the insertion mode is "in column group".
     *
     * @param array $token Token to process; 'type' is always read, 'name' and
     *                     'data' depending on the type.
     * @return mixed The result of inTable() when the token is reprocessed
     *               there, null otherwise.
     */
    private function inColumnGroup($token)
    {
        /* A character token that is one of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE: append the character to the current node. */
        if ($token['type'] === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            $whitespace = $this->dom->createTextNode($token['data']);
            end($this->stack)->appendChild($whitespace);
            return;
        }

        /* A comment token: append a Comment node to the current node with
        the data attribute set to the data given in the comment token. */
        if ($token['type'] === HTML5::COMMENT) {
            $comment_node = $this->dom->createComment($token['data']);
            end($this->stack)->appendChild($comment_node);
            return;
        }

        /* A start tag whose tag name is "col": insert a col element for the
        token, then immediately pop it off the stack of open elements. */
        if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
            $this->insertElement($token);
            array_pop($this->stack);
            return;
        }

        /* An end tag whose tag name is "colgroup" */
        if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'colgroup') {
            /* If the current node is the root html element, this is a parse
            error and the token is ignored (innerHTML case). Otherwise pop
            the current node (which will be a colgroup element) and switch
            the insertion mode to "in table". */
            if (end($this->stack)->nodeName !== 'html') {
                array_pop($this->stack);
                $this->mode = self::IN_TABLE;
            }
            return;
        }

        /* An end tag whose tag name is "col": parse error, ignore the token. */
        if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
            return;
        }

        /* Anything else: act as if an end tag with the tag name "colgroup"
        had been seen, and then, if that token wasn't ignored, reprocess the
        current token. */
        $implied_end = array(
            'name' => 'colgroup',
            'type' => HTML5::ENDTAG
        );
        $this->inColumnGroup($implied_end);

        return $this->inTable($token);
    }
3684
3685
    private function inTableBody($token)
3686
    {
3687
        $clear = array('tbody', 'tfoot', 'thead', 'html');
3688
3689
        /* A start tag whose tag name is "tr" */
3690
        if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
3691
            /* Clear the stack back to a table body context. */
3692
            $this->clearStackToTableContext($clear);
3693
3694
            /* Insert a tr element for the token, then switch the insertion
3695
            mode to "in row". */
3696
            $this->insertElement($token);
3697
            $this->mode = self::IN_ROW;
3698
3699
            /* A start tag whose tag name is one of: "th", "td" */
3700
        } elseif ($token['type'] === HTML5::STARTTAG &&
3701
            ($token['name'] === 'th' || $token['name'] === 'td')
3702
        ) {
3703
            /* Parse error. Act as if a start tag with the tag name "tr" had
3704
            been seen, then reprocess the current token. */
3705
            $this->inTableBody(
3706
                array(
3707
                    'name' => 'tr',
3708
                    'type' => HTML5::STARTTAG,
3709
                    'attr' => array()
3710
                )
3711
            );
3712
3713
            return $this->inRow($token);
3714
3715
            /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
3716
        } elseif ($token['type'] === HTML5::ENDTAG &&
3717
            in_array($token['name'], array('tbody', 'tfoot', 'thead'))
3718
        ) {
3719
            /* If the stack of open elements does not have an element in table
3720
            scope with the same tag name as the token, this is a parse error.
3721
            Ignore the token. */
3722
            if (!$this->elementInScope($token['name'], true)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope($token['name'], true) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false, and null as possible values. It is generally a good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
3723
                // Ignore
3724
3725
                /* Otherwise: */
3726
            } else {
3727
                /* Clear the stack back to a table body context. */
3728
                $this->clearStackToTableContext($clear);
3729
3730
                /* Pop the current node from the stack of open elements. Switch
3731
                the insertion mode to "in table". */
3732
                array_pop($this->stack);
3733
                $this->mode = self::IN_TABLE;
3734
            }
3735
3736
            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
3737
            "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
3738
        } elseif (($token['type'] === HTML5::STARTTAG && in_array(
3739
                    $token['name'],
3740
                    array('caption', 'col', 'colgroup', 'tbody', 'tfoor', 'thead')
3741
                )) ||
3742
            ($token['type'] === HTML5::STARTTAG && $token['name'] === 'table')
3743
        ) {
3744
            /* If the stack of open elements does not have a tbody, thead, or
3745
            tfoot element in table scope, this is a parse error. Ignore the
3746
            token. (innerHTML case) */
3747
            if (!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->elementInScope(ar...thead', 'tfoot'), true) of type boolean|null is loosely compared to false; this is ambiguous if the boolean can be false. You might want to explicitly use !== null instead.

If an expression can have both false, and null as possible values. It is generally a good practice to always use strict comparison to clearly distinguish between those two values.

$a = canBeFalseAndNull();

// Instead of
if ( ! $a) { }

// Better use one of the explicit versions:
if ($a !== null) { }
if ($a !== false) { }
if ($a !== null && $a !== false) { }
Loading history...
Unused Code introduced by
This if statement is empty and can be removed.

This check looks for the bodies of if statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These if bodies can be removed. If you have an empty if but statements in the else branch, consider inverting the condition.

if (rand(1, 6) > 3) {
//print "Check failed";
} else {
    print "Check succeeded";
}

could be turned into

if (rand(1, 6) <= 3) {
    print "Check succeeded";
}

This is much more concise to read.

Loading history...
3748
                // Ignore.
3749
3750
                /* Otherwise: */
3751
            } else {
3752
                /* Clear the stack back to a table body context. */
3753
                $this->clearStackToTableContext($clear);
3754
3755
                /* Act as if an end tag with the same tag name as the current
3756
                node ("tbody", "tfoot", or "thead") had been seen, then
3757
                reprocess the current token. */
3758
                $this->inTableBody(
3759
                    array(
3760
                        'name' => end($this->stack)->nodeName,
3761
                        'type' => HTML5::ENDTAG
3762
                    )
3763
                );
3764
3765
                return $this->mainPhase($token);
3766
            }
3767
3768
            /* An end tag whose tag name is one of: "body", "caption", "col",
3769
            "colgroup", "html", "td", "th", "tr" */
3770
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
0 ignored issues
show
Unused Code introduced by
This elseif statement is empty, and could be removed.

This check looks for the bodies of elseif statements that have no statements or where all statements have been commented out. This may be the result of changes for debugging or the code may simply be obsolete.

These elseif bodies can be removed. If you have an empty elseif but statements in the else branch, consider inverting the condition.

Loading history...
3771
                $token['name'],
3772
                array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
3773
            )
3774
        ) {
3775
            /* Parse error. Ignore the token. */
3776
3777
            /* Anything else */
3778
        } else {
3779
            /* Process the token as if the insertion mode was "in table". */
3780
            $this->inTable($token);
3781
        }
3782
    }
3783
3784
    /**
     * Processes one token while the insertion mode is "in row".
     *
     * @param array $token Token array; this method reads its 'type' key and,
     *                     for tag tokens, its 'name' key.
     * @return mixed Whatever mainPhase() returns when the row is implicitly
     *               closed and the token is reprocessed; null otherwise.
     */
    private function inRow($token)
    {
        $clear = array('tr', 'html');

        /* A start tag whose tag name is one of: "th", "td" */
        if ($token['type'] === HTML5::STARTTAG &&
            ($token['name'] === 'th' || $token['name'] === 'td')
        ) {
            /* Clear the stack back to a table row context. */
            $this->clearStackToTableContext($clear);

            /* Insert an HTML element for the token, then switch the insertion
            mode to "in cell". */
            $this->insertElement($token);
            $this->mode = self::IN_CELL;

            /* Insert a marker at the end of the list of active formatting
            elements. */
            $this->a_formatting[] = self::MARKER;

            /* An end tag whose tag name is "tr" */
        } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) */
            if ($this->elementInScope($token['name'], true)) {
                /* Clear the stack back to a table row context. */
                $this->clearStackToTableContext($clear);

                /* Pop the current node (which will be a tr element) from the
                stack of open elements. Switch the insertion mode to "in table
                body". */
                array_pop($this->stack);
                $this->mode = self::IN_TBODY;
            }

            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
            "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
        } elseif (($token['type'] === HTML5::STARTTAG && in_array(
                    $token['name'],
                    array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'),
                    true
                )) ||
            ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')
        ) {
            /* Act as if an end tag with the tag name "tr" had been seen, then,
            if that token wasn't ignored, reprocess the current token. */
            // The implied </tr> is ignored exactly when no tr is in table
            // scope; in that case the mode would not change, so reprocessing
            // would recurse into this method forever. Drop the token instead.
            if ($this->elementInScope('tr', true)) {
                $this->inRow(
                    array(
                        'name' => 'tr',
                        'type' => HTML5::ENDTAG
                    )
                );

                // The implied </tr> above switched the mode to IN_TBODY, so
                // the token must be reprocessed under the new mode rather
                // than handed to inCell().
                // NOTE(review): relies on mainPhase() dispatching on
                // $this->mode, as its other "reprocess" call sites suggest.
                return $this->mainPhase($token);
            }

            /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)
        ) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. */
            // The extra tr check guards against endless recursion when the
            // implied </tr> would itself be ignored.
            if ($this->elementInScope($token['name'], true) &&
                $this->elementInScope('tr', true)
            ) {
                /* Otherwise, act as if an end tag with the tag name "tr" had
                been seen, then reprocess the current token. */
                $this->inRow(
                    array(
                        'name' => 'tr',
                        'type' => HTML5::ENDTAG
                    )
                );

                return $this->mainPhase($token);
            }

            /* An end tag whose tag name is one of: "body", "caption", "col",
            "colgroup", "html", "td", "th" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'),
                true
            )
        ) {
            /* Parse error. Ignore the token. */

            /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in table". */
            $this->inTable($token);
        }
    }
3881
3882
    /**
     * Processes one token while the insertion mode is "in cell".
     *
     * @param array $token Token array; this method reads its 'type' key and,
     *                     for tag tokens, its 'name' key.
     * @return mixed Whatever inRow() returns when the cell is closed and the
     *               token is reprocessed; null otherwise.
     */
    private function inCell($token)
    {
        /* An end tag whose tag name is one of: "td", "th" */
        if ($token['type'] === HTML5::ENDTAG &&
            ($token['name'] === 'td' || $token['name'] === 'th')
        ) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as that of the token, then this is a
            parse error and the token must be ignored. */
            if ($this->elementInScope($token['name'], true)) {
                /* Generate implied end tags, except for elements with the same
                tag name as the token. */
                $this->generateImpliedEndTags(array($token['name']));

                /* Now, if the current node is not an element with the same tag
                name as the token, then this is a parse error. */

                /* Pop elements from this stack until an element with the same
                tag name as the token has been popped from the stack. */
                // array_pop() yields null once the stack is empty; stopping
                // there keeps the loop from spinning forever.
                do {
                    $popped = array_pop($this->stack);
                } while ($popped !== null && $popped->nodeName !== $token['name']);

                /* Clear the list of active formatting elements up to the last
                marker. */
                $this->clearTheActiveFormattingElementsUpToTheLastMarker();

                /* Switch the insertion mode to "in row". (The current node
                will be a tr element at this point.) */
                $this->mode = self::IN_ROW;
            }

            /* A start tag whose tag name is one of: "caption", "col", "colgroup",
            "tbody", "td", "tfoot", "th", "thead", "tr" */
        } elseif ($token['type'] === HTML5::STARTTAG && in_array(
                $token['name'],
                array(
                    'caption',
                    'col',
                    'colgroup',
                    'tbody',
                    'td',
                    'tfoot',
                    'th',
                    'thead',
                    'tr'
                ),
                true
            )
        ) {
            /* If the stack of open elements does not have a td or th element
            in table scope, then this is a parse error; ignore the token.
            (innerHTML case) */
            if ($this->elementInScope(array('td', 'th'), true)) {
                /* Otherwise, close the cell and reprocess the current
                token. */
                $this->closeCell();
                return $this->inRow($token);
            }

            /* An end tag whose tag name is one of: "body", "caption", "col",
            "colgroup", "html" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                array('body', 'caption', 'col', 'colgroup', 'html'),
                true
            )
        ) {
            /* Parse error. Ignore the token. */

            /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
            "thead", "tr" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                array('table', 'tbody', 'tfoot', 'thead', 'tr'),
                true
            )
        ) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as that of the token (which can only
            happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
            then this is a parse error and the token must be ignored. */
            if ($this->elementInScope($token['name'], true)) {
                /* Otherwise, close the cell and reprocess the current
                token. */
                $this->closeCell();
                return $this->inRow($token);
            }

            /* Anything else */
        } else {
            /* Process the token as if the insertion mode was "in body". */
            $this->inBody($token);
        }
    }
4020
4021
    /**
     * Processes one token while the insertion mode is "in select".
     *
     * @param array $token Token array; this method reads its 'type' key, the
     *                     'data' key of character and comment tokens, and the
     *                     'name' key of tag tokens.
     * @return void
     */
    private function inSelect($token)
    {
        /* Handle the token as follows: */

        /* A character token */
        if ($token['type'] === HTML5::CHARACTR) {
            /* Append the token's character to the current node. */
            $this->insertText($token['data']);

            /* A comment token */
        } elseif ($token['type'] === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

            /* A start tag token whose tag name is "option" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'option'
        ) {
            /* If the current node is an option element, act as if an end tag
            with the tag name "option" had been seen. */
            if (end($this->stack)->nodeName === 'option') {
                $this->inSelect(
                    array(
                        'name' => 'option',
                        'type' => HTML5::ENDTAG
                    )
                );
            }

            /* Insert an HTML element for the token. */
            $this->insertElement($token);

            /* A start tag token whose tag name is "optgroup" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'optgroup'
        ) {
            /* If the current node is an option element, act as if an end tag
            with the tag name "option" had been seen. */
            if (end($this->stack)->nodeName === 'option') {
                $this->inSelect(
                    array(
                        'name' => 'option',
                        'type' => HTML5::ENDTAG
                    )
                );
            }

            /* If the current node is an optgroup element, act as if an end tag
            with the tag name "optgroup" had been seen. */
            if (end($this->stack)->nodeName === 'optgroup') {
                $this->inSelect(
                    array(
                        'name' => 'optgroup',
                        'type' => HTML5::ENDTAG
                    )
                );
            }

            /* Insert an HTML element for the token. */
            $this->insertElement($token);

            /* An end tag token whose tag name is "optgroup" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            $token['name'] === 'optgroup'
        ) {
            /* First, if the current node is an option element, and the node
            immediately before it in the stack of open elements is an optgroup
            element, then act as if an end tag with the tag name "option" had
            been seen. */
            $depth = count($this->stack);

            // The depth guard avoids reading a negative offset on a short
            // stack.
            if ($depth >= 2 &&
                $this->stack[$depth - 1]->nodeName === 'option' &&
                $this->stack[$depth - 2]->nodeName === 'optgroup'
            ) {
                $this->inSelect(
                    array(
                        'name' => 'option',
                        'type' => HTML5::ENDTAG
                    )
                );
            }

            /* If the current node is an optgroup element, then pop that node
            from the stack of open elements. Otherwise, this is a parse error,
            ignore the token. */
            // Re-read the top of the stack: the implied </option> above may
            // have popped a node, so the earlier count is stale. Compare the
            // node's name, not the node object itself, against the tag name.
            if (end($this->stack)->nodeName === 'optgroup') {
                array_pop($this->stack);
            }

            /* An end tag token whose tag name is "option" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            $token['name'] === 'option'
        ) {
            /* If the current node is an option element, then pop that node
            from the stack of open elements. Otherwise, this is a parse error,
            ignore the token. */
            if (end($this->stack)->nodeName === 'option') {
                array_pop($this->stack);
            }

            /* An end tag whose tag name is "select" */
        } elseif ($token['type'] === HTML5::ENDTAG &&
            $token['name'] === 'select'
        ) {
            /* If the stack of open elements does not have an element in table
            scope with the same tag name as the token, this is a parse error.
            Ignore the token. (innerHTML case) */
            if ($this->elementInScope($token['name'], true)) {
                /* Pop elements from the stack of open elements until a select
                element has been popped from the stack. */
                while (true) {
                    $current = end($this->stack)->nodeName;
                    array_pop($this->stack);

                    if ($current === 'select') {
                        break;
                    }
                }

                /* Reset the insertion mode appropriately. */
                $this->resetInsertionMode();
            }

            /* A start tag whose tag name is "select" */
        } elseif ($token['type'] === HTML5::STARTTAG &&
            $token['name'] === 'select'
        ) {
            /* Parse error. Act as if the token had been an end tag with the
            tag name "select" instead. */
            $this->inSelect(
                array(
                    'name' => 'select',
                    'type' => HTML5::ENDTAG
                )
            );

            /* An end tag whose tag name is one of: "caption", "table", "tbody",
            "tfoot", "thead", "tr", "td", "th" */
        } elseif ($token['type'] === HTML5::ENDTAG && in_array(
                $token['name'],
                array(
                    'caption',
                    'table',
                    'tbody',
                    'tfoot',
                    'thead',
                    'tr',
                    'td',
                    'th'
                ),
                true
            )
        ) {
            /* Parse error. */

            /* If the stack of open elements has an element in table scope with
            the same tag name as that of the token, then act as if an end tag
            with the tag name "select" had been seen, and reprocess the token.
            Otherwise, ignore the token. */
            if ($this->elementInScope($token['name'], true)) {
                $this->inSelect(
                    array(
                        'name' => 'select',
                        'type' => HTML5::ENDTAG
                    )
                );

                $this->mainPhase($token);
            }
        }

        /* Anything else: parse error. Ignore the token. */
    }
4201
4202
    /**
     * Processes one token while the insertion mode is "after body".
     *
     * @param array $token Token array; this method reads its 'type' key, the
     *                     'data' key of character and comment tokens, and the
     *                     'name' key of end tag tokens.
     * @return mixed Whatever inBody() returns when the token falls through to
     *               the "anything else" case; null otherwise.
     */
    private function afterBody($token)
    {
        $kind = $token['type'];

        /* A character token made up only of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
        or U+0020 SPACE: process it as it would be processed if the insertion
        mode was "in body". */
        if ($kind === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            $this->inBody($token);
            return;
        }

        /* A comment token: append a Comment node to the first element in the
        stack of open elements (the html element), with the data attribute
        set to the data given in the comment token. */
        if ($kind === HTML5::COMMENT) {
            $commentNode = $this->dom->createComment($token['data']);
            $this->stack[0]->appendChild($commentNode);
            return;
        }

        /* An end tag with the tag name "html": switch to the trailing end
        phase. (The innerHTML case, where the token would be ignored as a
        parse error, is not handled here.) */
        if ($kind === HTML5::ENDTAG && $token['name'] === 'html') {
            $this->phase = self::END_PHASE;
            return;
        }

        /* Anything else: parse error. Set the insertion mode to "in body" and
        reprocess the token. */
        $this->mode = self::IN_BODY;

        return $this->inBody($token);
    }
4242
4243
    /**
     * Processes one token while the insertion mode is "in frameset".
     *
     * Tokens that match none of the cases below are parse errors and are
     * ignored.
     *
     * @param array $token Token array. 'type' is always read; 'data' is read
     *     for character and comment tokens and 'name' for start/end tags.
     * @return void
     */
    private function inFrameset($token)
    {
        // The token type is tested before 'name' so that 'name' is only read
        // for start/end tag tokens.
        $type = $token['type'];

        /* A character token consisting only of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF)
        or U+0020 SPACE.
        NOTE(review): the pattern does not match U+000D CARRIAGE RETURN;
        presumably CRs are normalised before tokens reach this method --
        confirm. */
        if ($type === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

            /* A comment token */
        } elseif ($type === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

            /* A start tag with the tag name "frameset" */
        } elseif ($type === HTML5::STARTTAG && $token['name'] === 'frameset') {
            $this->insertElement($token);

            /* An end tag with the tag name "frameset" */
        } elseif ($type === HTML5::ENDTAG && $token['name'] === 'frameset') {
            /* If the current node is the root html element, then this is a
            parse error; ignore the token. (innerHTML case) */
            if (end($this->stack)->nodeName !== 'html') {
                /* Otherwise, pop the current node from the stack of open
                elements. */
                array_pop($this->stack);

                /* If the current node is no longer a frameset element, then
                change the insertion mode to "after frameset". When an outer
                frameset is still open (nested framesets) the mode must stay
                "in frameset" so that following frame tags are still
                handled. */
                $current = end($this->stack);
                if ($current === false || $current->nodeName !== 'frameset') {
                    $this->mode = self::AFTR_FRAME;
                }
            }

            /* A start tag with the tag name "frame" */
        } elseif ($type === HTML5::STARTTAG && $token['name'] === 'frame') {
            /* Insert an HTML element for the token. */
            $this->insertElement($token);

            /* Immediately pop the current node off the stack of open elements. */
            array_pop($this->stack);

            /* A start tag with the tag name "noframes" */
        } elseif ($type === HTML5::STARTTAG && $token['name'] === 'noframes') {
            /* Process the token as if the insertion mode had been "in body". */
            $this->inBody($token);
        }

        /* Anything else: parse error. Ignore the token. */
    }
4311
4312
    /**
     * Processes one token while the insertion mode is "after frameset".
     *
     * Tokens that match none of the cases below are parse errors and are
     * ignored.
     *
     * @param array $token Token array. 'type' is always read; 'data' is read
     *     for character and comment tokens and 'name' for start/end tags.
     * @return void
     */
    private function afterFrameset($token)
    {
        // The token type is tested before 'name' so that 'name' is only read
        // for start/end tag tokens.
        $type = $token['type'];

        /* A character token consisting only of U+0009 CHARACTER TABULATION,
        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF)
        or U+0020 SPACE.
        NOTE(review): the pattern does not match U+000D CARRIAGE RETURN;
        presumably CRs are normalised before tokens reach this method --
        confirm. */
        if ($type === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Append the character to the current node. */
            $this->insertText($token['data']);

            /* A comment token */
        } elseif ($type === HTML5::COMMENT) {
            /* Append a Comment node to the current node with the data
            attribute set to the data given in the comment token. */
            $this->insertComment($token['data']);

            /* An end tag with the tag name "html" */
        } elseif ($type === HTML5::ENDTAG && $token['name'] === 'html') {
            /* Switch to the trailing end phase. */
            $this->phase = self::END_PHASE;

            /* A start tag with the tag name "noframes" */
        } elseif ($type === HTML5::STARTTAG && $token['name'] === 'noframes') {
            /* Process the token as if the insertion mode had been "in body". */
            $this->inBody($token);
        }

        /* Anything else: parse error. Ignore the token. */
    }
4350
4351
    /**
     * Processes one token in the trailing end phase, i.e. after the main
     * phase has finished.
     *
     * DOCTYPE tokens are parse errors and are ignored; end-of-file tokens
     * need no action.
     *
     * @param array $token Token array. 'type' is always read; 'data' is read
     *     for comment and character tokens.
     * @return mixed The result of mainPhase() when the token forces a switch
     *     back to the main phase, otherwise null.
     */
    private function trailingEndPhase($token)
    {
        $type = $token['type'];

        /* A comment token */
        if ($type === HTML5::COMMENT) {
            /* Append a Comment node to the Document object with the data
            attribute set to the data given in the comment token. */
            $comment = $this->dom->createComment($token['data']);
            $this->dom->appendChild($comment);

            /* A character token consisting only of U+0009 CHARACTER
            TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION,
            U+000C FORM FEED (FF) or U+0020 SPACE */
        } elseif ($type === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
        ) {
            /* Process the token as it would be processed in the main phase. */
            $this->mainPhase($token);

            /* Any other character token (the whitespace-only case was taken
            by the branch above, so reaching here means non-whitespace data).
            Or a start tag token. Or an end tag token. */
        } elseif ($type === HTML5::CHARACTR ||
            $type === HTML5::STARTTAG ||
            $type === HTML5::ENDTAG
        ) {
            /* Parse error. Switch back to the main phase and reprocess the
            token. */
            $this->phase = self::MAIN_PHASE;
            return $this->mainPhase($token);
        }

        /* DOCTYPE token: parse error, ignored. End-of-file token: done. */
    }
4393
4394
    /**
     * Creates a DOM element for a tag token, attaches it via
     * appendToRealParent() and pushes it onto the stack of open elements.
     *
     * @param array $token Tag token; 'name' and 'attr' are read. Each entry
     *     of 'attr' is an array with 'name' and 'value' keys.
     * @param bool $append Not read anywhere in this method; the element is
     *     always appended and pushed.
     * @param bool $check When true, the tag name is first reduced to ASCII
     *     letters, digits and hyphens (workaround for libxml2's tag-name
     *     limitations).
     * @return DOMElement The newly created element.
     */
    private function insertElement($token, $append = true, $check = false)
    {
        $tagName = $token['name'];

        if ($check) {
            // Drop every character that is not an ASCII letter, digit or
            // hyphen, then strip leading hyphens and digits.
            $tagName = preg_replace('/[^a-z0-9-]/i', '', $tagName);
            $tagName = ltrim($tagName, '-0..9');

            // Should never be needed in theory; 'span' is an arbitrary
            // generic fallback.
            if ($tagName === '') {
                $tagName = 'span';
            }
        }

        $element = $this->dom->createElement($tagName);

        // The first occurrence of an attribute name wins; later duplicates
        // are skipped.
        foreach ($token['attr'] as $attribute) {
            if ($element->hasAttribute($attribute['name'])) {
                continue;
            }
            $element->setAttribute($attribute['name'], $attribute['value']);
        }

        $this->appendToRealParent($element);
        $this->stack[] = $element;

        return $element;
    }
4422
4423
    /**
     * Creates a text node holding $data and attaches it via
     * appendToRealParent().
     *
     * @param string $data Character data for the new text node.
     * @return void
     */
    private function insertText($data)
    {
        $this->appendToRealParent($this->dom->createTextNode($data));
    }
4428
4429
    /**
     * Creates a comment node holding $data and attaches it via
     * appendToRealParent().
     *
     * @param string $data Comment text for the new comment node.
     * @return void
     */
    private function insertComment($data)
    {
        $this->appendToRealParent($this->dom->createComment($data));
    }
4434
4435
    /**
     * Attaches $node to the tree, honouring a pending foster parent.
     *
     * With no foster parent set, the node is appended to the current node
     * (the last entry of the stack of open elements). Otherwise it is placed
     * relative to the foster parent and the foster parent is reset to null.
     *
     * @param DOMNode $node Node to attach.
     * @return void
     */
    private function appendToRealParent($node)
    {
        if ($this->foster_parent === null) {
            end($this->stack)->appendChild($node);
            return;
        }

        /* Find the last table element in the stack of open elements that is
        attached to a parent node. */
        $lastTable = null;
        $index = count($this->stack);
        while ($index-- > 0) {
            $candidate = $this->stack[$index];
            if ($candidate->nodeName === 'table' && $candidate->parentNode !== null) {
                $lastTable = $candidate;
                break;
            }
        }

        /* If the foster parent element is the parent element of that table,
        the new node is inserted immediately before the table; otherwise it
        is appended to the foster parent element. */
        $insertBeforeTable = $lastTable !== null
            && $this->foster_parent->isSameNode($lastTable->parentNode);

        if ($insertBeforeTable) {
            $this->foster_parent->insertBefore($node, $lastTable);
        } else {
            $this->foster_parent->appendChild($node);
        }

        $this->foster_parent = null;
    }
4465
4466
    /**
     * Tells whether the stack of open elements has an element in scope
     * (default) or in table scope ($table === true).
     *
     * @param string|string[] $el Tag name to look for, or an array of tag
     *     names, in which case the result is true as soon as any one of them
     *     is in scope.
     * @param bool $table True selects the "has an element in table scope"
     *     variant; any other value selects "has an element in scope".
     * @return bool True when a matching element is in scope, false otherwise
     *     (including when the stack is exhausted without a decision).
     */
    private function elementInScope($el, $table = false)
    {
        if (is_array($el)) {
            foreach ($el as $element) {
                if ($this->elementInScope($element, $table)) {
                    return true;
                }
            }

            return false;
        }

        /* Elements that end the search in the plain "in scope" variant. */
        $barriers = array('caption', 'td', 'th', 'button', 'marquee', 'object');

        /* 1. Initialise node to be the current node (the bottommost node of
        the stack), then walk towards the top of the stack. */
        for ($n = count($this->stack) - 1; $n >= 0; $n--) {
            $node = $this->stack[$n];

            if ($node->tagName === $el) {
                /* 2. If node is the target node, terminate in a match state. */
                return true;

            } elseif ($node->tagName === 'table') {
                /* 3. Otherwise, if node is a table element, terminate in a
                failure state. */
                return false;

            } elseif ($table !== true && in_array($node->tagName, $barriers)) {
                /* 4. Otherwise, if the algorithm is the "has an element in
                scope" variant (rather than the "has an element in table
                scope" variant), and node is one of the barrier elements,
                terminate in a failure state. */
                return false;

            } elseif ($node === $node->ownerDocument->documentElement) {
                /* 5. Otherwise, if node is an html element (root element),
                terminate in a failure state. */
                return false;
            }

            /* Otherwise, continue with the previous entry in the stack of
            open elements. */
        }

        /* The stack was empty or ran out without reaching the root element:
        nothing matched. */
        return false;
    }
4525
4526
    /**
     * Reconstructs the active formatting elements: every trailing entry of
     * the list of active formatting elements that is not currently open is
     * shallow-cloned, appended to the current node, pushed onto the stack of
     * open elements and substituted for the original entry in the list.
     *
     * @return false|null False when there is nothing to reconstruct,
     *     otherwise null.
     */
    private function reconstructActiveFormattingElements()
    {
        /* 1. If there are no entries in the list of active formatting elements,
        then there is nothing to reconstruct; stop this algorithm. */
        $formatting_elements = count($this->a_formatting);

        if ($formatting_elements === 0) {
            return false;
        }

        /* 3. Let entry be the last (most recently added) element in the list
        of active formatting elements. */
        $entry = end($this->a_formatting);

        /* 2. If the last (most recently added) entry in the list of active
        formatting elements is a marker, or if it is an element that is in the
        stack of open elements, then there is nothing to reconstruct; stop this
        algorithm. */
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            return false;
        }

        /* Rewind phase (steps 4-6). $advance records whether step 7 has to
        run before the first clone: it must when the rewind stopped on a
        marker or an open element (that entry itself must not be cloned), and
        must not when the rewind reached the very first entry of the list. */
        $a = $formatting_elements - 1;
        $advance = true;

        while (true) {
            /* 4. If there are no entries before entry in the list of active
            formatting elements, then jump to step 8. */
            if ($a === 0) {
                $advance = false;
                break;
            }

            /* 5. Let entry be the entry one earlier than entry in the list of
            active formatting elements. */
            $a--;
            $entry = $this->a_formatting[$a];

            /* 6. If entry is neither a marker nor an element that is also in
            the stack of open elements, go to step 4. */
            if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
                break;
            }
        }

        while (true) {
            /* 7. Let entry be the element one later than entry in the list of
            active formatting elements. */
            if ($advance) {
                $a++;
                $entry = $this->a_formatting[$a];
            }

            /* 8. Perform a shallow clone of the element entry to obtain clone. */
            $clone = $entry->cloneNode();

            /* 9. Append clone to the current node and push it onto the stack
            of open elements so that it is the new current node. */
            end($this->stack)->appendChild($clone);
            $this->stack[] = $clone;

            /* 10. Replace the entry for entry in the list with an entry for
            clone. */
            $this->a_formatting[$a] = $clone;

            /* 11. If the entry for clone in the list of active formatting
            elements is not the last entry in the list, return to step 7. */
            if (end($this->a_formatting) === $clone) {
                break;
            }

            $advance = true;
        }
    }
4597
4598
    /**
     * Clears the list of active formatting elements up to the last marker:
     * entries are removed from the end of the list until a marker has been
     * removed.
     *
     * If the list holds no marker at all, it is simply emptied; the loop
     * stops once the list is empty instead of spinning forever.
     *
     * @return void
     */
    private function clearTheActiveFormattingElementsUpToTheLastMarker()
    {
        while (!empty($this->a_formatting)) {
            /* 1-2. Let entry be the last (most recently added) entry in the
            list of active formatting elements and remove it from the list. */
            $entry = array_pop($this->a_formatting);

            /* 3. If entry was a marker, then stop the algorithm at this point.
            The list has been cleared up to the last marker. */
            if ($entry === self::MARKER) {
                break;
            }
        }
    }
4619
4620
    /**
     * Generates implied end tags: while the current node is a dd, dt, li, p,
     * td, th or tr element that is not listed in $exclude, it is popped from
     * the stack of open elements.
     *
     * @param string[] $exclude Tag names that must not be closed implicitly.
     * @return void
     */
    private function generateImpliedEndTags($exclude = array())
    {
        $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

        // end() yields false once the stack is empty; stop there rather than
        // reading a property of a non-object.
        while (($current = end($this->stack)) !== false &&
            in_array($current->nodeName, $elements, true)
        ) {
            array_pop($this->stack);
        }
    }
4634
4635
    /**
     * Classifies an element by its tag name, checking the special, scoping
     * and formatting name lists in that order.
     *
     * @param DOMElement $node Element whose tagName is looked up.
     * @return mixed self::SPECIAL, self::SCOPING or self::FORMATTING for the
     *     first list containing the name, otherwise self::PHRASING.
     */
    private function getElementCategory($node)
    {
        $tag = $node->tagName;

        if (in_array($tag, $this->special)) {
            return self::SPECIAL;
        }

        if (in_array($tag, $this->scoping)) {
            return self::SCOPING;
        }

        if (in_array($tag, $this->formatting)) {
            return self::FORMATTING;
        }

        return self::PHRASING;
    }
4648
4649
    /**
     * Clears the stack of open elements back to a table context: elements
     * are popped until the current node's name is one of $elements.
     *
     * @param string[] $elements Node names at which popping stops.
     * @return void
     */
    private function clearStackToTableContext($elements)
    {
        /* While the current node is not one of the requested context
        elements, pop elements from the stack of open elements. */
        while (!in_array(end($this->stack)->nodeName, $elements)) {
            array_pop($this->stack);
        }
    }
4666
4667
    /**
     * Resets the insertion mode appropriately: the stack of open elements is
     * walked from the current node towards the first entry, and the mode is
     * set from the first node whose name implies one.
     *
     * @return void
     */
    private function resetInsertionMode()
    {
        /* Steps 4-13: node names that map directly onto an insertion mode.
        Note that a head element deliberately selects "in body", not
        "in head". */
        $modeByName = array(
            'select'   => self::IN_SELECT,
            'td'       => self::IN_CELL,
            'th'       => self::IN_CELL,
            'tr'       => self::IN_ROW,
            'tbody'    => self::IN_TBODY,
            'thead'    => self::IN_TBODY,
            'tfoot'    => self::IN_TBODY,
            'caption'  => self::IN_CAPTION,
            'colgroup' => self::IN_CGROUP,
            'table'    => self::IN_TABLE,
            'head'     => self::IN_BODY,
            'body'     => self::IN_BODY,
            'frameset' => self::IN_FRAME,
        );

        /* 1. Let last be false. */
        $last = false;

        /* 2. Let node be the last node in the stack of open elements, then
        move towards the first node on each pass. */
        for ($index = count($this->stack) - 1; $index >= 0; $index--) {
            $node = $this->stack[$index];
            $name = $node->nodeName;

            /* 3. If node is the first node in the stack of open elements,
            then set last to true. */
            if ($this->stack[0]->isSameNode($node)) {
                $last = true;
            }

            if (isset($modeByName[$name])) {
                $this->mode = $modeByName[$name];
                return;
            }

            /* 14. If node is an html element, then: if the head element
            pointer is null, switch the insertion mode to "before head",
            otherwise, switch the insertion mode to "after head". In either
            case, abort these steps. (innerHTML case) */
            if ($name === 'html') {
                if ($this->head_pointer === null) {
                    $this->mode = self::BEFOR_HEAD;
                } else {
                    $this->mode = self::AFTER_HEAD;
                }
                return;
            }

            /* 15. If last is true, then set the insertion mode to "in body"
            and abort these steps. (innerHTML case) */
            if ($last) {
                $this->mode = self::IN_BODY;
                return;
            }
        }
    }
4765
4766
    /**
     * Closes the currently open table cell: if the stack of open elements
     * has a td or th element in table scope (td is tried first), inCell() is
     * invoked with a synthetic end tag token of that name.
     *
     * @return void
     */
    private function closeCell()
    {
        $openCell = null;

        if ($this->elementInScope('td', true)) {
            $openCell = 'td';
        } elseif ($this->elementInScope('th', true)) {
            $openCell = 'th';
        }

        if ($openCell === null) {
            return;
        }

        /* Act as if an end tag token with that tag name had been seen. */
        $endTag = array(
            'name' => $openCell,
            'type' => HTML5::ENDTAG
        );
        $this->inCell($endTag);
    }
4783
4784
    /**
     * Hands back the document object that this tree constructer has been
     * populating.
     *
     * @return mixed The value of $this->dom.
     */
    public function save()
    {
        return $this->dom;
    }
4788
}
4789